Collaborative filtering, implemented in Python

I watched Andrew Ng's machine learning course on NetEase Open Courses and implemented the collaborative filtering algorithm myself in Python.
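For context, the script below minimizes, by plain gradient descent, a regularized squared-error cost over the observed ratings. Written out, the cost from the course looks roughly like the following (with λ playing the role of `beta` in the code; note that my `calc_loss` accumulates the regularization terms once per observed rating rather than once per parameter, so it is a slight variant of this formula):

```latex
J(x,\theta)=\frac{1}{2}\sum_{(i,j):\,r(i,j)=1}\left(x^{(i)\top}\theta^{(j)}-y^{(i,j)}\right)^{2}
  +\frac{\lambda}{2}\sum_{i,k}\left(x^{(i)}_{k}\right)^{2}
  +\frac{\lambda}{2}\sum_{j,k}\left(\theta^{(j)}_{k}\right)^{2}
```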



```python
import numpy as np

# R: number of latent features per user / item
R = 2
# alpha: gradient-descent learning rate
alpha = 0.01
# beta: regularization strength
beta = 0.5

def norm_dataset(data_set, bool_set):
    """Mean-normalize each column (item) of the rating matrix in place.

    Observed entries have their column mean subtracted; unobserved entries
    are set to -1.0.  Returns the list of column means (assumes every
    column has at least one observed rating).
    """
    nu = len(data_set); nm = len(data_set[0]); miu = []
    for j in range(nm):
        sum_score = 0.0; com_num = 0.0
        # mean over the observed ratings of item j
        for i in range(nu):
            if bool_set[i][j]:
                sum_score = sum_score + data_set[i][j]
                com_num = com_num + 1.0
        miu.append(sum_score / com_num)
        for i in range(nu):
            if bool_set[i][j]:
                data_set[i][j] = data_set[i][j] - miu[j]
            else:
                data_set[i][j] = -1.0
    return miu

def calc_loss(x, theta, data_set, bool_set):
    """Regularized squared-error loss over the observed entries.

    The beta regularization terms are accumulated once per observed rating,
    which matches the gradients used in new_x_and_theta below.
    """
    nu = len(data_set); nm = len(data_set[0]); total_loss = 0.0
    for i in range(nu):
        for j in range(nm):
            if bool_set[i][j]:
                xTtheta = 0.0; x_loss = 0.0; theta_loss = 0.0
                for k in range(R):
                    xTtheta = xTtheta + x[i, k] * theta[k, j]
                    x_loss = x_loss + x[i, k] * x[i, k]
                    theta_loss = theta_loss + theta[k, j] * theta[k, j]
                total_loss = total_loss + 0.5 * (xTtheta - data_set[i][j]) ** 2 + beta * x_loss + beta * theta_loss
    return total_loss

def eval_loss(x, theta, data_set, bool_set):
    """Unregularized squared-error loss over the observed entries."""
    nu = len(data_set); nm = len(data_set[0]); total_loss = 0.0
    for i in range(nu):
        for j in range(nm):
            if bool_set[i][j]:
                xTtheta = 0.0
                for k in range(R):
                    xTtheta = xTtheta + x[i, k] * theta[k, j]
                total_loss = total_loss + 0.5 * (xTtheta - data_set[i][j]) ** 2
    return total_loss

def new_x_and_theta(x, theta, data_set, bool_set):
    """Accumulate the gradients of the loss and take one gradient-descent
    step, updating x and theta in place."""
    nu = len(data_set); nm = len(data_set[0])
    x_partial = np.zeros((nu, R)); theta_partial = np.zeros((R, nm))
    for i in range(nu):
        for j in range(nm):
            if bool_set[i][j]:
                # prediction error on the observed entry (i, j)
                xTtheta = 0.0
                for k in range(R):
                    xTtheta = xTtheta + x[i, k] * theta[k, j]
                for k in range(R):
                    x_partial[i, k] = x_partial[i, k] + ((xTtheta - data_set[i][j]) * theta[k, j] + beta * x[i, k])
                for k in range(R):
                    theta_partial[k, j] = theta_partial[k, j] + ((xTtheta - data_set[i][j]) * x[i, k] + beta * theta[k, j])
    # simultaneous update of both parameter matrices
    for i in range(nu):
        for k in range(R):
            x[i, k] = x[i, k] - alpha * x_partial[i, k]
    for j in range(nm):
        for k in range(R):
            theta[k, j] = theta[k, j] - alpha * theta_partial[k, j]

if __name__ == '__main__':
    # rows = users, columns = items; -1 marks an unobserved rating
    data_set = [[5, 5, 0, 0], [5, -1, -1, 0], [-1, 4, 0, -1], [0, 0, 5, 4], [0, 0, 5, -1], [-1, -1, -1, -1]]
    bool_set = [[], [], [], [], [], []]
    nu = len(data_set); nm = len(data_set[0])
    # bool_set[i][j] is True where a rating was actually observed
    for i in range(nu):
        for j in range(nm):
            if data_set[i][j] != -1:
                bool_set[i].append(True)
            else:
                bool_set[i].append(False)
    # random initialization of the latent-feature matrices
    x = np.random.random(size=(nu, R)); theta = np.random.random(size=(R, nm))
    # mean-normalize the ratings and remember the per-item means
    miu = norm_dataset(data_set, bool_set)
    print(data_set)
    print(miu)
    min_loss = float('inf')
    final_x = np.random.random(size=(nu, R)); final_theta = np.random.random(size=(R, nm))
    # plain batch gradient descent; keep the parameters with the lowest loss
    for e in range(1000):
        loss = calc_loss(x, theta, data_set, bool_set)
        if loss < min_loss:
            min_loss = loss
            final_x = x.copy()
            final_theta = theta.copy()
        new_x_and_theta(x, theta, data_set, bool_set)
    print(final_x)
    print(final_theta)
    print(eval_loss(final_x, final_theta, data_set, bool_set))
    # reconstruct the full prediction matrix and add the item means back
    result = np.zeros((nu, nm))
    for i in range(nu):
        for j in range(nm):
            for k in range(R):
                result[i, j] = result[i, j] + final_x[i, k] * final_theta[k, j]
            result[i, j] = result[i, j] + miu[j]
    # undo the mean normalization on the observed ratings for comparison
    for i in range(nu):
        for j in range(nm):
            if bool_set[i][j]:
                data_set[i][j] = data_set[i][j] + miu[j]
    print(result)
    print(data_set)
```
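The per-entry gradient step above can also be written with vectorized NumPy operations. The sketch below is an equivalent reformulation of the same update, not part of the script itself; the function name `gradient_step` and the arrays `Y` (mean-normalized ratings) and `M` (boolean observation mask) are names I introduce here for illustration.

```python
import numpy as np

def gradient_step(x, theta, Y, M, alpha=0.01, beta=0.5):
    """One gradient-descent step on the same per-observation loss,
    written with matrix operations instead of explicit loops.

    x: (nu, R) user features, theta: (R, nm) item features,
    Y: (nu, nm) mean-normalized ratings, M: (nu, nm) boolean mask.
    """
    err = (x @ theta - Y) * M                                        # residuals on observed entries only
    x_grad = err @ theta.T + beta * M.sum(axis=1, keepdims=True) * x # regularizer counted once per observed rating,
    theta_grad = x.T @ err + beta * M.sum(axis=0, keepdims=True) * theta  # matching the loop version above
    return x - alpha * x_grad, theta - alpha * theta_grad

# Hypothetical wiring with the arrays from the script above:
# Y = np.array(data_set, dtype=float); M = np.array(bool_set)
# x, theta = gradient_step(x, theta, Y, M, alpha=alpha, beta=beta)
```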

### Overview of collaborative filtering

Collaborative filtering is a recommendation technique that predicts which items a user is likely to be interested in from the behaviour of many users. By analysing the preference data of a large user base, it finds groups with similar tastes and uses them to generate personalised suggestions for the target user[^2].

### An item-based collaborative filtering implementation

Below is a simple version of an item-based collaborative filtering algorithm in Python:

```python
from collections import defaultdict
import numpy as np


def load_dataset():
    """Simulate loading a rating matrix."""
    data = {
        'User1': {'ItemA': 5, 'ItemB': 3},
        'User2': {'ItemA': 4, 'ItemC': 2},
        'User3': {'ItemB': 5, 'ItemC': 5},
    }
    return data


def calculate_similarity(data):
    """Compute a co-occurrence-based cosine similarity between items."""
    # count, for every pair of items, how many users rated both,
    # and for every item, how many users rated it at all
    cooccurrences = defaultdict(lambda: defaultdict(int))
    supports = defaultdict(int)
    for items in data.values():
        items = list(items)
        for item in items:
            supports[item] += 1
        for i in range(len(items)):
            for j in range(i + 1, len(items)):
                a, b = sorted([items[i], items[j]])
                cooccurrences[a][b] += 1

    # turn the counts into symmetric similarity scores
    similarities = defaultdict(dict)
    for a in cooccurrences:
        for b in cooccurrences[a]:
            sim_score = cooccurrences[a][b] / np.sqrt(supports[a] * supports[b])
            similarities[a][b] = sim_score
            similarities[b][a] = sim_score
    return similarities


def recommend_items(target_user, ratings, sims, top_n=3):
    """Recommend the top-N unseen items for the target user."""
    seen = ratings[target_user]
    all_items = {item for items in ratings.values() for item in items}
    unseen_items = all_items - set(seen)

    # score an unseen item by the similarity-weighted ratings of the seen items
    scores = {
        item: sum(sims.get(item, {}).get(seen_item, 0.0) * rating
                  for seen_item, rating in seen.items())
        for item in unseen_items
    }
    recommendations = sorted(scores.items(), key=lambda kv: kv[1], reverse=True)[:top_n]
    return dict(recommendations)


if __name__ == '__main__':
    dataset = load_dataset()
    similarity_scores = calculate_similarity(dataset)
    target_user = "User1"
    recos = recommend_items(target_user, dataset, similarity_scores)
    print(f"Recommendations for {target_user}: ", recos)
```

This code sketches a basic framework for item-based collaborative filtering. It first defines helper functions that build a small test data set and run the actual recommendation logic. `calculate_similarity()` quantifies how closely two items are related, here using a simple cosine-style similarity computed from how often two items are rated by the same users[^1]. Finally, `recommend_items()` uses those similarity scores to pick the unseen items most likely to appeal to the target user[^3].
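With the toy ratings in `load_dataset()`, each item is rated by two users and every item pair is co-rated by exactly one user, so each pair gets a similarity of 1/√(2·2) = 0.5; running the script should therefore recommend ItemC to User1 with a score of about 0.5·5 + 0.5·3 = 4.0.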