SimpleTagBased

本文介绍了一个简单的基于标签的推荐系统实现过程。该系统通过分析用户标记的数据来预测用户可能感兴趣的内容。文中详细描述了数据加载、数据集划分、状态初始化及推荐算法等步骤,并通过精确率和召回率评估了推荐效果。
'''
Created on 2014-03-05
@Author:Dior
'''
import random
import math
import operator

class SimpleTagBased(object):
    """Simple tag-based recommender with a built-in offline evaluation.

    Constructing an instance runs the whole pipeline on ``filename``:
    load the (user, item, tag) records, randomly split them into a train
    and a test set, build the co-occurrence counts from the train set and
    print a precision/recall table for several recommendation list sizes.

    The interest of user ``u`` in item ``i`` is scored as the sum over the
    tags ``t`` used by ``u`` of ``user_tags[u][t] * tag_items[t][i]``.

    All ``print`` calls pass a single pre-formatted string so the output is
    the same whether the module runs under Python 2 or Python 3.
    """

    def __init__(self, filename):
        """Run load -> split -> count -> evaluate on the file ``filename``."""
        self.filename = filename
        self.loadData()
        self.randomlySplitData(0.2)
        self.initStat()
        self.testRecommend()

    def loadData(self):
        """Read ``self.filename`` into ``self.records``.

        The first line is treated as a header and skipped. Every other
        non-blank line must hold four tab-separated fields
        (user id, item id, tag id, timestamp); the three ids are converted
        to zero-based integers and the timestamp is ignored.

        ``self.records`` maps ``user -> item -> [tag, ...]``.

        Raises:
            ValueError: if a data line does not have exactly four fields
                or an id is not an integer.
        """
        print("##################load data begin#######################")
        self.records = {}
        record_count = 0
        # ``with`` guarantees the handle is closed even if a line is malformed.
        with open(self.filename) as fi:
            next(fi, None)  # skip the header line (no-op on an empty file)
            for line in fi:
                if not line.strip():
                    # Tolerate blank lines, e.g. a trailing newline at EOF.
                    continue
                uid, iid, tag, _timestamp = line.split('\t')
                uid = int(uid) - 1
                iid = int(iid) - 1
                tag = int(tag) - 1
                self.records.setdefault(uid, {}).setdefault(iid, []).append(tag)
                record_count += 1
        # Only real data lines are counted; the header is not a record.
        print("Load data success.The total records is %d." % record_count)
        # NOTE: len(self.records) is the number of distinct users.
        print("The total records number is %d." % len(self.records))
        print("##################load data end#######################\n")

    def randomlySplitData(self, ratio, seed=100):
        """Split ``self.records`` into ``self.train`` and ``self.test``.

        Each (user, item) pair, together with all of its tags, goes to the
        test set with probability ``ratio`` and to the train set otherwise.

        Args:
            ratio: probability of assigning a (user, item) pair to the test set.
            seed: value passed to ``random.seed`` so the split is repeatable.
        """
        print("################beginning to split data#####################")
        random.seed(seed)
        self.train = dict()
        self.test = dict()
        for u, items in self.records.items():
            for i, tags in items.items():
                target = self.test if random.random() < ratio else self.train
                # Copy the tag list so the splits never alias self.records.
                target.setdefault(u, {})[i] = list(tags)
        print("Split data complete.")
        print("The length of train set is %d,the length of test set is %d." % (len(self.train), len(self.test)))
        print("##################split data end#######################\n")

    def initStat(self):
        """Build the count tables from ``self.train``.

        After the call:
            * ``self.user_tags[u][t]`` - times user ``u`` used tag ``t``
            * ``self.tag_items[t][i]`` - times item ``i`` received tag ``t``
            * ``self.user_items[u][i]`` - number of tags user ``u`` put on item ``i``
        """
        print("##################initstat begin#######################")
        self.user_tags = dict()
        self.tag_items = dict()
        self.user_items = dict()
        for u, items in self.train.items():
            for i, tags in items.items():
                for tag in tags:
                    self._addValueToMat(self.user_tags, u, tag, 1)
                    self._addValueToMat(self.tag_items, tag, i, 1)
                    self._addValueToMat(self.user_items, u, i, 1)
        print("Initialize state complete.")
        print("The length of the user_tags is %d,the length of the tag_items is %d,the length of the user_items is %d" % (len(self.user_tags), len(self.tag_items), len(self.user_items)))
        print("##################initstat end#######################\n")

    def _addValueToMat(self, mat, index, item, value=1):
        """Add ``value`` to ``mat[index][item]``, creating missing entries.

        ``mat`` is a dict of dicts used as a sparse matrix; it is modified
        in place.
        """
        row = mat.setdefault(index, {})
        if item in row:
            row[item] += value
        else:
            row[item] = value

    def precisionAndRecall(self, N):
        """Evaluate top-``N`` recommendations against ``self.test``.

        Test users that have no training data are skipped. A hit is a
        recommended item that the user tagged in the test set.

        Args:
            N: length of the recommendation list requested per user.

        Returns:
            A ``(precision, recall)`` tuple of floats, where precision is
            ``hits / (N * evaluated users)`` and recall is
            ``hits / total test items of evaluated users``. Either value is
            ``0.0`` when its denominator is zero (no evaluable user, or
            ``N == 0``) instead of raising ``ZeroDivisionError``.
        """
        hit = 0
        h_recall = 0
        h_precision = 0
        for user, items in self.test.items():
            if user not in self.train:
                continue
            rank = self.recommend(user, N)
            hit += sum(1 for item, _score in rank if item in items)
            h_recall += len(items)
            h_precision += N
        precision = hit / float(h_precision) if h_precision else 0.0
        recall = hit / float(h_recall) if h_recall else 0.0
        return precision, recall

    def recommend(self, user, N):
        """Return up to ``N`` ``(item, score)`` pairs for ``user``.

        Items the user already tagged in the train set are excluded. The
        score of an item is the sum, over the user's tags, of
        ``user_tags[user][tag] * tag_items[tag][item]``. The list is sorted
        by descending score. A user without training data gets ``[]``.
        """
        recommend_items = dict()
        tagged_items = self.user_items.get(user, {})
        for tag, wut in self.user_tags.get(user, {}).items():
            for item, wti in self.tag_items[tag].items():
                if item in tagged_items:
                    continue
                recommend_items[item] = recommend_items.get(item, 0) + wut * wti
        ranked = sorted(recommend_items.items(), key=operator.itemgetter(1), reverse=True)
        return ranked[0:N]

    def testRecommend(self, n_values=(5, 10, 20, 40, 60, 80, 160)):
        """Print a recall/precision table (in percent) for each list size.

        Args:
            n_values: iterable of recommendation list sizes to evaluate;
                defaults to the sizes the original script used.
        """
        print("##################testRecommend begin#######################")
        print("%3s%20s%20s" % ('K', "recall", 'precision'))
        for n in n_values:
            precision, recall = self.precisionAndRecall(n)
            print("%3d%19.3f%%%19.3f%%" % (n, recall * 100, precision * 100))
        print("##################testRecommend end#######################\n")
        
        
if __name__=='__main__':
    # Local import: only the script entry point needs the command line.
    import sys
    # The dataset location may be given as the first command-line argument;
    # without one, the original hard-coded path is used.
    default_path="E:\\RecommenderSystem\\datasets\\hetrec2011-delicious-2k\\user_taggedbookmarks-timestamps.dat"
    stb=SimpleTagBased(sys.argv[1] if len(sys.argv)>1 else default_path)
    #print stb.user_items




                
在数据驱动的活动推荐系统中,用户画像起到了关键作用。推荐系统作为互联网高频应用,能提升用户体验、促进商业成长,可给用户提供个性化的商品建议、引导购买决策等,帮助用户从海量数据中快速找到所需信息和服务,从而获得优质的用户体验和商业收益[^1]。

在电商领域,借助InsCode AI IDE和DeepSeek R1 API,开发者能利用用户画像实现用户分群和动态调整功能。通过根据购买习惯、兴趣爱好等维度将用户分为不同群体,为每个群体制定专属的促销策略;实时监控用户行为,动态更新画像,确保推荐内容始终符合用户当前需求[^2]。

从算法角度,SimpleTagBased算法可用于基于用户画像的推荐。该算法统计每个用户的常用标签,对每个标签,统计被打过这个标签次数最多的商品,对于一个用户,找到其常用的标签,然后找到具有这些标签的最热门物品推荐给他。用户u对商品i的兴趣可用公式 $\mathrm{score}(u,i)=\sum_{t}\mathrm{use\_tags}[u,t]\cdot\mathrm{tag\_items}[t,i]$ 来表示[^4]。

在系统设计与实现过程中,还需要考虑如大数据技术与用户画像在现代企业决策中的核心作用,包括构建方法论、技术架构和实施策略等。同时,还需应对隐私保护、伦理考量和技术挑战等关键问题,以构建可持续的大数据决策系统[^3]。

```python
# 以下是一个简单的示例代码,模拟基于标签的推荐系统
# 假设 use_tags 是用户-标签矩阵,tag_items 是标签-商品矩阵
import numpy as np

# 示例数据
use_tags = np.array([[1, 2, 0], [0, 1, 3]])  # 2 个用户,3 个标签
tag_items = np.array([[2, 1], [1, 3], [0, 2]])  # 3 个标签,2 个商品

def simple_tag_based_recommendation(use_tags, tag_items):
    num_users = use_tags.shape[0]
    num_items = tag_items.shape[1]
    scores = np.zeros((num_users, num_items))
    for u in range(num_users):
        for i in range(num_items):
            score = 0
            for t in range(use_tags.shape[1]):
                score += use_tags[u, t] * tag_items[t, i]
            scores[u, i] = score
    return scores

# 计算得分
scores = simple_tag_based_recommendation(use_tags, tag_items)
print("用户对商品的兴趣得分:", scores)
```
评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值