Appstore proj: Calculate an App's Top-5 related apps(1)

本文介绍了如何计算应用之间的相似性,通过使用余弦相似度来实现。在helper.py中定义了cosine_similarity函数,以便在需要时调用。同时在dataservice.py中,检索并存储数据,展示了一个示例,为C10107104应用找到了Top-5相关应用。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

How to calculate similarity?

Use cosine similarity! Treating each app list as a binary vector, similarity(A, B) = |A ∩ B| / sqrt(|A| · |B|).





Edit helper.py.

This way you can call the function on the Helper class whenever it is needed:

>>> helper = Helper()

>>> similarity = helper.cosine_similarity(app_list1, app_list2)

import operator
import math

class Helper(object):
    """Similarity helpers operating on lists of app ids."""

    @classmethod
    def cosine_similarity(cls, app_list1, app_list2):
        """Return the cosine similarity of two app lists.

        Each list is treated as a binary vector, so the result is the
        number of elements of ``app_list1`` found in ``app_list2`` divided
        by ``sqrt(len(app_list1) * len(app_list2))``.

        Args:
            app_list1: first list of app ids.
            app_list2: second list of app ids.

        Returns:
            The similarity as a float; ``0.0`` when either list is empty
            (the formula would otherwise divide by zero).
        """
        denominator = math.sqrt(len(app_list1) * len(app_list2))
        if denominator == 0:
            # An empty list shares nothing with any other list.
            return 0.0
        match_count = cls.__count_match(app_list1, app_list2)
        return float(match_count) / denominator

    @classmethod
    def __count_match(cls, list1, list2):
        """Count the elements of ``list1`` that also occur in ``list2``."""
        return sum(1 for element in list1 if element in list2)

def calculate_top_5(app, user_download_history):
    """Print and return up to five apps most related to ``app``.

    For every download history, the cosine similarity between ``[app]``
    and that history is added to the score of each app the history
    contains. The apps with the largest accumulated score are reported.

    Args:
        app: id of the app to find related apps for.
        user_download_history: iterable of download histories, each a
            list of app ids.

    Returns:
        A list of at most five app ids ordered by descending accumulated
        similarity, or ``None`` (printing nothing) when ``app`` does not
        occur in any download history.
    """
    # {app_id: accumulated similarity to ``app``}
    app_similarity = {}
    for apps in user_download_history:
        if not apps:
            # An empty history has nothing to score and would make the
            # cosine denominator zero.
            continue
        similarity = Helper.cosine_similarity([app], apps)
        for other_app in apps:
            app_similarity[other_app] = app_similarity.get(other_app, 0) + similarity

    # There could be an app without related apps (not in any download history).
    if app not in app_similarity:
        return None

    # The app itself must not show up in its own recommendations.
    app_similarity.pop(app)
    # Sort by similarity, highest first; slicing tolerates fewer than five
    # candidates instead of raising IndexError.
    sorted_tups = sorted(app_similarity.items(), key=operator.itemgetter(1), reverse=True)
    top_5_app = [other_app for other_app, _ in sorted_tups[:5]]
    print("top_5_app for " + str(app) + ":\t" + str(top_5_app))
    return top_5_app


Edit dataservice.py

retrieve and store data

from pymongo import MongoClient
import random

# about data
class DataService(object):
    """Class-level access point to the ``appstore`` database of a client.

    ``init`` must be called with a client before any retrieval method is
    used, because the collection handles are stored as class attributes.
    """

    @classmethod
    def init(cls, client):
        """Store ``client`` and the database/collection handles on the class.

        Args:
            client: client object exposing the ``appstore`` database, which
                in turn exposes the ``user_download_history`` and
                ``app_info`` collections.
        """
        cls.client = client
        cls.db = client.appstore
        cls.user_download_history = cls.db.user_download_history
        cls.app_info = cls.db.app_info

    @classmethod
    def retrieve_user_download_history(cls, filter_dict=None):
        """Return user download histories as ``{user_id: download_history}``.

        Args:
            filter_dict: query filter passed to ``find``; ``None`` (the
                default) selects every document in the collection.

        Returns:
            A dict mapping each document's ``user_id`` to its
            ``download_history``.
        """
        # ``None`` instead of ``{}`` as default avoids sharing one mutable
        # dict between calls.
        if filter_dict is None:
            filter_dict = {}
        cursor = cls.user_download_history.find(filter_dict)
        return {
            document['user_id']: document['download_history']
            for document in cursor
        }

Edit main.py

from pymongo import MongoClient
from dataservice import DataService
from helper import calculate_top_5

def main():
    """Load all download histories and report related apps for one app.

    Connects to MongoDB on ``localhost:27017``, hands the client to
    ``DataService``, retrieves every user's download history and calls
    ``calculate_top_5`` for app ``C10107104``. Any exception is printed
    rather than propagated, and the client is closed in all cases.
    """
    # Bound before the ``try`` so the cleanup can test it directly instead
    # of inspecting ``locals()``.
    client = None
    try:
        # Get the MongoDB client and set it in DataService.
        client = MongoClient('localhost', 27017)
        DataService.init(client)
        # Work flow.
        user_download_history = DataService.retrieve_user_download_history()
        calculate_top_5('C10107104', user_download_history.values())
    except Exception as e:
        # Top-level boundary of the script: report the failure, then clean up.
        print(e)
    finally:
        # Clean-up work.
        if client is not None:
            client.close()

# Run the workflow only when this file is executed as a script, not on import.
if __name__ == "__main__":
      main()

output:

top_5_app for C10107104: [u'C10129690', u'C5341', u'C20252', u'C10191382', u'C183901']


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值