Music List: 最近喜欢听的

def get_vector_recommendations(self, target_user_id, n_similar_users=10, n_recommendations=10):
    """Recommend songs the target user has not rated, using similar users.

    Loads up to 3000 user documents that have a non-empty ``userMusicList``
    from ``self.collection``, represents each user as a sparse rating vector
    (``musicId -> musicScore / 100``), ranks the other users by cosine
    similarity to the target user, and scores every song found in the most
    similar users' lists that the target user does not already have.

    Args:
        target_user_id: Id of the user to recommend for. It is compared as a
            string, because loaded user ids are stored as ``str(userId)``.
        n_similar_users: Maximum number of neighbours to use.
        n_recommendations: Maximum number of recommendations to return.

    Returns:
        A list of dicts, best first, each with ``musicId``, ``name``,
        ``author``, ``predicted_score`` (mean of ``musicScore * similarity``
        over the neighbours that have the song, rounded to 2 decimals) and
        ``details`` (``similar_users_count``, ``average_similarity``).
        Returns an empty list when the target user is not among the loaded
        users.
    """
    import math  # local import: the file's import block is not visible here

    print(f"\n为用户 {target_user_id} 生成基于向量的推荐...")

    # Loaded ids are stringified, so the lookup key must be a string too;
    # otherwise an integer id never matches and the lookup below fails.
    target_key = str(target_user_id)

    # 1. Load the users and build sparse rating vectors in a single pass.
    music_lists = {}
    rating_vectors = {}
    vector_norms = {}
    all_music_ids = set()
    query = {"userMusicList": {"$exists": True, "$ne": []}}
    for user in self.collection.find(query).limit(3000):
        music_list = user.get('userMusicList')
        if not music_list:
            continue
        user_id = str(user['userId'])
        # A song listed twice keeps its last score, as a dense vector would.
        vector = {
            str(music['musicId']): float(music['musicScore']) / 100.0
            for music in music_list
        }
        music_lists[user_id] = music_list
        rating_vectors[user_id] = vector
        vector_norms[user_id] = math.sqrt(sum(w * w for w in vector.values()))
        all_music_ids.update(vector)

    print(f"总音乐数量: {len(all_music_ids)}")
    print(f"总用户数量: {len(rating_vectors)}")

    # The target may be missing (no music list, or outside the loaded sample).
    if target_key not in rating_vectors:
        return []

    # 2. Cosine similarity between the target and every other user. Only
    #    songs present in both vectors contribute to the dot product, so the
    #    shorter vector is iterated and the longer one is probed.
    target_vector = rating_vectors[target_key]
    target_norm = vector_norms[target_key]
    similarities = []
    for user_id, vector in rating_vectors.items():
        if user_id == target_key:
            continue
        denominator = target_norm * vector_norms[user_id]
        if denominator == 0:
            # An all-zero vector has no direction; treat it as dissimilar.
            similarity = 0.0
        else:
            if len(vector) <= len(target_vector):
                shorter, longer = vector, target_vector
            else:
                shorter, longer = target_vector, vector
            dot = sum(w * longer[mid] for mid, w in shorter.items() if mid in longer)
            similarity = dot / denominator
        similarities.append((user_id, similarity))

    # 3. Keep the K most similar users. Users with no positive similarity
    #    are dropped: they would add weight-0 votes that dilute the averages.
    ranked = sorted(similarities, key=lambda pair: pair[1], reverse=True)
    similar_users = [(uid, sim) for uid, sim in ranked[:n_similar_users] if sim > 0]

    print(f"\n找到的{len(similar_users)}个最相似用户:")
    for user_id, sim in similar_users:
        print(f"用户 {user_id}: 相似度 {sim:.3f}")

    # 4. Collect candidate songs the target user does not already have.
    target_music_ids = set(target_vector)
    candidate_songs = {}
    for similar_user_id, similarity in similar_users:
        for music in music_lists[similar_user_id]:
            music_id = str(music['musicId'])
            if music_id in target_music_ids:
                continue
            candidate = candidate_songs.get(music_id)
            if candidate is None:
                candidate = {'weighted_sum': 0.0, 'similarity_sum': 0.0, 'count': 0}
                candidate_songs[music_id] = candidate
            candidate['weighted_sum'] += float(music['musicScore']) * similarity
            candidate['similarity_sum'] += similarity
            candidate['count'] += 1
            candidate['score'] = candidate['weighted_sum'] / candidate['count']
            candidate['music_info'] = music

    # 5. Rank the candidates and format the top N.
    sorted_candidates = sorted(
        candidate_songs.items(),
        key=lambda item: item[1]['score'],
        reverse=True,
    )[:n_recommendations]

    return [
        {
            'musicId': music_id,
            'name': info['music_info']['musicName'],
            'author': info['music_info']['musicAuthor'],
            'predicted_score': round(info['score'], 2),
            'details': {
                'similar_users_count': info['count'],
                # Mean similarity of the neighbours that voted for the song.
                'average_similarity': info['similarity_sum'] / info['count'],
            },
        }
        for music_id, info in sorted_candidates
    ]


# Original request accompanying this snippet: please help further optimize
# the vector-based recommendation method.
03-13
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值