python对接数据库协同过滤_User Base协同过滤的推荐系统，python实现-CSDN博客

本文链接：https://blog.csdn.net/weixin_39587029/article/details/111428665

该博客展示了如何使用Python读取CSV文件并实现User Base协同过滤的推荐系统。通过计算用户之间的相似度，填充评分矩阵，并进行KNN填充。代码包括读取CSV、计算皮尔逊相似度、找到最相似用户等步骤。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

同学让帮忙写的，临时写出来的，可能有bug，贴着存档吧，不解释了

# -*- coding: utf-8 -*-

# test on python 2.7.11

import csv

import random

import math

# user base CF

def read_csv(file_name='train.csv'):
    '''
    Read a CSV file of integer ratings into a matrix (list of rows).

    Every cell is converted with int().  Blank lines in the file are skipped:
    csv.reader yields an empty list for them, which would otherwise become an
    empty row and make the matrix ragged.

    file_name: path of the CSV file to read.
    Returns a list of lists of int, one inner list per non-blank line.
    Raises ValueError if a cell is not an integer literal.
    '''
    with open(file_name) as csvfile:
        return [
            [int(cell) for cell in row]
            for row in csv.reader(csvfile)
            if row  # drop blank lines
        ]

#&&&&&&&

def avg(vector):
    '''
    Return the average of the rated entries of vector.

    Entries greater than 0 are counted as ratings; the sum of the whole
    vector is divided by that count, so zeros (unrated) do not lower the mean.

    Returns 0 when the vector has no entry greater than 0 (including an
    empty vector).
    '''
    count = sum(1 for a in vector if a > 0)
    if not count:
        return 0
    # float() forces true division: with an all-int vector, Python 2 would
    # otherwise floor the result (e.g. avg([1, 2]) == 1 instead of 1.5).
    return float(sum(vector)) / count

def common_items(x, y):
    '''
    Extract the entries that are rated in both x and y.

    A position is kept only when x and y are both greater than 0 there.
    Returns two lists: the values of x at the kept positions and the values
    of y at the same positions, in their original order.
    '''
    shared = [pos for pos in range(len(x)) if x[pos] > 0 and y[pos] > 0]
    return [x[pos] for pos in shared], [y[pos] for pos in shared]

def pearson(x, y):
    '''
    Pearson-style similarity between two rating vectors x and y.

    Each vector is centred on its own avg() (computed over the whole vector),
    but only the positions returned by common_items() enter the correlation.
    Returns 0 when there are no common items or when the denominator is 0.
    '''
    mean_x = avg(x)
    mean_y = avg(y)
    shared_x, shared_y = common_items(x, y)
    if not shared_x:
        return 0

    dev_x = [value - mean_x for value in shared_x]
    dev_y = [value - mean_y for value in shared_y]

    numerator = sum(a * b for a, b in zip(dev_x, dev_y))
    denominator = math.sqrt(
        sum(a ** 2 for a in dev_x) * sum(b ** 2 for b in dev_y)
    )
    return numerator / denominator if denominator else 0

#&&&&&&&&&&&

def pearson_for_matrix(matrix):
    '''
    Build the user-user similarity matrix for a rating matrix.

    Entry [a][b] holds pearson(matrix[a], matrix[b]).  Only the upper
    triangle is computed and then mirrored; the diagonal is left at 0.
    Returns a square list of lists with one row per row of matrix.
    '''
    n_users = len(matrix)
    similarity = [[0] * n_users for _ in range(n_users)]
    for first in range(n_users):
        for second in range(first + 1, n_users):
            score = pearson(matrix[first], matrix[second])
            similarity[first][second] = score
            similarity[second][first] = score
    return similarity

#&&&&&&&

def k_sim_user(sim_matrix, u, k):
    '''
    Find the k users with the highest similarity to user u.

    Users are ranked by (similarity, index) in descending order, so ties in
    similarity go to the larger index.  Row u is ranked as a whole, which
    means u itself takes part in the ranking.
    Returns (list of user indices, list of their similarities to u).
    '''
    ranked = sorted(
        range(len(sim_matrix)),
        key=lambda idx: (sim_matrix[u][idx], idx),
        reverse=True,
    )
    users_id_list = ranked[:k]
    users_value = [sim_matrix[u][idx] for idx in users_id_list]
    return users_id_list, users_value

def knn_fill_rate_matrix(matrix, sim_matrix, k=3):
    '''
    Fill the unrated (0) cells of a rating matrix with user-based kNN
    predictions.

    For every user i the k neighbours come from k_sim_user(), and each
    missing rating for item j is predicted as

        avg(i) + sum(sim(i, u) * (r[u][j] - avg(u))) / sum(abs(sim(i, u)))

    where both sums run only over the neighbours u that actually rated
    item j.  When no neighbour rated the item, or all their similarities are
    0, the user's own average is used instead.

    matrix:     list of per-user rating rows, 0 meaning "not rated".  It is
                filled in place.
    sim_matrix: user-user similarity matrix, indexed as sim_matrix[i][u].
    k:          number of neighbours requested from k_sim_user().

    Returns the same matrix object that was passed in.
    '''
    if not matrix:
        return matrix

    # Work from a snapshot of the observed ratings: predictions written for
    # earlier cells must not be read back as if they were real ratings, and
    # user averages must not drift while the matrix is being filled.
    observed = [list(row) for row in matrix]
    user_avgs = [avg(row) for row in observed]

    for i, row in enumerate(observed):
        neighbours, _ = k_sim_user(sim_matrix, i, k)
        for j, rating in enumerate(row):
            if rating != 0:
                continue

            weighted = 0.0
            norm = 0.0
            for u in neighbours:
                # A 0 here means "unrated", not "rated 0": skip it so it is
                # not counted as a large negative deviation.  This also
                # drops user i itself if it appears among its neighbours.
                if observed[u][j] == 0:
                    continue
                sim = sim_matrix[i][u]
                weighted += sim * (observed[u][j] - user_avgs[u])
                # Absolute values keep the normaliser positive even when
                # negatively correlated neighbours are selected.
                norm += abs(sim)

            if norm:
                matrix[i][j] = user_avgs[i] + weighted / norm
            else:
                matrix[i][j] = user_avgs[i]

            if matrix[i][j] == 0:
                # Cell is still 0 after filling; kept from the original as a
                # diagnostic.
                print('again')
    return matrix

def saveMatrix(matrix, file_name='matrix.csv'):
    '''
    Write matrix to a CSV file, one row of the matrix per line.

    matrix:    iterable of rows (each row an iterable of cell values).
    file_name: path of the file to create or overwrite.

    The csv module needs a binary-mode file on Python 2 but a text-mode file
    opened with newline='' on Python 3, so the mode is chosen from the
    running interpreter version.
    '''
    import sys

    if sys.version_info[0] >= 3:
        csvfile = open(file_name, 'w', newline='')
    else:
        csvfile = open(file_name, 'wb')
    with csvfile:
        csv.writer(csvfile).writerows(matrix)

# Driver script (module-level statements): read the rating matrix from the
# default 'train.csv', compute the user-user Pearson similarity matrix, fill
# the 0 cells with kNN predictions (k=3) and write the result to 'result.csv'.

rate_matrix = read_csv()

sim_matrix = pearson_for_matrix(rate_matrix)

# knn_fill_rate_matrix fills and returns the matrix it is given, so `filled`
# refers to the same list as `rate_matrix`.
filled = knn_fill_rate_matrix(rate_matrix, sim_matrix, k=3)

saveMatrix(filled, 'result.csv')