基于用户协同过滤的推荐系统算法(Python 实现)

# coding=utf-8
__author__='changgy'
__date__  ='20160205'


import sys
import math
import numpy as np
import pandas as pd
import datetime


'''
u.data holds the 100k rating records; its columns are:
user_id | item_id | rating | timestamp


u.user holds the user information; its columns are:
user_id | age | gender | occupation | zip code


u.item holds the movie information; its columns are:
item_id | movie title | release date | video release date | IMDb URL | unknown | Action | Adventure | Animation
| Children's | Comedy | Crime | Documentary | Drama | Fantasy | Film-Noir | Horror | Musical | Mystery | Romance
| Sci-Fi | Thriller | War | Western
'''


 
# Names of the module-wide objects shared between functions.
# NOTE: a `global` statement at module scope has no effect by itself; the
# names only come into existence once something assigns to them.
# readData() re-declares the same three names as global -- presumably so it
# can bind the loaded tables to them (the assignments are not visible here;
# confirm against the full file).
global g_DF_UDATA
global g_DF_USER
global g_DF_MOVIE_ITEM




def readSrcData(fileDir, sep, columns, chunkSize=100000):
    """Load a header-less delimited text file into a single DataFrame.

    The file is read in chunks of ``chunkSize`` rows and the chunks are
    concatenated, so the parser never has to process more than one chunk
    at a time.

    Args:
        fileDir: Path or file-like object handed to ``pd.read_table``.
        sep: Field delimiter used in the file.
        columns: Column labels assigned to the result; the number of labels
            must match the number of fields per row.
        chunkSize: Number of rows read per chunk. Defaults to 100000, the
            value that used to be hard-coded.

    Returns:
        A DataFrame with a fresh 0..n-1 index and ``columns`` as its labels.
    """
    # ``sep`` is passed by keyword: pandas >= 2.0 rejects positional
    # arguments after the path, and the keyword form works on old versions.
    reader = pd.read_table(fileDir, sep=sep, header=None, chunksize=chunkSize)

    # Iterating the reader yields successive chunks and stops at end of file,
    # which replaces the manual get_chunk()/StopIteration loop.
    chunks = list(reader)

    df = pd.concat(chunks, ignore_index=True)
    df.columns = columns
    return df


class CcalcCostTime():
    """Print how long an instance lived, as whole seconds plus milliseconds.

    The start time is captured when the object is constructed and the
    elapsed time is printed when the object is finalized (``__del__``).
    When that happens is decided by the interpreter, so the reported figure
    covers construction up to finalization of the instance.
    """

    def __init__(self):
        # Start of the measured interval.
        self.d1 = datetime.datetime.now()

    def __del__(self):
        elapsed = datetime.datetime.now() - self.d1
        # total_seconds() includes whole days; ``.seconds`` alone wraps
        # around every 24 hours and would under-report long intervals.
        whole_seconds = int(elapsed.total_seconds())
        # Floor division keeps the millisecond part an integer on Python 3.
        millis = elapsed.microseconds // 1000
        # A single pre-formatted string prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print('cost is  %d s  %d ms' % (whole_seconds, millis))


def getUserDataFramebyGender(user_all, gender):
    """Return the positive entries of one column as a single-column DataFrame.

    Args:
        user_all: DataFrame that contains a column labelled ``gender``.
        gender: Label of the column to filter on and to keep. Despite the
            parameter name, it is used as a column label, not as a value to
            compare against.

    Returns:
        A DataFrame with the single column ``gender`` holding only the rows
        of ``user_all`` where that column is greater than 0; the original
        index labels of those rows are kept.
    """
    positive = user_all[gender] > 0
    # One .loc call picks rows and column together instead of the chained
    # ``df[mask][col]`` lookup, which builds an intermediate frame first.
    selected = user_all.loc[positive, gender]
    return pd.DataFrame(selected)


def readData():
global g_DF_UDATA
global g_DF_USER
global g_DF_MOVIE_ITEM

  # 读取udata文件
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值