# 导入所用到的库
import pandas as pd
from IPython.display import display
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.naive_bayes import GaussianNB
from math import sqrt
# Data preprocessing: load the raw ratings file, then keep only the rows whose
# movie and whose user each have at least 80 ratings in the full data set.
df0 = pd.read_csv(
    'C:\\Desktop\\grad_design\\ml-100k.csv',
    names=['user', 'movie', 'rating', 'time'],
)  # original author's note: int type

# Keep only the user, movie and rating columns (the 'time' column is dropped).
df1 = df0[['user', 'movie', 'rating']].copy()

# Number of rating rows per movie (r1) and per user (r2).
r1 = df1.groupby('movie').size()
r2 = df1.groupby('user').size()

# Ids of the movies / users that reach the 80-rating threshold.
mid = r1[r1 >= 80].index
uid = r2[r2 >= 80].index

# Filter by movie first, then by user. Both counts were taken on the
# unfiltered data, so the user filter does not see the movie filter's effect.
df2 = df1.loc[df1['movie'].isin(mid)]
df3 = df2.loc[df2['user'].isin(uid)]
#划分数据集
train_data,test_data
# GBDT推荐算法源代码
# 最新推荐文章于 2022-05-15 18:37:30 发布