因前一篇( https://blog.youkuaiyun.com/fjssharpsword/article/details/97000479 )的采样问题尚未解决,发现 GitHub 上有如下 BPMF 代码,采用 Wishart 先验,性能与 PyMC3 一致。
参考:https://github.com/LoryPack/BPMF
# coding:utf-8
'''
@author: Jason.F
@date: 2019.08.01
@function: baseline BPMF(Bayesian Probabilistic Matrix Factorization)
Dataset: MovieLens-1m: https://grouplens.org/datasets/movielens/
Evaluation: RMSE
'''
import numpy as np
import random
import pandas as pd
from numpy.random import multivariate_normal
from scipy.stats import wishart
class DataSet:
    """Load the train/test rating files and expose them as plain Python lists.

    Attributes set by the constructor:
        trainset: list of ``[user, item, rating]`` rows from the training file.
        testset: list of ``[user, item, rating]`` rows from the test file.
        maxu: largest user id in the training file plus one (Python int).
        maxi: largest item id in the training file plus one (Python int).
        maxr: largest rating value found in the training file.
    """

    # Default locations of the tab-separated rating files.
    _TRAIN_PATH = "/data/fjsdata/BMF/ml-1m.train.rating"
    _TEST_PATH = "/data/fjsdata/BMF/ml-1m.test.rating"

    def __init__(self, train_path=None, test_path=None):
        """Read both rating files and cache the results on the instance.

        Args:
            train_path: path of the training file; ``None`` selects the
                built-in default location.
            test_path: path of the test file; ``None`` selects the built-in
                default location.
        """
        (self.trainset, self.testset,
         self.maxu, self.maxi, self.maxr) = self._getDataset_as_list(train_path, test_path)

    @staticmethod
    def _read_ratings(path):
        """Return the first three tab-separated columns of *path* as a DataFrame."""
        # ``np.float`` was removed in NumPy 1.24; ``np.float64`` is the type it
        # used to resolve to. The dtype mapping is keyed by column name, which
        # is the form documented for ``read_csv``.
        return pd.read_csv(
            path, sep='\t', header=None, names=['user', 'item', 'rating'],
            usecols=[0, 1, 2],
            dtype={'user': np.int32, 'item': np.int32, 'rating': np.float64}
        )

    def _getDataset_as_list(self, train_path=None, test_path=None):
        """Load the training and test files and print summary statistics.

        Args:
            train_path: path of the training file (``None`` -> default).
            test_path: path of the test file (``None`` -> default).

        Returns:
            Tuple ``(trainset, testset, maxu, maxi, maxr)``. The two sets are
            lists of ``[user, item, rating]`` rows; ``maxu``/``maxi``/``maxr``
            are computed from the training file only.
        """
        if train_path is None:
            train_path = self._TRAIN_PATH
        if test_path is None:
            test_path = self._TEST_PATH

        # trainset
        train = self._read_ratings(train_path)
        # Plain Python ints keep maxu * maxi from wrapping around in
        # fixed-width integer arithmetic.
        maxu = int(train['user'].max()) + 1
        maxi = int(train['item'].max()) + 1
        maxr = train['rating'].max()
        interactions = train.shape[0]
        print('Dataset Statistics: Interaction = %d, User = %d, Item = %d, Sparsity = %.4f' %
              (interactions, maxu, maxi, float(interactions) / (maxu * maxi)))
        trainset = train.values.tolist()

        # testset
        testset = self._read_ratings(test_path).values.tolist()
        return trainset, testset, maxu, maxi, maxr

    def list_to_matrix(self, dataset, maxu, maxi):
        """Scatter ``(user, item, rating)`` triples into a dense matrix.

        Args:
            dataset: iterable of ``(user, item, rating)`` triples.
            maxu: number of rows of the result.
            maxi: number of columns of the result.

        Returns:
            ``float32`` array of shape ``(maxu, maxi)``; cells without a
            triple stay at zero.
        """
        matrix = np.zeros([maxu, maxi], dtype=np.float32)
        # Deliberately a sequential loop: when the same (user, item) pair
        # occurs more than once the last triple must win, and NumPy gives no
        # ordering guarantee for repeated indices in a vectorised assignment.
        for u, i, r in dataset:
            matrix[int(u), int(i)] = float(r)
        return matrix
def Normal_Wishart(mu_0, lamb, W, nu, seed=None):
"""Function extracting a Normal_Wishart random variable"""
# first draw a Wishart distribution:
Lambda = wishart(df=nu, scale=W, seed=seed).rvs() # NB: Lambda is a matrix.
# then draw a Gaussian multivariate RV with mean mu_0 and(lambda*Lambda)^{-1} as covariance matrix.
cov = np.linalg.inv(lamb * Lambda) # this i