推荐经典算法实现之NCF(pytorch+MovieLen)

最新推荐文章于 2023-12-30 23:01:37 发布

fjssharpsword

最新推荐文章于 2023-12-30 23:01:37 发布

阅读量5.1k

点赞数 1

CC 4.0 BY-SA版权

分类专栏： Algorithm

本文链接：https://blog.youkuaiyun.com/fjssharpsword/article/details/95583601

# coding:utf-8  
'''
@author: Jason.F
@data: 2019.07.11
@function: Implementing NCF with Torch  
           Dataset: Movielen Dataset(ml-1m) 
           Evaluating: hitradio,ndcg
           https://arxiv.org/pdf/1708.05031.pdf
           https://github.com/hexiangnan/neural_collaborative_filtering
'''
import pandas as pd
import numpy as np
import math
from collections import defaultdict
import heapq
import scipy.sparse as sp
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data
import torch.backends.cudnn as cudnn
import os

class NCFData(torch.utils.data.Dataset):#define the dataset
    def __init__(self, features, num_item, train_mat=None, num_ng=0, is_training=None):
        super(NCFData, self).__init__()
        # Note that the labels are only useful when training, we thus add them in the ng_sample() function.
        self.features_ps = features
        self.num_item = num_item
        self.train_mat = train_mat
        self.num_ng = num_ng
        self.is_training = is_training
        self.labels = [0 for _ in range(len(features))]

    def ng_sample(self):
        assert self.is_training, 'no need to sampling when testing'
        self.features_ng = []
        for x in self.features_ps:
            u = x[0]
            for t in range(self.num_ng):
                j = np.random.randint(self.num_item)
                while (u, j) in self.train_mat:
                    j = np.random.randint(self.num_item)
                self.features_ng.append([u, j])
        
        labels_ps = [1 for _ in range(len(self.features_ps))]
        labels_ng = [0 for _ in range(len(self.features_ng))]
        
        self.features_fill = self.features_ps + self.features_ng
        self.labels_fill = labels_ps + labels_ng
        
    def __len__(self):
        return (self.num_ng + 1) * len(self.labels)
    
    def __getitem__(self, idx):
        '''
        if self.is_training:
            self.ng_sample()
            features = self.features_fill
            labels = self.labels_fill
        else:
            features = self.features_ps
            labels = self.labels
        '''
        features = self.features_fill if self.is