youtubenet简单代码实现

本文介绍了一种基于 TensorFlow 的 YouTubeNet 推荐系统模型实现。该模型利用用户历史点击行为及商品特征进行商品推荐，通过训练调整参数以提高推荐准确性。模型涵盖了商品 ID、品牌、价格等多维度特征，并使用了批量归一化、Dropout 等技术。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

 youtubenet_model

from config import *
import time


class Model:
    """YouTubeNet-style retrieval model expressed as a TensorFlow 1.x graph.

    The user tower concatenates user-profile embeddings with the mean-pooled
    embedding of the click history and feeds the result through three dense
    layers to produce ``user_emb``.  An item is represented by the
    concatenation of its id, brand, price, cate2 and color embeddings.
    Training maximises the softmax probability of the first candidate in
    ``sample_*`` under a scaled cosine similarity against ``user_emb``.
    """

    def __init__(self, item_dict):
        """Store the vocabulary sizes and build the graph.

        Args:
            item_dict: 2-D array with one row per item.  Column 0 holds the
                item id; columns 1, 2, 3 and 5 are read in ``build_model``
                as the brand, price, cate2 and color ids of that item.
        """
        self.embedding_size = 128
        self.item_count = len(set(item_dict[:, 0]))
        self.item_dict = item_dict
        self.brand_count = 10
        self.price_count = 15
        self.cate2_count = 500
        self.color_count = 500
        self.corp_count = 2800
        self.city_count = 38
        self.shop_level_cnt = 10
        self.build_model()

    def build_model(self):
        """Create the placeholders, variables, loss, train op and export tensor."""
        # Click-history features, one id per history step: [B, T].
        self.hist_item = tf.placeholder(tf.int32, [None, None])
        self.hist_brand = tf.placeholder(tf.int32, [None, None])
        self.hist_price = tf.placeholder(tf.int32, [None, None])
        self.hist_cate2 = tf.placeholder(tf.int32, [None, None])
        self.hist_color = tf.placeholder(tf.int32, [None, None])
        # User-profile features, one id per user: [B].
        self.cate3d = tf.placeholder(tf.int32, [None])
        self.cate7d = tf.placeholder(tf.int32, [None])
        self.order7d = tf.placeholder(tf.int32, [None])
        self.city = tf.placeholder(tf.int32, [None])
        self.shop_level = tf.placeholder(tf.int32, [None])
        # Candidate items per user: [B, N].  The loss treats column 0 as the
        # positive example and the remaining columns as negatives.
        self.sample_item = tf.placeholder(tf.int32, [None, None])
        self.sample_brand = tf.placeholder(tf.int32, [None, None])
        self.sample_price = tf.placeholder(tf.int32, [None, None])
        self.sample_cate2 = tf.placeholder(tf.int32, [None, None])
        self.sample_color = tf.placeholder(tf.int32, [None, None])
        self.sl = tf.placeholder(tf.int32, [None])  # history length per user
        self.keep_prob = tf.placeholder(tf.float32, [])
        self.lr = tf.placeholder(tf.float64, [])

        side_dim = self.embedding_size // 8
        # Item ids fed to the model are 1-based with 0 reserved for padding,
        # so the table needs ``item_count + 1`` rows; with only ``item_count``
        # rows the largest id would index past the end of the table.
        # NOTE(review): this changes the variable shape, so checkpoints saved
        # with the previous ``item_count``-row table cannot be restored.
        item_emb_w = tf.get_variable(
            "item_emb_w", [self.item_count + 1, self.embedding_size // 2],
            initializer=tf.initializers.truncated_normal())
        brand_emb_w = tf.get_variable(
            "brand_emb_w", [self.brand_count, side_dim],
            initializer=tf.initializers.truncated_normal())
        price_emb_w = tf.get_variable(
            "price_emb_w", [self.price_count, side_dim],
            initializer=tf.initializers.truncated_normal())
        cate2_emb_w = tf.get_variable(
            "cate2_emb_w", [self.cate2_count, side_dim],
            initializer=tf.initializers.truncated_normal())
        color_emb_w = tf.get_variable(
            "color_emb_w", [self.color_count, side_dim],
            initializer=tf.initializers.truncated_normal())
        city_emb_w = tf.get_variable(
            "city_emb_w", [self.city_count, side_dim],
            initializer=tf.initializers.truncated_normal())
        shop_level_emb_w = tf.get_variable(
            "shop_level_emb_w", [self.shop_level_cnt, 80],
            initializer=tf.initializers.truncated_normal())

        # Not used by the loss; kept so the set of graph variables (and hence
        # the checkpoint contents) still includes it.
        tf.get_variable("input_b", [self.item_count],
                        initializer=tf.constant_initializer(0.0))

        # [B, T, e]: 64 (item) + 4 * 16 (brand, price, cate2, color) = 128.
        h_emb = tf.concat([tf.nn.embedding_lookup(item_emb_w, self.hist_item),
                           tf.nn.embedding_lookup(brand_emb_w, self.hist_brand),
                           tf.nn.embedding_lookup(price_emb_w, self.hist_price),
                           tf.nn.embedding_lookup(cate2_emb_w, self.hist_cate2),
                           tf.nn.embedding_lookup(color_emb_w, self.hist_color)], axis=2)

        # Zero out the steps beyond each user's history length, then average.
        mask = tf.sequence_mask(self.sl, tf.shape(h_emb)[1], dtype=tf.float32)  # [B, T]
        h_emb *= tf.expand_dims(mask, -1)  # broadcasts over the embedding axis
        hist = tf.reduce_sum(h_emb, 1)  # [B, e]
        # Clamp the divisor to 1 so an empty history gives zeros, not 0/0 = NaN.
        hist_len = tf.cast(tf.maximum(self.sl, 1), tf.float32)
        hist = hist / tf.expand_dims(hist_len, 1)  # [B, e]

        # The three category features share the cate2 embedding table.
        user_info = tf.concat([tf.nn.embedding_lookup(shop_level_emb_w, self.shop_level),
                               tf.nn.embedding_lookup(city_emb_w, self.city),
                               tf.nn.embedding_lookup(cate2_emb_w, self.cate3d),
                               tf.nn.embedding_lookup(cate2_emb_w, self.cate7d),
                               tf.nn.embedding_lookup(cate2_emb_w, self.order7d)], axis=-1)

        self.input = tf.concat([user_info, hist], axis=-1)
        # NOTE(review): ``training`` is left at its default and no update ops
        # are run, so the moving statistics are presumably never updated -- confirm.
        bn = tf.layers.batch_normalization(inputs=self.input, name='b1')
        layer_1 = tf.layers.dense(bn, 512, activation=tf.nn.leaky_relu, name='f1')
        layer_1 = tf.nn.dropout(layer_1, keep_prob=self.keep_prob)
        layer_2 = tf.layers.dense(layer_1, 256, activation=tf.nn.leaky_relu, name='f2')
        layer_2 = tf.nn.dropout(layer_2, keep_prob=self.keep_prob)
        self.user_emb = tf.layers.dense(layer_2, self.embedding_size,
                                        activation=tf.nn.leaky_relu, name='f3')

        # [B, N, e]: candidate item vectors built from the same tables.
        sample_w = tf.concat([tf.nn.embedding_lookup(item_emb_w, self.sample_item),
                              tf.nn.embedding_lookup(brand_emb_w, self.sample_brand),
                              tf.nn.embedding_lookup(price_emb_w, self.sample_price),
                              tf.nn.embedding_lookup(cate2_emb_w, self.sample_cate2),
                              tf.nn.embedding_lookup(color_emb_w, self.sample_color),
                              ], axis=-1)

        # Cosine similarity between the user and each candidate, scaled by 20
        # before the softmax.
        user_v = tf.expand_dims(self.user_emb, axis=1)  # [B, 1, e]
        user_x_item = tf.squeeze(tf.matmul(user_v, sample_w, transpose_b=True), axis=1)  # [B, N]
        user_norm = tf.sqrt(tf.reduce_sum(tf.square(user_v), axis=-1))  # [B, 1]
        sample_norm = tf.sqrt(tf.reduce_sum(tf.square(sample_w), axis=-1))  # [B, N]
        sim = user_x_item / user_norm / sample_norm * 20

        prob = tf.nn.softmax(sim)
        self.sim = prob

        # Probability assigned to the first candidate of every row.
        prob = tf.slice(prob, [0, 0], [-1, 1])

        global_step = tf.Variable(0, trainable=False, name='global_step')
        self.learning_rate = tf.train.exponential_decay(self.lr,
                                                        global_step=global_step,
                                                        decay_steps=20000,
                                                        decay_rate=0.9)
        # The step counter is advanced by running this op explicitly (see
        # ``train``); it is not passed to ``minimize``.
        self.add_global = global_step.assign_add(1)

        self.loss = -tf.reduce_sum(tf.log(prob + 1e-8))

        optimizer = tf.train.AdamOptimizer(self.learning_rate, name='adam')
        self.train_op = optimizer.minimize(self.loss)

        # Export tensor: one full item vector per row of ``item_dict``.  The id
        # embedding is looked up by the row's own item id so that it lines up
        # with the side features taken from the same row.
        self.item_emb = tf.concat([
            tf.nn.embedding_lookup(item_emb_w, tf.convert_to_tensor(self.item_dict[:, 0], dtype=tf.int32)),
            tf.nn.embedding_lookup(brand_emb_w, tf.convert_to_tensor(self.item_dict[:, 1], dtype=tf.int32)),
            tf.nn.embedding_lookup(price_emb_w, tf.convert_to_tensor(self.item_dict[:, 2], dtype=tf.int32)),
            tf.nn.embedding_lookup(cate2_emb_w, tf.convert_to_tensor(self.item_dict[:, 3], dtype=tf.int32)),
            tf.nn.embedding_lookup(color_emb_w, tf.convert_to_tensor(self.item_dict[:, 5], dtype=tf.int32)),
        ], axis=-1)

    def _user_feed(self, uij, sl_index):
        """Return the feed entries shared by training and inference.

        Args:
            uij: batch list as produced by the data iterators.
            sl_index: position of the history-length list inside ``uij``.
        """
        return {
            self.hist_item: uij[1],
            self.hist_brand: uij[2],
            self.hist_price: uij[3],
            self.hist_cate2: uij[4],
            self.hist_color: uij[5],
            self.city: uij[6],
            self.cate3d: uij[7],
            self.cate7d: uij[8],
            self.order7d: uij[9],
            self.shop_level: uij[10],
            self.sl: uij[sl_index],
        }

    def train(self, sess, uij):
        """Run one optimisation step on a training batch and return its loss.

        Args:
            sess: session holding the graph variables.
            uij: 20-element batch list; indices 14-18 hold the candidate
                features and index 19 the history lengths.
        """
        feed = self._user_feed(uij, 19)
        feed.update({
            self.sample_item: uij[14],
            self.sample_brand: uij[15],
            self.sample_price: uij[16],
            self.sample_cate2: uij[17],
            self.sample_color: uij[18],
            self.keep_prob: 0.9,
            self.lr: 0.3,
        })
        loss, _, _, _ = sess.run(
            [self.loss, self.train_op, self.add_global, self.learning_rate],
            feed_dict=feed)
        return loss

    def test(self, sess, uij):
        """Return the user embeddings for an inference batch.

        Args:
            sess: session holding the graph variables.
            uij: 13-element batch list with the history lengths at index 12.
        """
        feed = self._user_feed(uij, 12)
        feed[self.keep_prob] = 1.0  # disable dropout at inference time
        return sess.run(self.user_emb, feed_dict=feed)

    def save(self, sess, path):
        """Write all saveable variables of the graph to ``path``."""
        saver = tf.train.Saver()
        saver.save(sess, save_path=path)

    def restore(self, sess, path):
        """Load all saveable variables of the graph from ``path``."""
        saver = tf.train.Saver()
        saver.restore(sess, save_path=path)

youtubenet_iter_train

import numpy as np
from config import *

# Load the raw item table: one tab-separated row per item with the columns
# id, brand, price, cate2, cate3, color.
item_dict = []
with open(os.path.join(DATA_PATH, "item_dict.new"), encoding='utf-8') as f:
    for line in f:
        # Remove only the line terminator.  A plain strip() would also eat
        # trailing tabs, dropping empty trailing fields and leaving rows with
        # too few columns.
        item_dict.append(line.rstrip('\r\n').split('\t'))

item_dict = np.array(item_dict)
# Sorted distinct values of every column; an empty value is renamed to '无'.
id_li = sorted(set(item_dict[:, 0]))
brand_li = [x if x != '' else '无' for x in sorted(set(item_dict[:, 1]))]
price_li = [x if x != '' else '无' for x in sorted(set(item_dict[:, 2]))]
cate2_li = [x if x != '' else '无' for x in sorted(set(item_dict[:, 3]))]
cate3_li = [x if x != '' else '无' for x in sorted(set(item_dict[:, 4]))]
color_li = [x if x != '' else '无' for x in sorted(set(item_dict[:, 5]))]

# Raw value -> 1-based integer id (0 is left free for padding).
id_dict = {v: k + 1 for k, v in enumerate(id_li)}
reverse_id_dict = {v: k for k, v in id_dict.items()}
brand_dict = {v: k + 1 for k, v in enumerate(brand_li)}
price_dict = {v: k + 1 for k, v in enumerate(price_li)}
cate2_dict = {v: k + 1 for k, v in enumerate(cate2_li)}
cate3_dict = {v: k + 1 for k, v in enumerate(cate3_li)}
color_dict = {v: k + 1 for k, v in enumerate(color_li)}
# Column index -> lookup table for that column.
mapping = {0: id_dict, 1: brand_dict, 2: price_dict, 3: cate2_dict, 4: cate3_dict, 5: color_dict}

# Raw item id -> list of the six integer ids of that item.
all_item = {}
for d in item_dict:
    tmp = []
    for ind, value in enumerate(d):
        if value == '':
            # Rows are views into item_dict, so this also rewrites the table.
            value = d[ind] = '无'
        tmp.append(mapping[ind][value])
    all_item[d[0]] = tmp
# The empty item id maps to the all-zero padding entry.
all_item[''] = [0] * 6

# Replace the raw values in the table itself by their integer ids.
# NOTE(review): item_dict is a fixed-width string array, so the ids are stored
# as text and would be truncated if longer than the widest raw value -- confirm.
for d in item_dict:
    for col, table in mapping.items():
        d[col] = table[d[col]]

class TrainIterator:
    """Batch iterator over a tab-separated training sample file.

    Lines are read ahead into a buffer and handed out ``batch_size`` rows at a
    time.  When the file is exhausted the iterator rewinds it and raises
    ``StopIteration``, so the same object can be iterated again for the next
    epoch.
    """

    def __init__(self, source, max_batch_size=1000):
        """Open ``source`` and prepare an empty read-ahead buffer.

        Args:
            source: path of the training file.
            max_batch_size: number of batches worth of lines loaded per
                buffer refill (``batch_size * max_batch_size`` lines).
        """
        self.source = open(source, 'r')
        self.source_dicts = []
        self.batch_size = batch_size
        self.source_buffer = []
        self.k = batch_size * max_batch_size
        self.end_of_data = False

    def __iter__(self):
        return self

    def reset(self):
        """Rewind the underlying file to its beginning."""
        self.source.seek(0)

    def close(self):
        """Close the underlying file."""
        self.source.close()

    @staticmethod
    def _pad(seq):
        """Right-pad ``seq`` with 0 up to ``SEQ_LEN`` entries, or truncate it."""
        if len(seq) <= SEQ_LEN:
            return seq + [0] * (SEQ_LEN - len(seq))
        return seq[:SEQ_LEN]

    @staticmethod
    def _column(rows, col):
        """Return feature ``col`` of every item in every list of ``rows``."""
        return [[all_item[y][col] for y in x] for x in rows]

    def __next__(self):
        """Return the next batch as a 20-element list.

        Layout: user ids, five padded history feature lists (item, brand,
        price, cate2, color), city, cate3d, cate7d, order7d, shop_level, three
        empty placeholder lists, five candidate feature lists (positive first,
        then negatives) and the history lengths.

        Raises:
            StopIteration: when the file is exhausted (the file is rewound).
        """
        if self.end_of_data:
            self.end_of_data = False
            self.reset()
            raise StopIteration

        if not self.source_buffer:
            for _ in range(self.k):
                ss = self.source.readline()
                if ss == "":
                    break
                # Remove only the line terminator so that empty trailing
                # columns are kept and every row has the same width.
                self.source_buffer.append(ss.rstrip("\r\n").split("\t"))

        if not self.source_buffer:
            self.end_of_data = False
            self.reset()
            raise StopIteration

        end = min(self.batch_size, len(self.source_buffer))
        buf = np.array(self.source_buffer[:end])
        del self.source_buffer[:end]

        # ``np.str`` was only an alias of the builtin and no longer exists in
        # current NumPy releases.
        user_id = np.array(buf[:, 0], str)
        city = [prov_dict[x] for x in buf[:, 1]]
        shop_level = [int(x) for x in buf[:, 2]]
        cate3d = [int(cate2_dict[x]) for x in buf[:, 3]]
        cate7d = [int(cate2_dict[x]) for x in buf[:, 4]]
        order7d = [int(cate2_dict[x]) for x in buf[:, 5]]

        # Click history: comma-separated raw item ids in column 9.
        hist_ck_items = [x.split(",") for x in buf[:, 9]]
        hist_items = [self._pad(x) for x in self._column(hist_ck_items, 0)]
        hist_brand = [self._pad(x) for x in self._column(hist_ck_items, 1)]
        hist_price = [self._pad(x) for x in self._column(hist_ck_items, 2)]
        hist_cate2 = [self._pad(x) for x in self._column(hist_ck_items, 3)]
        hist_color = [self._pad(x) for x in self._column(hist_ck_items, 5)]
        # Histories are truncated to SEQ_LEN above, so the reported length
        # must not exceed SEQ_LEN either.
        sl = [min(len(x), SEQ_LEN) for x in hist_ck_items]

        # Positive example: the raw item id in column 10.
        next_raw = buf[:, 10]
        next_item = [all_item[x][0] for x in next_raw]
        next_brand = [all_item[x][1] for x in next_raw]
        next_price = [all_item[x][2] for x in next_raw]
        next_cate2 = [all_item[x][3] for x in next_raw]
        next_color = [all_item[x][5] for x in next_raw]

        # Negative examples: at most NEG_LEN raw item ids from column 11.
        neg_raw = [x.split(",")[:NEG_LEN] for x in buf[:, 11]]
        neg_items = self._column(neg_raw, 0)
        neg_brand = self._column(neg_raw, 1)
        neg_price = self._column(neg_raw, 2)
        neg_cate2 = self._column(neg_raw, 3)
        neg_color = self._column(neg_raw, 5)

        # Candidates per row: the positive first, followed by the negatives.
        sample_items = [[pos] + neg for pos, neg in zip(next_item, neg_items)]
        sample_brand = [[pos] + neg for pos, neg in zip(next_brand, neg_brand)]
        sample_price = [[pos] + neg for pos, neg in zip(next_price, neg_price)]
        sample_cate2 = [[pos] + neg for pos, neg in zip(next_cate2, neg_cate2)]
        sample_color = [[pos] + neg for pos, neg in zip(next_color, neg_color)]

        # Slots 11-13 are never filled; they keep the index layout that the
        # model's feed code relies on.
        reg_till_cur, reg_till_order, cur_end_time_diff = [], [], []

        return [user_id, hist_items, hist_brand, hist_price, hist_cate2, hist_color,
                city, cate3d, cate7d, order7d, shop_level,
                reg_till_cur, reg_till_order, cur_end_time_diff,
                sample_items, sample_brand, sample_price, sample_cate2, sample_color, sl]


daily

from config import *
from youtubenet_model import Model
from youtubenet_iter_train import *
import time
import sys


def train():
    """Train the model for ``epoch`` passes over the training file and save it.

    The summed batch loss is averaged and printed every ``every_k`` steps;
    the checkpoint is written once, after the last epoch.
    """
    def timestamp():
        # Current local time in the format used by all progress messages.
        return time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))

    batches = TrainIterator(TRAIN_PATH)

    with tf.Session(config=gpu_config) as sess:
        model = Model(item_dict)

        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        print("Model built. {}".format(timestamp()))
        sys.stdout.flush()

        running_loss = 0
        step = 0
        for _ in range(epoch):
            for uij in batches:
                step += 1
                running_loss += model.train(sess, uij)

                if step % every_k != 0:
                    continue

                print("step: {0}. avg loss: {1}. {2}".format(step, running_loss/every_k, timestamp()))
                sys.stdout.flush()
                running_loss = 0

        model.save(sess, os.path.join(MODEL_PATH, "youtubenet.ckpt"))


# Script entry point: run the training job only when executed directly.
if __name__ == '__main__':
    train()

youtubenet_iter_test

import numpy as np
from config import *

# Load the raw item table: one tab-separated row per item with the columns
# id, brand, price, cate2, cate3, color.
item_dict = []
with open(os.path.join(DATA_PATH, "item_dict.old"), encoding='utf-8') as f:
    for line in f:
        # Remove only the line terminator.  A plain strip() would also eat
        # trailing tabs, dropping empty trailing fields and leaving rows with
        # too few columns.
        item_dict.append(line.rstrip('\r\n').split('\t'))

item_dict = np.array(item_dict)
# Sorted distinct values of every column; an empty value is renamed to '无'.
id_li = sorted(set(item_dict[:, 0]))
brand_li = [x if x != '' else '无' for x in sorted(set(item_dict[:, 1]))]
price_li = [x if x != '' else '无' for x in sorted(set(item_dict[:, 2]))]
cate2_li = [x if x != '' else '无' for x in sorted(set(item_dict[:, 3]))]
cate3_li = [x if x != '' else '无' for x in sorted(set(item_dict[:, 4]))]
color_li = [x if x != '' else '无' for x in sorted(set(item_dict[:, 5]))]

# Raw value -> 1-based integer id (0 is left free for padding).
id_dict = {v: k + 1 for k, v in enumerate(id_li)}
reverse_id_dict = {v: k for k, v in id_dict.items()}
brand_dict = {v: k + 1 for k, v in enumerate(brand_li)}
price_dict = {v: k + 1 for k, v in enumerate(price_li)}
cate2_dict = {v: k + 1 for k, v in enumerate(cate2_li)}
cate3_dict = {v: k + 1 for k, v in enumerate(cate3_li)}
color_dict = {v: k + 1 for k, v in enumerate(color_li)}
# Column index -> lookup table for that column.
mapping = {0: id_dict, 1: brand_dict, 2: price_dict, 3: cate2_dict, 4: cate3_dict, 5: color_dict}

# Raw item id -> list of the six integer ids of that item.
all_item = {}
for d in item_dict:
    tmp = []
    for ind, value in enumerate(d):
        if value == '':
            # Rows are views into item_dict, so this also rewrites the table.
            value = d[ind] = '无'
        tmp.append(mapping[ind][value])
    all_item[d[0]] = tmp
# The empty item id maps to the all-zero padding entry.
all_item[''] = [0] * 6

# Replace the raw values in the table itself by their integer ids.
# NOTE(review): item_dict is a fixed-width string array, so the ids are stored
# as text and would be truncated if longer than the widest raw value -- confirm.
for d in item_dict:
    for col, table in mapping.items():
        d[col] = table[d[col]]


class TestIterator:
    """Batch iterator over a tab-separated inference file.

    Lines are read ahead into a buffer and handed out ``batch_size`` rows at a
    time.  When the file is exhausted the iterator rewinds it and raises
    ``StopIteration``.  Unlike the training data path, unknown values are
    tolerated: unknown history items are dropped and unknown categories map
    to 0.
    """

    def __init__(self, source, max_batch_size=1000):
        """Open ``source`` and prepare an empty read-ahead buffer.

        Args:
            source: path of the inference file.
            max_batch_size: number of batches worth of lines loaded per
                buffer refill (``batch_size * max_batch_size`` lines).
        """
        self.source = open(source, 'r')
        self.source_dicts = []
        self.batch_size = batch_size
        self.source_buffer = []
        self.k = batch_size * max_batch_size
        self.end_of_data = False

    def __iter__(self):
        return self

    def reset(self):
        """Rewind the underlying file to its beginning."""
        self.source.seek(0)

    def close(self):
        """Close the underlying file."""
        self.source.close()

    @staticmethod
    def _pad(seq):
        """Right-pad ``seq`` with 0 up to ``SEQ_LEN`` entries, or truncate it."""
        if len(seq) <= SEQ_LEN:
            return seq + [0] * (SEQ_LEN - len(seq))
        return seq[:SEQ_LEN]

    def __next__(self):
        """Return the next batch as a 13-element list.

        Layout: user ids, five padded history feature lists (item, brand,
        price, cate2, color), city, cate3d, cate7d, order7d, shop_level, an
        empty placeholder list and the history lengths.

        Raises:
            StopIteration: when the file is exhausted (the file is rewound).
        """
        if self.end_of_data:
            self.end_of_data = False
            self.reset()
            raise StopIteration

        if not self.source_buffer:
            for _ in range(self.k):
                ss = self.source.readline()
                if ss == "":
                    break
                # Remove only the line terminator so that empty trailing
                # columns are kept and every row has the same width.
                self.source_buffer.append(ss.rstrip("\r\n").split("\t"))

        if not self.source_buffer:
            self.end_of_data = False
            self.reset()
            raise StopIteration

        end = min(self.batch_size, len(self.source_buffer))
        buf = np.array(self.source_buffer[:end])
        del self.source_buffer[:end]

        # ``np.str`` was only an alias of the builtin and no longer exists in
        # current NumPy releases.
        user_id = np.array(buf[:, 0], str)
        city = [prov_dict[x] for x in buf[:, 1]]
        shop_level = [int(x) if x != '' else 1 for x in buf[:, 2]]
        cate3d = [cate2_dict.get(x, 0) for x in buf[:, 3]]
        cate7d = [cate2_dict.get(x, 0) for x in buf[:, 4]]
        order7d = [cate2_dict.get(x, 0) for x in buf[:, 5]]

        # Click history: comma-separated raw item ids in column 9.  Items
        # missing from the item table are dropped.
        hist_ck_items = [x.split(",") for x in buf[:, 9]]
        known = [[all_item[y] for y in x if y in all_item] for x in hist_ck_items]
        hist_items = [self._pad([feat[0] for feat in x]) for x in known]
        hist_brand = [self._pad([feat[1] for feat in x]) for x in known]
        hist_price = [self._pad([feat[2] for feat in x]) for x in known]
        hist_cate2 = [self._pad([feat[3] for feat in x]) for x in known]
        hist_color = [self._pad([feat[5] for feat in x]) for x in known]
        # The length must describe the lists actually fed to the model: count
        # only the kept items and cap at SEQ_LEN.  A floor of 1 keeps the
        # model's mean over the history from dividing by zero when every item
        # was dropped (the single step counted is then a padding entry).
        sl = [max(1, min(len(x), SEQ_LEN)) for x in known]

        # Slot 11 is never filled; it keeps the index layout that the model's
        # feed code relies on.
        continuous_feat = []

        return [user_id, hist_items, hist_brand, hist_price, hist_cate2, hist_color,
                city, cate3d, cate7d, order7d, shop_level, continuous_feat, sl]

hourly

import time
from config import *
from youtubenet_model import Model
from  youtubenet_iter_test import *


def dump_emb(emb, filename):
    """Write one ``"<key> <v1>,<v2>,..."`` line per entry of ``emb``.

    Args:
        emb: mapping from a key to a sequence of strings (the vector
            components, already converted to text).
        filename: path of the output file; it is overwritten.
    """
    rows = ("{0} {1}\n".format(key, ",".join(vec)) for key, vec in emb.items())
    with open(filename, "w") as out:
        out.writelines(rows)

def update():
    """Restore the trained model and dump user and item embeddings.

    User embeddings for every row of the test file are written to
    ``user_res``; item embeddings restricted to the ids listed in the
    ``online_item`` file are written to ``item_res`` (both under DATA_PATH).
    """
    model = Model(item_dict)

    with tf.Session(config=gpu_config) as sess:
        model.restore(sess, os.path.join(MODEL_PATH, "youtubenet.ckpt"))

        cnt = 0
        user_res = dict()
        test_data = TestIterator(TEST_PATH)
        for uij in test_data:
            output = model.test(sess, uij)
            for u, vec in zip(uij[0], output):
                user_res[u] = [str(x) for x in vec]

                cnt += 1
                if cnt % 100000 == 0:
                    # flush=True replaces sys.stdout.flush(): this file never
                    # imports ``sys`` itself.
                    print("{0} user embedding done. {1}".format(
                        cnt, time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))),
                        flush=True)

        dump_emb(user_res, os.path.join(DATA_PATH, "user_res"))

        # Read the ids of the items currently online, closing the file afterwards.
        with open(os.path.join(DATA_PATH, "online_item")) as f:
            online_item = {line.strip() for line in f}

        item_emb = sess.run(model.item_emb)
        # NOTE(review): row ``ind`` is taken to belong to item id ``ind + 1``,
        # which presumably requires the item table rows to be in id order -- confirm.
        item_all = {reverse_id_dict[ind + 1]: [str(x) for x in vec]
                    for ind, vec in enumerate(item_emb)}

        # Keep only the items that are online.
        item_res = {item_id: vec for item_id, vec in item_all.items() if item_id in online_item}
        dump_emb(item_res, os.path.join(DATA_PATH, "item_res"))


# Script entry point: export the embeddings only when executed directly.
if __name__ == '__main__':
    update()

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值