# 训练集数据分析 (Training-set data analysis)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
plt.rc('font', family='SimHei', size=13)
import os
import gc
import re
import warnings
import sys
warnings.filterwarnings("ignore")
# Directory that holds the input CSV files; the trailing slash is required
# because file names are appended by plain string concatenation.
path = './data/'


def _add_click_rank(click_df):
    """Add a per-user ``rank`` column to a click log, in place.

    Within each ``user_id`` group the row with the largest
    ``click_timestamp`` gets rank 1, the next largest rank 2, and so on.

    ``method='first'`` is used so that ranks are always whole numbers and
    unique within a user: with pandas' default ``method='average'`` tied
    timestamps receive fractional ranks (e.g. 1.5) that ``astype(int)``
    silently truncates, producing duplicated and skipped positions.
    Ties are broken by row order in ``click_df``.

    Args:
        click_df: DataFrame with ``user_id`` and ``click_timestamp`` columns.

    Returns:
        The same DataFrame object, with the integer ``rank`` column added.
    """
    click_df['rank'] = (
        click_df.groupby('user_id')['click_timestamp']
        .rank(ascending=False, method='first')
        .astype(int)
    )
    return click_df


# Training click log.
trn_click = pd.read_csv(path + 'train_click_log.csv')

# Article table; its `article_id` column is renamed to `click_article_id`
# (presumably to match the column name used in the click logs -- confirm).
item_df = pd.read_csv(path + 'articles.csv')
item_df = item_df.rename(columns={'article_id': 'click_article_id'})

# Article embeddings table.
item_emb_df = pd.read_csv(path + 'articles_emb.csv')

# Test (set A) click log.
tst_click = pd.read_csv(path + 'testA_click_log.csv')

# Rank each user's clicks by descending timestamp in both logs.
trn_click = _add_click_rank(trn_click)
tst_click = _add_click_rank(tst_click)
trn_click['click_cnts'] = trn_click.groupby(['user_id'])[