# 参考官网:https://www.tensorflow.org/tutorials/keras/basic_text_classification
# 与源码不同,这里设置 index_from=0(索引不再偏移,从第0位开始);实际上 dict 类型的 word_index 的索引从1开始。
import tensorflow as tf
from tensorflow import keras
import numpy as np
# Shorthand for the Keras IMDB dataset module; reused below for the vocabulary.
imdb = keras.datasets.imdb

# Load the train/test splits with num_words=10000 and index_from=0.
# NOTE(review): load_data's start-of-sequence and out-of-vocabulary markers
# appear to default to 1 and 2, which may overlap real word ids when
# index_from=0 -- confirm this is intended.
(x_train, y_train), (x_test, y_test) = imdb.load_data(
    num_words=10000,
    index_from=0,
)
print("train len >> %d, test len >> %d" % (len(x_train), len(x_test)))

# Each integer stands for one specific word in the dictionary.
print(x_train[0])

# Convert the integers back to words.
# A dictionary mapping words to an integer index
word_index = imdb.get_word_index()
# The first indices are reserved
#word_index = {k:(v+3) for k,v in word_index.item