# Train several author-topic models that differ only in their random seed,
# recording each model's aggregate topic coherence so the best-seeded model
# can be selected afterwards.
model_list = []
for seed in range(5):
    # NOTE(review): passes=100 with iterations=1 matches the gensim
    # author-topic tutorial settings; eval_every=0 disables perplexity
    # evaluation during training.
    model = AuthorTopicModel(
        corpus=corpus,
        num_topics=10,
        id2word=dictionary.id2token,
        author2doc=author2doc,
        chunksize=2000,
        passes=100,
        gamma_threshold=1e-10,
        eval_every=0,
        iterations=1,
        random_state=seed,
    )
    top_topics = model.top_topics(corpus)
    # Each entry of top_topics is (topic, coherence); sum the coherences
    # as a single quality score for this seed.
    tc = sum(t[1] for t in top_topics)
    model_list.append((model, tc))
通过设置参数 random_state 使用不同的随机种子训练多个模型,并从中选择主题一致性(topic coherence)最高的模型
model.show_topics(num_topics=10, num_words=10, log=False, formatted=True) 结果是
model.show_topics(num_topics=10)
>>> [(0,
'0.014*"action" + 0.014*"control" + 0.010*"policy" + 0.009*"q" + 0.009*"reinforcement" + 0.007*"optimal" + 0.006*"robot" + 0.005*"controller" + 0.005*"dynamic" + 0.005*"environment"'),
(1,
'0.020*"image" + 0.008*"face" + 0.007*"cluster" + 0.006*"signal" + 0.005*"source" + 0.005*"matrix" + 0.005*"filter" + 0.005*"search" + 0.004*"distance" + 0.004*"o_o"')]
model.get_topic_terms(topicid, topn=10) 输入主题编号,返回该主题下最重要的词的 ID 及其对应概率,结果
model.get_topic_terms(1, topn=10)
>>> [(774, 0.019700538013351386),
(3215, 0.0075965808303036916),
(3094, 0.0067132528809042526),
(514, 0.0063925849599646822),
(2739, 0.0054527647598129206),
(341, 0.004987335769043616),
(752, 0.0046566448210636699),
(1218, 0.0046234352422933724),
(186, 0.0042132891022475458),
(829, 0.0041800479706789939)]
model.get_topics() 返回主题数 × 单词数的矩阵(这里为 10 个主题 × 7674 个单词),结果是
model.get_topics()
>>> array([[ 9.57974777e-05, 6.1713078