

# Build the gensim dictionary (token <-> integer id mapping) for the corpus.
# NOTE(review): corpora_documents is wrapped in a list here, so it is
# presumably ONE tokenized document (a flat list of tokens) -- confirm
# against where corpora_documents is built.
dictionary = corpora.Dictionary([corpora_documents])
#print(dictionary)
# print 'dfs:', dictionary.dfs # document frequencies: {token id: number of documents containing it}
# Single-argument print(...) calls produce the same output under Python 2
# (where the parentheses are just grouping) and are valid under Python 3.
print('num_docs: %s' % dictionary.num_docs)  # number of documents processed
print('num_pos: %s' % dictionary.num_pos)  # total number of tokens (corpus positions)
# word_id_dict = dictionary.token2id # {token: id}
# print 'word_id_dict:'
# print len(word_id_dict)
# for k in word_id_dict.keys():
# kuozhan(corpora_documents)
# big_shuzu.append(corpora_documents)
#print big_shuzu
#dictionary.add_documents(big_shuzu) # extend the dictionary with more documents
print 'num_docs:', diction