2017-12-25-gensim-Topic-and-Transformations

title | category | layout | tags | date
3.Topic and Transformations
nlp
post
Gensim
nlp
2017-12-25 20:56:24
import logging
# Configure root logging: records at INFO level and above are emitted,
# each formatted as "timestamp : level : message".
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)


from gensim import corpora,models,similarities
import os
# Load a previously saved dictionary and Matrix Market corpus, then build a
# chain of gensim transformation models on top of them (TF-IDF, LSI, random
# projections, LDA, HDP). Nothing is built when the dictionary file is absent.
if os.path.exists('./model/dictionary.m'):
    dictionary = corpora.Dictionary.load('./model/dictionary.m')
    print(dictionary)
    corpus = corpora.MmCorpus('./model/corpra.mms')
    print('load dictionary and corpus done')

    # TF-IDF: fit on the loaded corpus, then wrap the corpus so documents
    # are re-weighted when iterated. Iterate over tfidf_corpus to inspect
    # the individual weighted vectors.
    tfidf_model = models.TfidfModel(corpus=corpus, normalize=True)
    tfidf_corpus = tfidf_model[corpus]

    # LSI: trained on the TF-IDF vectors, reduced to 4 topics.
    lsi_model = models.LsiModel(
        corpus=tfidf_corpus, id2word=dictionary, num_topics=4
    )
    # The model must be applied to vectors from the same space it was
    # trained on, i.e. the TF-IDF corpus rather than the raw corpus.
    lsi_corpus = lsi_model[tfidf_corpus]
    print(lsi_model.show_topics(2))
    # lsi_model.add_documents(...) can be used to feed in new documents, and
    # lsi_model[vector] to transform a single document vector.

    # Random projections.
    rp_model = models.RpModel(tfidf_corpus, num_topics=500)

    # NOTE(review): the gensim tutorial trains LDA and HDP on the
    # bag-of-words corpus; TF-IDF input is kept here to preserve the
    # existing behaviour -- confirm this is intended.
    # Latent Dirichlet Allocation.
    lda_model = models.LdaModel(
        tfidf_corpus, id2word=dictionary, num_topics=100
    )
    # Hierarchical Dirichlet Process.
    hdp_model = models.HdpModel(tfidf_corpus, id2word=dictionary)
else:
    print('file not exsit')
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值