# 通过 TF-IDF + LR 做文本分类 (text classification with TF-IDF features and logistic regression)
# -*- coding: utf-8 -*-
"""
Created on Wed Sep 5 13:23:31 2018
@author: Lenovo
"""
import jieba as jb
import numpy as np
import pandas as pd
from gensim import corpora, models
from gensim.models.doc2vec import Doc2Vec, TaggedDocument
from gensim.similarities.docsim import Similarity
from sklearn import feature_extraction
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.naive_bayes import MultinomialNB

try:
    # train_test_split has lived in sklearn.model_selection since
    # scikit-learn 0.18; sklearn.cross_validation was removed in 0.20.
    from sklearn.model_selection import train_test_split
except ImportError:
    # Fallback for legacy scikit-learn installs that predate model_selection.
    from sklearn.cross_validation import train_test_split
##content_id,conte