# B站账号@狼群里的小杨,记得点赞收藏加关注,一键三连哦!
# 朴素贝叶斯算法实例-文本分类
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report
def naviebayes():
    """
    Classify the 20 Newsgroups corpus with a multinomial naive Bayes model.

    Loads the full corpus via ``fetch_20newsgroups``, holds out 20% of the
    documents for testing, converts the text to TF-IDF features, fits a
    ``MultinomialNB`` classifier and prints the learned vocabulary, the
    training matrix, the predicted labels, the accuracy and a per-class
    precision/recall report.

    :return: None
    """
    news = fetch_20newsgroups(subset='all')

    # 80/20 split; no random_state is set, so the split differs between runs.
    x_train, x_test, y_train, y_test = train_test_split(
        news.data, news.target, test_size=0.2
    )

    tf = TfidfVectorizer()
    # Fit the vocabulary/IDF weights on the training documents only.
    x_train = tf.fit_transform(x_train)

    # get_feature_names() was deprecated in scikit-learn 1.0 and removed in
    # 1.2; use get_feature_names_out() when available and fall back to the
    # old method on older releases. tolist() keeps the printed form a plain
    # list of str, as the old method produced.
    if hasattr(tf, "get_feature_names_out"):
        feature_names = tf.get_feature_names_out().tolist()
    else:
        feature_names = tf.get_feature_names()
    print(feature_names)

    # Reuse the training vocabulary for the test documents (transform only).
    x_test = tf.transform(x_test)

    # alpha is the additive smoothing parameter of MultinomialNB.
    mlt = MultinomialNB(alpha=1.0)
    print(x_train)
    mlt.fit(x_train, y_train)

    y_predict = mlt.predict(x_test)
    print("预测的文章类别为: ",y_predict)
    print("准确率:",mlt.score(x_test,y_test))
    print("每个类别的精确率和召回率:\n",classification_report(y_test, y_predict, target_names=news.target_names))
# Run the naive Bayes demo only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    naviebayes()
# 决策树实例-预测泰坦尼克号乘客生存率
import pandas as pd
from sklearn.feature_extraction import DictVectorizer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import export_graphviz
def decision():
"""
决策树对泰坦尼克号进行预测生死
:return: None
"""
titan = pd.read_csv("http://biostat.mc.vanderbilt.edu/wiki/pub/Main/DataSets/titanic.txt")
x = titan[['pclass', 'age', 'sex']]
y= titan['survived']
print(x)
x['age'].fillna(x['age'].mean(), inplace=True)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25)
dict = DictVectorizer(sparse=False)
x_train = dict.fit_transform(x_train.to_dict(orient="records"))
print(dict.get_feature_names())
x_test = dict.transform(x_test.to_dict(orient="records"))
dec = DecisionTreeClassifier(max_dept