# Build a two-class, single-feature subset of the iris data and split it into
# train/test parts for the estimator pipeline the script goes on to define.

import numpy as np
import pandas as pd
# Preprocessing
from sklearn.preprocessing import StandardScaler, MinMaxScaler, PolynomialFeatures
# Gaussian naive Bayes, multinomial naive Bayes
from sklearn.naive_bayes import GaussianNB, MultinomialNB
# Estimator pipeline
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, recall_score, f1_score, precision_score
from sklearn.model_selection import train_test_split

# The file is read without a header row; columns 0-3 are used as inputs and
# column 4 as the label.
data = pd.read_csv('iris.data', header=None)
x, y = data[np.arange(4)], data[4]

# Encode the labels as integer category codes.
y = pd.Categorical(values=y).codes

# Keep only the samples of the first two classes (codes <= 1).
# A boolean mask is used instead of "the first N rows" so the selection does
# not silently depend on the file being sorted by class; for a class-sorted
# file both approaches pick exactly the same rows.
two_class_mask = y <= 1
threhold = int(two_class_mask.sum())  # number of retained samples (original name kept)
x = x[two_class_mask]
y = y[two_class_mask]

# Restrict the inputs to the selected feature column(s).
features = [1]
x = x[features]

# 70/30 train/test split with a fixed seed (random_state=0).
x, x_test, y, y_test = train_test_split(x, y, train_size=0.7, random_state=0)

# Equal weights for the two classes: (2, 2) normalizes to (0.5, 0.5).
priors = np.array((2, 2), dtype=float)
priors /= priors.sum()

# Estimator pipeline -- feature processing steps.
# NOTE(review): the pipeline definition is cut off in the available source,
# right after the opening parenthesis of PolynomialFeatures. The visible
# fragment is kept verbatim below as a comment rather than guessing the missing
# arguments and steps; restore the full statement from the original script.
# gnb = Pipeline([ ('sc', StandardScaler()), ('poly', PolynomialFeatures(
贝叶斯文本计算
最新推荐文章于 2020-12-20 14:44:36 发布