import os

import matplotlib.pyplot as plt
import numpy as np
from scipy import io
from scipy import stats
from sklearn.metrics import precision_recall_curve
from sklearn.naive_bayes import GaussianNB


def prior(yTrain, num_classes=5):
    '''
    Estimate the prior probability of every class from the training labels.

    Labels are compared against the values 1..num_classes, so class ``k`` is
    stored in row ``k - 1`` of the result.

    :param yTrain: class labels of the training samples, numpy.ndarray
                   (the original notes give an example shape of (218, 1))
    :param num_classes: number of classes to evaluate; defaults to 5, the
                        value that used to be hard-coded
    :return: numpy.ndarray of shape (num_classes, 1); row i holds the
             fraction of entries in yTrain equal to i + 1
    :raises ValueError: if yTrain contains no samples (the priors would
                        otherwise be NaN from a division by zero)
    '''
    num_total = yTrain.shape[0]
    if num_total == 0:
        raise ValueError('yTrain must contain at least one sample')

    p = np.zeros((num_classes, 1))
    for i in range(num_classes):
        # Relative frequency of label i + 1 among all training samples.
        p[i, 0] = np.sum(yTrain == i + 1) / num_total
    return p


def likelihood(xTrain, yTrain):
    '''
    Estimate per-class statistics of every feature for Gaussian naive Bayes.

    :param xTrain: feature matrix of the training samples; its second axis
                   is the feature axis
    :param yTrain: label matrix of the training samples; it is squeezed
                   along axis 1, so it must be 2-D with a single column
    :return: as described by the original author, the conditional mean and
             conditional variance:
             M numpy.ndarray shape=[num_feat, num_classes]; row m, column c
             is the mean of feature m given class c
             V numpy.ndarray shape=[num_feat, num_classes]; row m, column c
             is the variance of feature m given class c
             NOTE(review): the return statement itself is not visible in
             this source - confirm against the complete file.
    '''
    num_feat = xTrain.shape[1]
    num_classes = 5
    M = np.zeros((num_feat, num_classes))
    V = np.ones((num_feat, num_classes))
    for c in range(num_classes):
        # All feature rows whose label equals the current class (labels are
        # compared against c + 1); shape [num_class_examples, num_feat].
        temp_class_examples = xTrain[np.where(np.squeeze(yTrain, axis=1) == c + 1)]
        # Mean of every feature dimension under the current class,
        # shape [num_feat,].
        M[:, c] = np.mean(temp_class_examples, axis=0)
        # NOTE(review): the source is cut off here; the variance update for V
        # and the function's return are not visible and are intentionally
        # not reconstructed.
Python 实现 Gaussian Naive Bayes(高斯朴素贝叶斯)
最新推荐文章于 2025-05-28 06:19:07 发布