朴素贝叶斯代码(原理很简单)
直接上代码
import numpy as np
from collections import Counter,defaultdict
class NBayes:
    """Naive Bayes classifier for discrete features.

    Uses maximum-likelihood estimates (no smoothing): unseen
    (feature, value, label) combinations get probability 0.
    """

    def __init__(self):
        # P(y): class label -> prior probability
        self.p_prior = {}
        # P(x_d = v | y): keyed by (feature index d, feature value v, label y)
        self.p_condition = {}

    def fit(self, x_train, y_train):
        """Estimate prior and conditional probabilities from training data.

        x_train: 2-D array of shape (n_samples, n_features), discrete values.
        y_train: 1-D array of class labels, length n_samples.
        """
        n = len(y_train)
        class_counts = Counter(y_train)
        # Prior P(y) = count(y) / N
        for label, count in class_counts.items():
            self.p_prior[label] = count / n
        # Conditional probabilities, one feature column at a time.
        # Note: shape[1] — we iterate FEATURES, not samples.
        for d in range(x_train.shape[1]):
            pair_counts = defaultdict(int)
            column = x_train[:, d]
            for value, label in zip(column, y_train):
                pair_counts[(value, label)] += 1
            for (value, label), count in pair_counts.items():
                # P(x_d = value | y = label) = count(value, label) / count(label)
                self.p_condition[(d, value, label)] = count / class_counts[label]
        return

    def predict(self, x_test):
        """Return the label maximizing P(y) * prod_d P(x_d | y) for one sample.

        The evidence P(X) is the same for every label, so comparing the
        joint probability is sufficient.
        """
        scores = {}
        for label, prior in self.p_prior.items():
            score = prior
            for d, value in enumerate(x_test):
                # Unseen (d, value, label) combinations contribute 0 (no smoothing).
                score *= self.p_condition.get((d, value, label), 0)
            scores[label] = score
        return max(scores, key=scores.get)
if __name__ == '__main__':
    # Toy dataset (Li Hang, "Statistical Learning Methods", Example 4.1):
    # the first two columns are features, the last column is the class label.
    data = np.array([[1, 0, -1], [1, 1, -1], [1, 1, 1], [1, 0, 1],
                     [1, 0, -1], [2, 0, -1], [2, 1, -1], [2, 1, 1],
                     [2, 2, 1], [2, 2, 1], [3, 2, 1], [3, 1, 1],
                     [3, 1, 1], [3, 2, 1], [3, 2, -1]])
    X_data = data[:, :-1]
    y_data = data[:, -1]
    clf = NBayes()
    clf.fit(X_data, y_data)
    print(clf.p_prior, '\n')      # prior probability of each class
    print(clf.p_condition, '\n')  # P(feature value | class) table
    print(clf.predict(np.array([2, 0])))  # expected output: -1
写完了,这就是朴素贝叶斯