# Training samples: each row maps the attribute names "attend" and "rest"
# to "early"/"late" and the class label "score" to "high"/"low".
# The attribute keys must match the keys used in the sample being classified
# exactly (no surrounding whitespace), otherwise the lookup in PT raises
# KeyError.
data = [
    {"attend": "early", "rest": "late", "score": "high"},
    {"attend": "early", "rest": "early", "score": "high"},
    {"attend": "early", "rest": "late", "score": "low"},
    {"attend": "late", "rest": "early", "score": "low"},
    {"attend": "late", "rest": "late", "score": "high"},
    {"attend": "early", "rest": "late", "score": "high"},
    {"attend": "early", "rest": "early", "score": "high"},
    {"attend": "late", "rest": "early", "score": "low"},
    {"attend": "late", "rest": "late", "score": "low"},
    {"attend": "early", "rest": "late", "score": "high"},
]
def P(data, cls_val, cls_name="score"):
    """Return the prior probability of a class value.

    The prior is the fraction of rows in ``data`` whose ``cls_name`` entry
    equals ``cls_val``.

    Args:
        data: Sized collection of dict rows; every row must contain
            ``cls_name``.
        cls_val: Class value to count (e.g. "high" or "low").
        cls_name: Key under which each row stores its class label.

    Returns:
        The fraction as a float, or 0.0 when ``data`` is empty.

    Raises:
        KeyError: If a row has no ``cls_name`` key.
    """
    total = len(data)
    if total == 0:
        # No samples: avoid dividing by zero and report a zero prior.
        return 0.0
    matches = sum(1 for row in data if row[cls_name] == cls_val)
    # float() keeps the division fractional on interpreters where int / int
    # truncates.
    return float(matches) / total
def PT(data, cls_val, attr_name, attr_val, cls_name="score"):
    """Return the conditional probability of an attribute value given a class.

    Among the rows whose ``cls_name`` entry equals ``cls_val``, this is the
    fraction whose ``attr_name`` entry equals ``attr_val``.

    Args:
        data: Iterable of dict rows; every row must contain ``cls_name``.
        cls_val: Class value to condition on (e.g. "high" or "low").
        attr_name: Attribute key to inspect (e.g. "rest").
        attr_val: Attribute value to count (e.g. "early" or "late").
        cls_name: Key under which each row stores its class label.

    Returns:
        The fraction as a float, or 0.0 when no row belongs to ``cls_val``.

    Raises:
        KeyError: If a row lacks ``cls_name``, or a row of the requested
            class lacks ``attr_name``.
    """
    # Only rows of the requested class take part in the estimate, so the
    # attribute is looked up on those rows alone.
    in_class = [row for row in data if row[cls_name] == cls_val]
    if not in_class:
        # The class never occurs: avoid dividing by zero.
        return 0.0
    matches = sum(1 for row in in_class if row[attr_name] == attr_val)
    # float() keeps the division fractional on interpreters where int / int
    # truncates.
    return float(matches) / len(in_class)
def NB(data, test, cls_y, cls_n):
    """Score a sample against two candidate classes, naive-Bayes style.

    For each class the score starts at the class prior from ``P`` and is
    multiplied by the conditional probability from ``PT`` of every
    attribute/value pair in ``test``. The scores are not normalized.

    Args:
        data: Training rows passed through to ``P`` and ``PT``.
        test: Dict mapping attribute names to the values of the sample.
        cls_y: First candidate class value.
        cls_n: Second candidate class value.

    Returns:
        A dict mapping ``cls_y`` and ``cls_n`` to their scores.
    """
    score_y = P(data, cls_y)
    score_n = P(data, cls_n)
    for attr_name in test:
        attr_val = test[attr_name]
        score_y = score_y * PT(data, cls_y, attr_name, attr_val)
        score_n = score_n * PT(data, cls_n, attr_name, attr_val)
    return {cls_y: score_y, cls_n: score_n}
# Score one sample for the "high" and "low" classes and print both scores.
# The sample's attribute names must match the keys used in the rows of `data`.
Pr = NB(
    data=data,
    test={"attend": "early", "rest": "late"},
    cls_y="high",
    cls_n="low",
)
print(Pr)