朴素贝叶斯[数学模型]——机器学习-CSDN博客

本文链接：https://blog.csdn.net/m0_63047698/article/details/131173662

# Training samples: each record holds two attributes ("attend", "rest") and
# the class label ("score"). Keys carry no surrounding whitespace so that a
# lookup by attribute name such as "attend" finds them.
data = [
    {"attend": "early", "rest": "late", "score": "high"},
    {"attend": "early", "rest": "early", "score": "high"},
    {"attend": "early", "rest": "late", "score": "low"},
    {"attend": "late", "rest": "early", "score": "low"},
    {"attend": "late", "rest": "late", "score": "high"},
    {"attend": "early", "rest": "late", "score": "high"},
    {"attend": "early", "rest": "early", "score": "high"},
    {"attend": "late", "rest": "early", "score": "low"},
    {"attend": "late", "rest": "late", "score": "low"},
    {"attend": "early", "rest": "late", "score": "high"},
]

# Compute the prior probability of a class.
def P(data, cls_val, cls_name="score"):
    """Return the fraction of records in *data* whose class is *cls_val*.

    Args:
        data: Sequence of dict records; every record must contain the key
            *cls_name*.
        cls_val: Class label to count (e.g. "high" or "low").
        cls_name: Key under which each record stores its class label.

    Returns:
        The relative frequency of *cls_val* as a float, or 0.0 when *data*
        is empty (there is nothing to estimate from, and dividing by
        ``len(data)`` would raise ZeroDivisionError).

    Raises:
        KeyError: If a record has no *cls_name* key.
    """
    if not data:
        return 0.0
    matches = sum(1 for e in data if e[cls_name] == cls_val)
    # float() keeps true division even where "/" on ints would truncate.
    return float(matches) / len(data)

# Compute a class-conditional probability.
def PT(data, cls_val, attr_name, attr_val, cls_name="score"):
    """Return the estimate of P(attr_name == attr_val | class == cls_val).

    Args:
        data: Iterable of dict records; every record must contain the key
            *cls_name*, and every record of class *cls_val* must contain
            the key *attr_name*.
        cls_val: Class label that conditions the estimate (e.g. "high").
        attr_name: Attribute key to inspect (e.g. "rest").
        attr_val: Attribute value to count (e.g. "early" or "late").
        cls_name: Key under which each record stores its class label.

    Returns:
        Among the records of class *cls_val*, the fraction whose
        *attr_name* equals *attr_val*. Returns 0.0 when no record belongs
        to *cls_val* (the ratio would otherwise divide by zero).

    Raises:
        KeyError: If a record lacks *cls_name*, or a record of the class
            lacks *attr_name*.
    """
    cls_count = 0    # records belonging to the class
    match_count = 0  # of those, records with the requested attribute value
    for e in data:
        if e[cls_name] != cls_val:
            continue
        cls_count += 1
        if e[attr_name] == attr_val:
            match_count += 1
    if cls_count == 0:
        return 0.0
    # float() keeps true division even where "/" on ints would truncate.
    return float(match_count) / cls_count

# Score a sample under two candidate classes.
def NB(data, test, cls_y, cls_n):
    """Return the naive Bayes score of *test* for each of two classes.

    For each class the score is the class prior from ``P`` multiplied by
    the ``PT`` value of every (attribute, value) pair in *test*. The two
    scores are not normalized against each other.

    Args:
        data: Training records passed through to ``P`` and ``PT``.
        test: Dict mapping attribute name to the observed value.
        cls_y: First candidate class label.
        cls_n: Second candidate class label.

    Returns:
        A dict ``{cls_y: score_y, cls_n: score_n}``.
    """
    # Start each running product from the class prior.
    score_y = P(data, cls_y)
    score_n = P(data, cls_n)
    for attr_name in test:
        attr_val = test[attr_name]
        # Fold in the per-attribute conditional for both classes.
        score_y = score_y * PT(data, cls_y, attr_name, attr_val)
        score_n = score_n * PT(data, cls_n, attr_name, attr_val)
    result = {cls_y: score_y}
    result[cls_n] = score_n
    return result

# Score one sample (attend early, rest late) under both classes and print
# the resulting dict.
Pr = NB(
    data=data,
    test={"attend": "early", "rest": "late"},
    cls_y="high",
    cls_n="low",
)
print(Pr)