import pandas as pd
import numpy as np
def naive_bayes(train_data, train_label, test_samples=None):
    """Classify samples with a Laplace-smoothed categorical naive Bayes model.

    For every sample and every class ``c`` the score is::

        (N_c + 1) / (N + K) * prod_f (N_{c, f=v} + 1) / (N_c + V_f)

    where ``N`` is the number of training rows, ``K`` the number of distinct
    classes, ``N_c`` the number of rows of class ``c``, ``N_{c, f=v}`` the
    number of those rows whose feature ``f`` equals the sample's value ``v``,
    and ``V_f`` the number of distinct values of ``f`` in ``train_data``.
    The class with the highest score wins (the first one on ties).

    Args:
        train_data: DataFrame of categorical features, one row per example.
        train_label: Series of class labels sharing ``train_data``'s index.
        test_samples: 2-D array-like with one row per sample and the feature
            values in ``train_data`` column order; a single 1-D sample is
            also accepted. When omitted, the module-level ``test_set`` is
            used, which is how the function was originally driven.

    Returns:
        A list with the predicted label of each sample, in input order.
        Each prediction is also printed, one line per sample.

    Raises:
        ValueError: if no samples are available, the training labels are
            empty, or a sample's length differs from the number of feature
            columns.
    """
    if test_samples is None:
        # Backward compatibility: earlier callers passed only the training
        # data and relied on a global named ``test_set``.
        test_samples = globals().get("test_set")
        if test_samples is None:
            raise ValueError(
                "no test samples given and no module-level 'test_set' defined")

    feature_names = list(train_data.columns)
    classes = train_label.unique()
    n_classes = len(classes)
    if n_classes == 0:
        raise ValueError("training data must contain at least one labelled row")

    # dtype=object keeps every value's own type, so comparisons against the
    # training columns are not affected by NumPy string coercion.
    samples = np.asarray(test_samples, dtype=object)
    if samples.size == 0:
        return []
    if samples.ndim == 1:
        samples = samples.reshape(1, -1)
    if samples.ndim != 2 or samples.shape[1] != len(feature_names):
        raise ValueError(
            f"each sample must have {len(feature_names)} feature values")

    # Everything below depends only on the training data, so compute it once
    # instead of once per (sample, class, feature) combination.
    n_rows = len(train_data)
    rows_per_class = [train_data[train_label == cls] for cls in classes]
    distinct_values = [train_data[name].unique().size for name in feature_names]

    predictions = []
    for k, sample in enumerate(samples):
        scores = []
        for class_rows in rows_per_class:
            class_size = len(class_rows)
            # Smoothed prior of the class.
            score = (class_size + 1) / (n_rows + n_classes)
            for name, n_values, value in zip(feature_names, distinct_values, sample):
                matches = int((class_rows[name] == value).sum())
                # Smoothed conditional probability of this feature value.
                score *= (matches + 1) / (class_size + n_values)
            scores.append(score)
        predicted = classes[int(np.argmax(scores))]
        print(f"第{(k + 1)}辆车的车辆评价为:{predicted}")
        predictions.append(predicted)
    return predictions
if __name__ == '__main__':
    # Column headers assigned to the comma-separated data file; the last
    # column holds the class label, the others are the features.
    column_names = ['购买价格', '维护费用', '车门数', '可载人数', '车身大小', '安全程度', '汽车评价']

    # The names `dataset` and `test_set` are kept as-is: code outside this
    # block may look them up as module-level globals.
    test_rows = [
        ['low', 'low', '4', '4', 'small', 'high'],
        ['high', 'high', '4', '4', 'med', 'med'],
        ['low', 'high', '3', '2', 'med', 'low'],
        ['low', 'low', '2', 'more', 'big', 'high'],
    ]
    test_set = np.array(test_rows)

    dataset = pd.read_table("实验二数据集.txt", names=column_names, sep=',')
    data = dataset[column_names[:-1]]
    labels = dataset[column_names[-1]]

    naive_bayes(data, labels)
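# The four rows of test_set, in order:
# X1: buying price low, maintenance cost low, 4 doors, seats 4, small body, high safety
# X2: buying price high, maintenance cost high, 4 doors, seats 4, medium body, medium safety
# X3: buying price low, maintenance cost high, 3 doors, seats 2, medium body, low safety
# X4: buying price low, maintenance cost low, 2 doors, seats more than 4, big body, high safety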
# Naive Bayes classification of cars
# First published 2022-04-13 16:44:54