"""
#-*- coding: utf-8 -*-
年龄:0代表<=30,1代表31~40,2代表>40
收入:0代表高,1代表中,2代表低
学生:0代表否,1代表是
信誉:0代表中,1代表优
类别:1代表是,0代表否
"""
def naive_bayes_scores(train_samples, x):
    """Score every class for ``x`` with an unsmoothed naive Bayes model.

    Each training sample is a sequence of categorical attribute values
    followed by its class label. The label position is taken from the length
    of the first sample, so all samples are expected to have that length.

    Args:
        train_samples: Non-empty sequence of training samples.
        x: Attribute values of the sample to classify, one value per
            descriptive attribute (i.e. one fewer than a training sample).

    Returns:
        A dict mapping each class label seen in ``train_samples`` to
        ``P(c) * prod_i P(x[i] | c)``, with every probability estimated by
        relative frequency. Keys appear in order of first occurrence in the
        training data. No smoothing is applied, so an attribute value never
        seen together with a class makes that class's score ``0.0``.

    Raises:
        ValueError: If ``train_samples`` is empty or ``x`` does not have one
            value per descriptive attribute.
    """
    if not train_samples:
        raise ValueError("train_samples must not be empty")

    label_index = len(train_samples[0]) - 1
    if len(x) != label_index:
        raise ValueError(
            "x must have %d attribute values, got %d" % (label_index, len(x))
        )

    # Group the training samples by class label. Keying by the label itself
    # (instead of by list position) keeps scores and labels aligned for any
    # number of classes and for labels that are not 0..k-1.
    samples_by_class = {}
    for sample in train_samples:
        samples_by_class.setdefault(sample[label_index], []).append(sample)

    n_samples = len(train_samples)
    scores = {}
    for label, members in samples_by_class.items():
        n_members = len(members)
        # Start from the class prior P(c) ...
        score = n_members / n_samples
        # ... and multiply in the conditional P(x[i] | c) of every attribute.
        for i in range(label_index):
            n_matching = sum(1 for sample in members if sample[i] == x[i])
            score *= n_matching / n_members
        scores[label] = score
    return scores


def naive_bayes_predict(train_samples, x):
    """Return the class label with the highest naive Bayes score for ``x``.

    Args:
        train_samples: Non-empty sequence of training samples; see
            ``naive_bayes_scores`` for the expected layout.
        x: Attribute values of the sample to classify.

    Returns:
        The class label (not a list position) whose score is largest. On a
        tie the label that occurs first in ``train_samples`` wins.

    Raises:
        ValueError: Propagated from ``naive_bayes_scores`` for empty training
            data or an ``x`` of the wrong length.
    """
    scores = naive_bayes_scores(train_samples, x)
    return max(scores, key=scores.get)


if __name__ == '__main__':
    # One row per training sample: four encoded attributes followed by the
    # class label (columns presumably follow the order of the legend in the
    # module docstring).
    train_samples = [
        [0, 0, 0, 0, 0],
        [0, 0, 0, 1, 0],
        [1, 0, 0, 0, 1],
        [2, 1, 0, 0, 1],
        [2, 2, 1, 0, 1],
        [2, 2, 1, 1, 0],
        [1, 2, 1, 1, 1],
        [0, 1, 0, 0, 0],
        [0, 2, 1, 0, 1],
        [2, 1, 1, 0, 1],
    ]
    # Sample to classify.
    X = [1, 1, 0, 1]

    # Scores are reported keyed by class label so that the printed values and
    # the predicted class cannot get out of step with each other.
    result_p = naive_bayes_scores(train_samples, X)
    print('概率分别为', result_p)

    # The class with the largest score is the prediction for X.
    predict_class = max(result_p, key=result_p.get)
    print(predict_class)
# Original article: https://blog.youkuaiyun.com/ten_sory/article/details/81237169