import numpy as np
from scipy.stats import multivariate_normal
def train(x, max_iter=400):
    """Fit a two-component Gaussian mixture to ``x`` with the EM algorithm.

    Component 1 starts at the per-feature minimum of the data and component 2
    at the per-feature maximum, both with identity covariance and equal
    mixing weights. Exactly ``max_iter`` EM iterations are run; there is no
    convergence test.

    Args:
        x: Samples as an ``(m, n)`` array-like (``m`` samples, ``n``
            features). A flat 1-D sequence is treated as ``m`` samples of a
            single feature.
        max_iter: Number of EM iterations to perform.

    Returns:
        A tuple ``(pi, mu1, mu2, variance1, variance2, std1, std2)`` where
        ``pi`` is the mixing weight of component 1 and the remaining entries
        are the mean, variance and standard deviation of each component
        along the first feature only (element ``[0]`` of the mean and
        ``[0, 0]`` of the covariance matrix).
    """
    x = np.asarray(x, dtype=float)
    if x.ndim == 1:
        # Treat a flat sequence as m one-dimensional samples.
        x = x[:, np.newaxis]
    m, n = x.shape

    mu1 = x.min(axis=0)
    mu2 = x.max(axis=0)
    sigma1 = np.identity(n)  # initial covariance matrices
    sigma2 = np.identity(n)
    pi = 0.5

    for _ in range(max_iter):
        # E-step, done in log space: for a sample far from both means the
        # plain densities underflow to 0 and tau1 / (tau1 + tau2) becomes
        # 0 / 0 = NaN, which then poisons every parameter.
        log_tau1 = np.log(pi) + multivariate_normal(mu1, sigma1).logpdf(x)
        log_tau2 = np.log1p(-pi) + multivariate_normal(mu2, sigma2).logpdf(x)
        w = np.exp(log_tau1 - np.logaddexp(log_tau1, log_tau2))
        w_other = 1.0 - w
        total1 = np.sum(w)
        total2 = np.sum(w_other)

        # M-step: responsibility-weighted means.
        mu1 = np.dot(w, x) / total1
        mu2 = np.dot(w_other, x) / total2

        # Responsibility-weighted covariance matrices (for one-dimensional
        # data this is simply the variance), computed around the new means.
        diff1 = x - mu1
        diff2 = x - mu2
        sigma1 = np.dot(w * diff1.T, diff1) / total1
        sigma2 = np.dot(w_other * diff2.T, diff2) / total2

        pi = total1 / m

    # Report the variance (as a scalar) and standard deviation of the first
    # feature of each component.
    variance1 = sigma1[0, 0]
    variance2 = sigma2[0, 0]
    std1 = np.sqrt(variance1)
    std2 = np.sqrt(variance2)
    return (pi, mu1[0], mu2[0], variance1, variance2, std1, std2)
# Script entry point: fit the two-component mixture to the exam scores below
# and print the estimated parameters of each class.
if __name__ == '__main__':
    # One score per row, so the array has shape (m, 1).
    x = np.array([
        [66], [67], [62], [42], [69], [61], [69], [49], [41], [60],
        [73], [60], [47], [73], [76], [76], [41], [41], [55], [53],
        [57], [57], [74], [69], [71], [56], [59], [53], [65], [72],
        [68], [62], [70], [63], [49], [46], [57], [49], [48], [61],
        [48], [62], [60], [68], [69], [90], [74], [68], [74], [71],
        [68], [75], [88], [84], [71], [76], [78], [89], [76], [82],
        [88], [89], [78], [87], [64], [96], [92], [89], [77], [80],
        [66], [55], [89], [77], [83], [81], [75], [55], [65]
    ])
    pi, mu1, mu2, var1, var2, std1, std2 = train(x)
    print("班级1的平均分:", mu1)
    print("班级2的平均分:", mu2)
    print("班级1的方差:", var1)
    print("班级2的方差:", var2)
    print("班级1的标准差:", std1)
    print("班级2的标准差:", std2)
    print("班级1的比例:", pi)
    # pi is the weight of class 1, so class 2 takes the remainder.
    print("班级2的比例:", 1 - pi)