python-EM求解混合高斯分布
参考链接
代码
import numpy as np
def Normal(x,mu,sigma):#一元正态分布概率密度函数
return np.exp(-(x-mu)**2/(2*sigma**2))/(np.sqrt(2*np.pi)*sigma)
def GenerateData(num_1, mu_1, sigma_1, num_2, mu_2, sigma_2 ):
num_sum =num_1 + num_2
np.random.seed(1)
data_1 = np.random.normal(mu_1, sigma_1, num_1)
data_1 = data_1.reshape(-1, 1)
data_2 = np.random.normal(mu_2, sigma_2, num_2)
data_2 = data_2.reshape(-1, 1)
data = np.concatenate((data_1,data_2))
return data
# generate data
data = GenerateData(500, 5, 1, 500, 8, 3)
# data number
N = data.shape[0]
# mu init
mu_em = np.random.random((1,2))
# sigma init
sigma_square_em = np.random.random((1,2))#模型迭代用Sigma平方
# ratio init
ratio_1 = np.random.random()
ratio_2 = 1 - ratio_1
alpha_em = np.array([[ratio_1,ratio_2]])
iter_num = 0
while(True):
iter_num += 1
#Expectation
gauss_1 = Normal(data, mu_em[0][0], np.sqrt(sigma_square_em[0][0]))
gauss_2 = Normal(data, mu_em[0][1], np.sqrt(sigma_square_em[0][1]))
gamma_1 = alpha_em[0][0] * gauss_1
gamma_2 = alpha_em[0][1] * gauss_2
M = gamma_1 + gamma_2
#Maximization
sigma_square_em[0][0] = np.dot((gamma_1/M).T, (data-mu_em[0][0])**2)/np.sum(gamma_1/M)
sigma_square_em[0][1] = np.dot((gamma_2/M).T, (data-mu_em[0][1])**2)/np.sum(gamma_2/M)
# update
mu_em[0][0] = np.dot((gamma_1/M).T, data) / np.sum(gamma_1/M)
mu_em[0][1] = np.dot((gamma_2/M).T, data) / np.sum(gamma_2/M)
alpha_em[0][0] = np.sum(gamma_1/M) / N
alpha_em[0][1] = np.sum(gamma_2/M) / N
if iter_num % 100 == 0:
print( "iteration : ", iter_num )
print("\tmu, sigma, alpha : ", mu_em, np.sqrt(sigma_square_em), alpha_em)
if( iter_num >= 1000 ):
break