hmm(二)

参数估计

根据观察到的序列集来找到一个最有可能的 HMM


代码

#coding:utf-8

class Hmm:
	def __init__(self, description=None):
		self.description = description

	@property
	def hidden_states(self):
	    return self._hidden_states
	@hidden_states.setter
	def hidden_states(self, vals):
		self._hidden_states = vals

	@property
	def observed_states(self):
	    return self._observed_states
	@observed_states.setter
	def observed_states(self, vals):
		self._observed_states = vals

	@property
	def transition_matrix(self):
	    return self._transition_matrix
	@transition_matrix.setter
	def transition_matrix(self, vals):
		self._transition_matrix = vals

	@property
	def emission_matrix(self):
	    return self._emission_matrix
	@emission_matrix.setter
	def emission_matrix(self, vals):
		self._emission_matrix = vals

	@property
	def start_prob(self):
		return self._start_prob
	@start_prob.setter
	def start_prob(self, val):
		self._start_prob = val

	@property
	def final_prob(self):
		return self._final_prob
	@final_prob.setter
	def final_prob(self, val):
		self._final_prob = val

	@property
	def num_states(self):
	    return self._num_states
	@num_states.setter
	def num_states(self, val):
		self._num_states = val

	@property
	def num_ob_states(self):
	    return self._num_ob_states
	@num_states.setter
	def num_ob_states(self, val):
		self._num_ob_states = val
	
	


def hmmfunc_wrapper(func):
	def inner_wrapper(*args, **kwargs):
		print '*** before called method=[%s]***' % func.__name__
		func(args, kwargs)
		print '*** after called method=[%s]***' % func.__name__
	return inner_wrapper

import copy
import numpy as np

class HmmFunc:
	def __init__(self, description):
		self.description = description

	def _find_start_state(self,state, src_states):
		for i , val in enumerate(src_states):
			if val == state:
				return i
		return 0

	def cal_likehood(self, observerd_datas, hmm):

		for j,state in enumerate(observerd_datas):
			if j == 0:
				#alpha(t,j) = a0(j) * emission(j,0)
				start_idx = self._find_start_state(observerd_datas[0], hmm.observed_states)
				alpha = [1.0 * val * hmm.emission_matrix[i][start_idx] for i , val in enumerate(hmm.start_prob)]
				continue

			tmp = [0.0] * len(alpha)
			# Transition : N * N
			# Emission   : N * Ob
			# alpha(t,j) = sum_k( alpha(t-1,k)* transition(k,m) )* emission(m,j)
			idx = self._find_start_state(state,hmm.observed_states)
			num_states = hmm.num_states
			for k in range(num_states):
				sum = 0
				for m in range(num_states):
					sum += alpha[m] * hmm.transition_matrix[m][k]
				#endfor m
				tmp[k] = sum * hmm.emission_matrix[k][idx]
			#endfor k
			alpha = [ v for v in tmp]
		return np.sum(alpha)

	def cal_most_prob_seq(self, observerd_datas, hmm):
		#max_probs_end in step j
		seq_len = len(observerd_datas)
		num_states = hmm.num_states
		backwards = [(num_states * [1.0])] * seq_len
		delta = [(num_states * [1.0])] * seq_len
		for j , state in enumerate(observerd_datas):
			if j == 0:
				start_idx = self._find_start_state(observerd_datas[0], hmm.observed_states)
				delta[0] = [1.0 * val * hmm.emission_matrix[i][start_idx] for i , val in enumerate(hmm.start_prob)]
				continue
			idx = self._find_start_state(state, hmm.observed_states)
			for k in range(num_states):
				max_val = -1000
				maxid = 0
				for m in range(num_states):
					 tmp = delta[j-1][m] * hmm.transition_matrix[m][k]
					 if tmp > max_val:
					 	max_val = tmp
					 	maxid = m
				delta[j][k] = max_val * hmm.emission_matrix[k][idx]
				backwards[j][k] = maxid
		#self.backwards = backwards
		#self.delta = delta
		idx = self._tranverse(delta, backwards, seq_len, num_states)
		return self._get_most_prob(idx, hmm.hidden_states)

	def _tranverse(self, delta, backwards, seq_len, num_states):
		max_val = delta[seq_len-1][0]
		max_id  = 0
		for key ,val in enumerate(delta[seq_len -1]):
			if val > max_val:
				val = max_val
				max_id = key - 1

		ret = []
		ret.append(max_id)
		for trs in range(seq_len -1 , 0, -1):
			ret.append(backwards[trs][max_id])
			max_id = backwards[trs][max_id]
		return ret

	def _get_most_prob(self, idx, hidden_states):
		a = [ hidden_states[i] for i in idx]
		#return a.reverse()
		a.reverse()
		return a

	def do_inference(self):
		pass

def normnize_arr(arr):
	sumv = sum(arr)
	return [ v / sumv for v in arr]



class HmmFucEm(HmmFunc):
	def __init__(self, hmm=None):
		self.description = None
		self.hmm = hmm
		HmmFunc.__init__(self,None)

	def cal_step_forward(self, observerd_datas):
		hmm = self.hmm
		ret = []
		for j,state in enumerate(observerd_datas):
			if j == 0:
				#alpha(t,j) = a0(j) * emission(j,0)
				start_idx = self._find_start_state(observerd_datas[0], hmm.observed_states)
				alpha = [1.0 * val * hmm.emission_matrix[i][start_idx] for i , val in enumerate(hmm.start_prob)]
				ret.append(copy.copy(alpha))
				continue

			tmp = [0.0] * len(alpha)
			# Transition : N * N
			# Emission   : N * Ob
			# alpha(t,j) = sum_k( alpha(t-1,k)* transition(k,m) )* emission(m,j)
			idx = self._find_start_state(state,hmm.observed_states)
			num_states = hmm.num_states
			for k in range(num_states):
				sum = 0
				for m in range(num_states):
					sum += alpha[m] * hmm.transition_matrix[m][k]
				#endfor m
				tmp[k] = sum * hmm.emission_matrix[k][idx]
			#endfor k
			alpha = [ v for v in tmp]
			ret.append(copy.copy(alpha))

			self.forward = copy.copy(ret)
		return np.sum(self.forward[num_states-1])

	def cal_step_backward(self, observerd_datas):
		hmm = self.hmm
		ret = []
		num_states = hmm.num_states
		ob_len = len(observerd_datas)
		# init set all one's
		beta = [1.0] * num_states
		ret.append(copy.copy(beta))
		for j in range(ob_len-1, 0,-1):
			state = observerd_datas[j] 
			tmp = [0.0] * len(beta)
			# Transition : N * N
			# Emission   : N * Ob
			# beta(t, k) =  sum_m( beta(t+1,m) * transition( k, m) * emision(m, obj(t+1)) )
			idx = self._find_start_state(state,hmm.observed_states)
			# k(n+1) -- j
			for k in range(num_states):
				sumv = 0
				for m in range(num_states):
					sumv +=  beta[m] * hmm.transition_matrix[k][m] * hmm.emission_matrix[m][idx]
				#endfor m
				tmp[k] = sumv
			#endfor k
			beta = [ v for v in tmp]
			ret.append(copy.copy(beta))
			tp = []
			for v in range(len(ret)-1,-1,-1):
				tp.append(ret[v])
		#return prob j ==0

		self.backwards = copy.copy(tp)
		state = observerd_datas[0]
		idx = self._find_start_state(state,hmm.observed_states)
		prob = 0.0
		for v in range(num_states):
			prob += beta[v] * hmm.start_prob[v] * hmm.emission_matrix[v][idx]
		return prob

	def _cal_gamma_t_expectation_maximization_e(self, num_states, ob_len):
		ret=[]
		for t in range(ob_len):
			v = [0.0] * num_states
			sumall = 0.0
			for j in range(num_states):
				v[j] = self.forward[t][j] * self.backwards[t][j]
				sumall += v[j]
			# for j in range(num_states):
			# 	v[j] /= sumall
			v = np.array(v) / sumall
			ret.append(v)
		return ret

	def _cal_gamma_ij_t_expectation_maximization_e(self, num_states, ob_len, obs):
		tmp = []
		for t in range(ob_len - 1):
			idx = self._find_start_state( obs[t+1], self.hmm.observed_states)
			gamma_ij_t = [[0.0] * num_states] * num_states
			#gamma_ij_t = np.array(gamma_ij_t)
			sum_ij = 0.0
			for i in range(num_states):
				for j in range(num_states):
					gamma_ij_t[i][j] = self.forward[t][i] 
					gamma_ij_t[i][j] *= self.hmm.transition_matrix[i][j] 
					gamma_ij_t[i][j] *= self.hmm.emission_matrix[j][idx] 
					gamma_ij_t[i][j] *= self.backwards[t+1][j]
					sum_ij += gamma_ij_t[i][j]
			#endfor_i
			# for i in range(num_states):
			# 	for j in range(num_states):
			# 		gamma_ij_t[i][j] /=1
			gamma_ij_t = np.array(gamma_ij_t) / sum_ij
			tmp.append(gamma_ij_t)
		#endfor_t
		return tmp
	def _update_params_expectation_maximization_m(self, obs):
		num_states = self.hmm.num_states
		ob_len = len(obs)
		gamma_t = self._cal_gamma_t_expectation_maximization_e(num_states, ob_len)
		gamma_ij_t = self._cal_gamma_ij_t_expectation_maximization_e(num_states, ob_len, obs)
		#update params
		self.hmm.start_prob = [v for v in gamma_t[0]]

		#update transition_matrix
		for i in range(num_states):
			for j in range(num_states):
				denum = 0.0
				num = 0.0
				for t in range(ob_len-1):
					denum += gamma_ij_t[t][i][j]
					num += gamma_t[t][i]
				self.hmm.transition_matrix[i][j] = 1.0 * denum/ num
			self.hmm.transition_matrix[i] = normnize_arr(self.hmm.transition_matrix[i])
		#endfor transition_matrix

		#update emission_matrix
		for k in range(self.hmm.num_ob_states):
			for j in range(num_states):
				self.hmm.emission_matrix[j][k] = 0.0

		for k in range(self.hmm.num_ob_states):	
			sum_gamma_t_j = 0.0
			for j in range(num_states):
				for t in range(ob_len):
					sum_gamma_t_j += gamma_t[t][j]
					idx = self._find_start_state(obs[t], self.hmm.observed_states)
					if idx == k:
						self.hmm.emission_matrix[j][k] += gamma_t[t][j]
			for j in range(num_states):
				self.hmm.emission_matrix[j][k] /= sum_gamma_t_j
		for i in range(self.hmm.num_states):
			self.hmm.emission_matrix[i] = normnize_arr(self.hmm.emission_matrix[i])

		#end_all

	def _do_safe_mathlog(self):

		num_states = self.hmm.num_states
		num_ob_states = self.hmm.num_ob_states

		for k in range(num_states):
			self.hmm.start_prob[k] = 0.00001 + 0.99999 * self.hmm.start_prob[k]

		for k in range(num_states):
			for j in range(num_states):
				self.hmm.transition_matrix[k][j] = 0.00001 + 0.99999 * self.hmm.transition_matrix[k][j]


		for k in range(num_states):
			for s in range(num_ob_states):
				self.hmm.emission_matrix[k][s] = 0.00001 + 0.99999 * self.hmm.emission_matrix[k][s]



	def _cal_prob_em(self, obs):

		from math import log

		num_states = self.hmm.num_states
		num_ob_states = self.hmm.num_ob_states
		ob_len = len(obs)
		gamma_t = self._cal_gamma_t_expectation_maximization_e(num_states, ob_len)
		gamma_ij_t = self._cal_gamma_ij_t_expectation_maximization_e(num_states, ob_len, obs)

		Qsum = 0.0

		self._do_safe_mathlog()

		for k in range(num_states):
			Qsum += gamma_t[0][k] * log(self.hmm.start_prob[k])

		gamma_ij_t_sum = [[1.0] * num_states] * num_states
		for t in range(ob_len-1):
			for i in range(num_states):
				for j in range(num_states):
					gamma_ij_t_sum[i][j] += gamma_ij_t[t][i][j]

		for i in range(num_states):
			for j in range(num_states):
				Qsum += gamma_ij_t_sum[i][j] * log(self.hmm.transition_matrix[i][j])


		for i in range(num_states):
			for j in range(num_ob_states):
				for t in range(ob_len):
					Qsum += gamma_t[t][i] * log(self.hmm.emission_matrix[i][j])

		return Qsum




	def do_inference(self, obs):
		QsumOld = self._cal_prob_em(obs)
		self._update_params_expectation_maximization_m(obs)
		QsumNew = self._cal_prob_em(obs)

		from math import fabs

		while fabs(QsumNew - QsumOld) > 0.0000001:
			QsumOld = QsumNew
			self._update_params_expectation_maximization_m(obs)
			QsumNew = self._cal_prob_em(obs)

			print 'QsumNew=%s,QsumOld=%s' % (QsumNew,QsumOld)

		return QsumNew


if __name__ == '__main__':
	hmm = Hmm()
	hmm.hidden_states = ['Sunny','Cloudy','Rainy']
	hmm.observed_states = ['Damp','Soggy','Dry','Dryish']
	hmm.num_states = 3
	#hmm.num_ob_states = len(hmm.observed_states) 
	hmm.num_ob_states = 4
	hmm.start_prob =[0.63,0.17,0.20]
	hmm.transition_matrix=[[0.5,0.25,0.25], [0.375,0.125,0.375],[0.125,0.675,0.375]]
	hmm.emission_matrix=[[0.15,0.05,0.6,0.20],[0.25,0.25,0.25,0.25],[0.35,0.5,0.05,0.10]]
	
      
	func = HmmFunc('cal')
	#given transition matrix, emission_matrix, init_prob , observerd_sequences to calculate probility
	prob = func.cal_likehood(['Damp','Soggy','Dry'], hmm)
	print 'cal_likehood array=%s with probility=%s' % ( ['Damp','Soggy','Dry'], prob)
	
	#given transition matrix, emission_matrix, init_prob, observerd_sequences to calculate most prob hidden states
	most_prob = func.cal_most_prob_seq(['Damp','Soggy','Dry'], hmm)
	print 'cal_most_prob_seq=%s ' % (most_prob)



	emFunc = HmmFucEm(hmm=hmm)
	forward = emFunc.cal_step_forward(['Damp','Soggy','Dry'])
	#print forward
	print forward, emFunc.forward
	backwards = emFunc.cal_step_backward(['Damp','Soggy','Dry'])
	print backwards, emFunc.backwards


	pp = 0.0
	for i , j in zip(emFunc.forward[2], emFunc.backwards[2]):
		pp += i * j
	print 'pp=%s' % pp

	#do inference
	print '++++++++++++++==do_inference_++++++++++++='
	inferenceFunc = HmmFucEm(hmm=hmm)
	obs = ['Damp','Soggy','Dry']
	inferenceFunc.cal_step_forward(obs)
	inferenceFunc.cal_step_backward(obs)
	inferenceFunc.do_inference(obs)
	# print inferenceFunc.hmm.start_prob

	# import CheckFunc as checkFunc

	# print 'init_prob=%s' % checkFunc.check(inferenceFunc.hmm.start_prob)


	# print inferenceFunc.hmm.transition_matrix
	# print inferenceFunc.hmm.emission_matrix






标注


1. 只用了一个sample

2.参考《统计学习方法-李航》


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值