若要让HMM模型的初始概率始终在某个位置为1,可修改对 `self.hmm_model.startprob_` 的赋值。假定希望第 `start_prob_index` 个状态(索引从0开始)的初始概率为1、其余状态为0,可按如下方式修改代码:
```python
import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from scipy.spatial.distance import cdist
from hmmlearn import hmm
import matplotlib.pyplot as plt
import seaborn as sns
class HMMFCM:
    """Fuzzy c-means (FCM) clustering combined with a Gaussian HMM.

    FCM is run on the mean-centred data; its cluster centres seed the HMM
    state means.  The HMM is then trained on the samples reordered by their
    hard FCM label.  The HMM start distribution is one-hot at
    ``start_prob_index``.
    """

    def __init__(self, n_clusters=3, fuzziness=2, max_iter=100, tol=1e-4,
                 n_hmm_states=3, start_prob_index=0):
        """Store the hyper-parameters of the combined model.

        Args:
            n_clusters: Number of FCM clusters.
            fuzziness: FCM fuzzifier ``m`` (must be > 1 when clustering runs).
            max_iter: Maximum number of FCM iterations.
            tol: FCM convergence threshold on the norm of the centre shift.
            n_hmm_states: Number of hidden HMM states.  ``fit`` requires this
                to equal ``n_clusters`` because the FCM centres are used as
                the initial HMM means.
            start_prob_index: Index of the state whose start probability is 1.
        """
        self.n_clusters = n_clusters
        self.fuzziness = fuzziness
        self.max_iter = max_iter
        self.tol = tol
        self.n_hmm_states = n_hmm_states
        self.start_prob_index = start_prob_index
        self.scaler = StandardScaler(with_std=False)  # centre only, no scaling
        # Fitted state, populated by fit().
        self.fcm_centers = None
        self.fcm_membership = None
        self.hmm_model = None

    @staticmethod
    def _compute_membership(data, centers, m):
        """Return the FCM membership matrix of ``data`` w.r.t. ``centers``."""
        # The small offset avoids a division by zero when a sample coincides
        # with a centre.
        distances = cdist(data, centers, 'euclidean') + 1e-10
        inv_distances = 1.0 / (distances ** (2 / (m - 1)))
        return inv_distances / np.sum(inv_distances, axis=1, keepdims=True)

    def fuzzy_cmeans(self, data, n_clusters=None, initial_centers=None, m=None,
                     max_iter=None, tol=None):
        """Run fuzzy c-means clustering.

        Any argument left as ``None`` falls back to the value given to the
        constructor (``m`` falls back to ``fuzziness``).

        Args:
            data: Input samples, shape (n_samples, n_features); a 1-D input
                is treated as a single feature column.
            n_clusters: Number of clusters.
            initial_centers: Optional initial centres.  When omitted, the
                centres are initialised with KMeans (``random_state=42``).
            m: Fuzzifier, must be greater than 1.
            max_iter: Maximum number of iterations.
            tol: Convergence threshold on the norm of the centre shift.

        Returns:
            ``(centers, membership)`` where ``membership`` has shape
            (n_samples, n_clusters) and is computed from the returned
            ``centers``.

        Raises:
            ValueError: If ``m`` is not greater than 1.
        """
        data = np.array(data)
        if data.ndim == 1:
            data = data.reshape(-1, 1)
        # Explicit None checks: `x or default` would ignore the instance
        # setting whenever the signature default is truthy.
        n_clusters = self.n_clusters if n_clusters is None else n_clusters
        m = self.fuzziness if m is None else m
        max_iter = self.max_iter if max_iter is None else max_iter
        tol = self.tol if tol is None else tol
        if m <= 1:
            raise ValueError("模糊系数m必须大于1")

        # Initial centres.
        if initial_centers is None:
            kmeans = KMeans(n_clusters=n_clusters, random_state=42)
            kmeans.fit(data)
            centers = kmeans.cluster_centers_
        else:
            centers = np.array(initial_centers)

        # FCM iterations.  The membership is refreshed after every centre
        # update so the returned pair is always consistent, including when
        # the loop body never runs.
        membership = self._compute_membership(data, centers, m)
        for iteration in range(max_iter):
            weights = membership.T ** m
            new_centers = np.dot(weights, data) / np.sum(weights, axis=1, keepdims=True)
            shift = np.linalg.norm(new_centers - centers)
            centers = new_centers
            membership = self._compute_membership(data, centers, m)
            if shift < tol:
                print(f"FCM收敛于第{iteration + 1}次迭代")
                break
        return centers, membership

    def _prepare_hmm_observations(self, X_scaled, membership, return_indices=False):
        """Build the HMM observation sequence by grouping samples per cluster.

        Samples are reordered by their hard FCM label (argmax of the
        membership).  The sort is stable, so samples sharing a label keep
        their input order.  Note that this reordering replaces the original
        sample order; it does not preserve it.

        Args:
            X_scaled: Centred samples, shape (n_samples, n_features).
            membership: FCM membership matrix, shape (n_samples, n_clusters).
            return_indices: When true, also return the permutation applied.

        Returns:
            The reordered samples, or ``(samples, indices)`` when
            ``return_indices`` is true, with ``samples == X_scaled[indices]``.
        """
        fcm_labels = np.argmax(membership, axis=1)
        sorted_indices = np.argsort(fcm_labels, kind="stable")
        observations = X_scaled[sorted_indices]
        if return_indices:
            return observations, sorted_indices
        return observations

    def fit(self, X):
        """Train the combined HMM-FCM model.

        Args:
            X: Input samples, shape (n_samples, n_features).

        Returns:
            ``self``.

        Raises:
            ValueError: If ``n_clusters`` differs from ``n_hmm_states``.
        """
        if self.n_clusters != self.n_hmm_states:
            # The FCM centres initialise the HMM means one-to-one.
            raise ValueError("n_clusters必须等于n_hmm_states")

        # 1. Pre-processing (centring only).
        X_scaled = self.scaler.fit_transform(X)
        # 2. Fuzzy clustering.
        print("执行第一次模糊聚类(FCM)...")
        self.fcm_centers, self.fcm_membership = self.fuzzy_cmeans(X_scaled)
        # 3. Observation sequence for the HMM.
        print("生成HMM观测序列...")
        obs_sequence = self._prepare_hmm_observations(X_scaled, self.fcm_membership)
        # 4. HMM training.
        print("训练HMM模型...")
        self.hmm_model = hmm.GaussianHMM(
            n_components=self.n_hmm_states,
            covariance_type="diag",
            n_iter=200,
            random_state=42,
            init_params="",  # keep the manual initialisation below
            params="tmc",  # do not re-estimate the start probabilities
        )
        # Manual parameter initialisation.
        start_prob = np.zeros(self.n_hmm_states)
        start_prob[self.start_prob_index] = 1
        self.hmm_model.startprob_ = start_prob  # one-hot start distribution
        # Uniform transition matrix.
        self.hmm_model.transmat_ = (
            np.ones((self.n_hmm_states, self.n_hmm_states)) / self.n_hmm_states
        )
        # FCM centres as initial state means.
        self.hmm_model.means_ = self.fcm_centers.copy()
        # Per-feature data variance as the initial diagonal covariance.
        self.hmm_model.covars_ = np.tile(np.var(X_scaled, axis=0), (self.n_hmm_states, 1))
        self.hmm_model.fit(obs_sequence)

        # Report the trained parameters.
        print("HMM训练完成!")
        print(f"初始状态概率:\n{self.hmm_model.startprob_}")
        print(f"转移矩阵:\n{self.hmm_model.transmat_}")
        print(f"高斯均值:\n{self.hmm_model.means_}")
        # Model evaluation on the training sequence.
        log_likelihood = self.hmm_model.score(obs_sequence)
        print(f"模型对数似然: {log_likelihood:.2f}")
        return self

    def predict(self, X):
        """Predict the hidden state of every row of ``X``.

        Memberships are computed against the centres learned in ``fit`` (no
        re-clustering).  The samples are grouped by cluster as in training,
        decoded by the HMM, and the states are mapped back so that
        ``result[i]`` is the state of ``X[i]``.

        Args:
            X: Input samples, shape (n_samples, n_features).

        Returns:
            Array of hidden-state indices aligned with the rows of ``X``.

        Raises:
            ValueError: If the model has not been fitted.
        """
        if self.hmm_model is None:
            raise ValueError("模型尚未训练,请先调用fit方法")
        X_scaled = self.scaler.transform(X)
        membership = self._compute_membership(X_scaled, self.fcm_centers, self.fuzziness)
        obs_sequence, order = self._prepare_hmm_observations(
            X_scaled, membership, return_indices=True
        )
        sorted_states = self.hmm_model.predict(obs_sequence)
        # Undo the grouping permutation so states line up with the input rows.
        hidden_states = np.empty_like(sorted_states)
        hidden_states[order] = sorted_states
        return hidden_states
# Example usage
if __name__ == "__main__":
    # Synthetic data: three Gaussian blobs, one per state, stacked in order.
    np.random.seed(42)
    n_samples_per_state = 10000
    state_means = ([0, 0], [3, 3], [6, 6])
    X = np.vstack([
        np.random.normal(loc=mean, scale=1, size=(n_samples_per_state, 2))
        for mean in state_means
    ])
    # Ground-truth labels, used only for the comparison plot.
    true_labels = np.repeat([0, 1, 2], n_samples_per_state)

    # Build and train the model, then decode the same data.
    model = HMMFCM(n_clusters=3, n_hmm_states=3, fuzziness=2, start_prob_index=1)
    model.fit(X)
    hidden_states = model.predict(X)

    # Side-by-side scatter plots: truth, FCM hard labels, HMM states.
    fig, (ax_true, ax_fcm, ax_hmm) = plt.subplots(1, 3, figsize=(15, 5))
    xs, ys = X[:, 0], X[:, 1]

    ax_true.scatter(xs, ys, c=true_labels, cmap='viridis')
    ax_true.set_title("真实状态")

    fcm_labels = np.argmax(model.fcm_membership, axis=1)
    ax_fcm.scatter(xs, ys, c=fcm_labels, cmap='plasma')
    ax_fcm.set_title("FCM聚类结果")

    hmm_means = model.hmm_model.means_
    ax_hmm.scatter(xs, ys, c=hidden_states, cmap='coolwarm')
    ax_hmm.scatter(hmm_means[:, 0], hmm_means[:, 1],
                   c='red', marker='X', s=200, label='HMM中心')
    ax_hmm.set_title("HMM隐藏状态")
    ax_hmm.legend()
    plt.show()

    # Heat map of the learned transition matrix.
    fig_trans, ax_trans = plt.subplots(figsize=(6, 4))
    sns.heatmap(model.hmm_model.transmat_, annot=True, cmap='Blues', cbar=False, ax=ax_trans)
    ax_trans.set_title("状态转移矩阵")
    ax_trans.set_xlabel("To State")
    ax_trans.set_ylabel("From State")
    plt.show()
```
在上述代码里,`__init__` 方法新增了 `start_prob_index` 参数,用于指定初始概率为1的状态索引(从0开始,须小于 `n_hmm_states`)。在 `fit` 方法中,`start_prob` 数组先被初始化为全0,接着把 `start_prob_index` 位置的值设为1,再将其赋值给 `self.hmm_model.startprob_`。由于构造 `GaussianHMM` 时传入了 `init_params=""`,`fit` 不会用自动初始化覆盖这一手动设置的初始概率。