#LSTM + LightGBM
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from keras.models import Model
from keras.layers import Input, LSTM
# Load the dataset into a DataFrame (the CSV is read from the working directory).
data = pd.read_csv('embryo_data.csv')
# Separate the time-series columns, the non-time-series columns and the target.
time_series_features = data[['aa1', 'bb1', 'cc1']]
non_time_series_features = data[['q1', 'q2', 'q3', 'q4']]
labels = data['outcome']
# Split BEFORE scaling. Fitting the scalers on the full dataset would let the
# min/max of the test rows leak into the preprocessing of the training rows.
ts_train_raw, ts_test_raw, non_ts_train_raw, non_ts_test_raw, y_train, y_test = train_test_split(
    time_series_features, non_time_series_features, labels, test_size=0.2, random_state=42
)
# Normalisation: learn the ranges from the training rows only, then apply the
# same transform to the test rows (test values may fall slightly outside [0, 1]).
scaler_ts = MinMaxScaler()
X_train_ts = scaler_ts.fit_transform(ts_train_raw)
X_test_ts = scaler_ts.transform(ts_test_raw)
scaler_non_ts = MinMaxScaler()
X_train_non_ts = scaler_non_ts.fit_transform(non_ts_train_raw)
X_test_non_ts = scaler_non_ts.transform(non_ts_test_raw)
# Full matrices scaled with the train-fitted ranges; kept under their original
# names in case later code refers to them.
time_series_scaled = scaler_ts.transform(time_series_features)
non_time_series_scaled = scaler_non_ts.transform(non_time_series_features)
# Reshape the time-series features into the 3-D layout the LSTM expects:
# [samples, time steps, features]. Each selected column becomes one time step
# carrying a single feature.
time_steps = time_series_features.shape[1]
X_train_ts = X_train_ts.reshape((X_train_ts.shape[0], time_steps, 1))
X_test_ts = X_test_ts.reshape((X_test_ts.shape[0], time_steps, 1))
# LSTM feature extractor.
# The LSTM is trained through a temporary sigmoid head before its output is
# used as features. With freshly initialised (untrained) weights the extracted
# features would only be random projections that change on every run.
# Assumes `outcome` is a 0/1 label, as the binary LightGBM objective used later
# in this script implies.
from keras.layers import Dense  # only Input and LSTM are imported at this point

input_ts = Input(shape=(X_train_ts.shape[1], X_train_ts.shape[2]))
lstm_out = LSTM(50, return_sequences=False)(input_ts)
lstm_head = Dense(1, activation='sigmoid')(lstm_out)
lstm_trainer = Model(inputs=input_ts, outputs=lstm_head)
lstm_trainer.compile(optimizer='adam', loss='binary_crossentropy')
# Only the training rows are used, so the test set stays unseen.
lstm_trainer.fit(X_train_ts, y_train, epochs=50, batch_size=32, verbose=0)
# This model shares the trained LSTM layer and exposes its 50-dimensional output.
lstm_feature_extractor = Model(inputs=input_ts, outputs=lstm_out)
# Extract the LSTM features
X_train_lstm_features = lstm_feature_extractor.predict(X_train_ts)
X_test_lstm_features = lstm_feature_extractor.predict(X_test_ts)
# Concatenate the LSTM features with the non-time-series features (column-wise)
X_train_combined = np.concatenate([X_train_lstm_features, X_train_non_ts], axis=1)
X_test_combined = np.concatenate([X_test_lstm_features, X_test_non_ts], axis=1)
import lightgbm as lgb
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
# LightGBM model
# Hold out part of the training rows for early stopping. Monitoring the test
# set would select the number of boosting rounds on the very data that the
# final accuracy is reported for, making that figure optimistic.
X_fit, X_val, y_fit, y_val = train_test_split(
    X_train_combined, y_train, test_size=0.2, random_state=42
)
lgb_train = lgb.Dataset(X_fit, label=y_fit)
lgb_eval = lgb.Dataset(X_val, label=y_val, reference=lgb_train)
# Parameters
params = {
    'objective': 'binary',
    'metric': 'binary_error',
    'boosting_type': 'gbdt',
    'num_leaves': 31,
    'learning_rate': 0.05,
    'feature_fraction': 0.9,
}
# Train LightGBM.
# Early stopping is configured through a callback: the `early_stopping_rounds`
# keyword of `lgb.train` was removed in LightGBM 4.0 and raises a TypeError there.
gbm = lgb.train(
    params,
    lgb_train,
    num_boost_round=100,
    valid_sets=[lgb_eval],
    callbacks=[lgb.early_stopping(stopping_rounds=10)],
)
# Predict probabilities with the best iteration found on the validation split
y_pred = gbm.predict(X_test_combined, num_iteration=gbm.best_iteration)
# Threshold the probabilities at 0.5 to obtain 0/1 class labels
y_pred_classes = (y_pred > 0.5).astype(int)
# Evaluate on the untouched test set
print(f'Test Accuracy: {accuracy_score(y_test, y_pred_classes):.2f}')
print(confusion_matrix(y_test, y_pred_classes))
print(classification_report(y_test, y_pred_classes))
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
# Load the dataset into a DataFrame (the CSV is read from the working directory).
data = pd.read_csv('embryo_data.csv')
# Separate the time-series columns, the non-time-series columns and the target.
time_series_features = data[['aa1', 'bb1', 'cc1']]
non_time_series_features = data[['q1', 'q2', 'q3', 'q4']]
labels = data['outcome']  # target
# Split BEFORE scaling. Fitting the scalers on the full dataset would let the
# min/max of the test rows leak into the preprocessing of the training rows.
ts_train_raw, ts_test_raw, non_ts_train_raw, non_ts_test_raw, y_train, y_test = train_test_split(
    time_series_features, non_time_series_features, labels, test_size=0.2, random_state=42
)
# Normalisation: learn the ranges from the training rows only, then apply the
# same transform to the test rows (test values may fall slightly outside [0, 1]).
scaler_ts = MinMaxScaler()
X_train_ts = scaler_ts.fit_transform(ts_train_raw)
X_test_ts = scaler_ts.transform(ts_test_raw)
scaler_non_ts = MinMaxScaler()
X_train_non_ts = scaler_non_ts.fit_transform(non_ts_train_raw)
X_test_non_ts = scaler_non_ts.transform(non_ts_test_raw)
# Full matrices scaled with the train-fitted ranges; kept under their original
# names in case later code refers to them.
time_series_scaled = scaler_ts.transform(time_series_features)
non_time_series_scaled = scaler_non_ts.transform(non_time_series_features)
# Reshape the time-series features into the 3-D layout the LSTM expects:
# [samples, time steps, features]. Each selected column becomes one time step
# carrying a single feature.
time_steps = time_series_features.shape[1]
X_train_ts = X_train_ts.reshape((X_train_ts.shape[0], time_steps, 1))
X_test_ts = X_test_ts.reshape((X_test_ts.shape[0], time_steps, 1))
from keras.models import Model
from keras.layers import Input, LSTM, Dense, Dropout, Concatenate
from sklearn.metrics import classification_report, confusion_matrix

# Recurrent branch: the reshaped time-series tensor goes through an LSTM with
# 50 units (last output only), followed by dropout.
input_ts = Input(shape=X_train_ts.shape[1:])
lstm_out = Dropout(rate=0.2)(LSTM(units=50)(input_ts))

# Dense branch: the non-time-series features go through one ReLU layer,
# followed by dropout.
input_non_ts = Input(shape=X_train_non_ts.shape[1:])
dense_out = Dropout(rate=0.2)(Dense(units=32, activation='relu')(input_non_ts))

# Join both branches and map them to a single sigmoid unit (binary classification).
merged = Concatenate()([lstm_out, dense_out])
output = Dense(units=1, activation='sigmoid')(merged)

# Build and compile the two-input model.
model = Model(inputs=[input_ts, input_non_ts], outputs=output)
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Fit the model; Keras holds out 20% of the training rows for validation.
history = model.fit(
    x=[X_train_ts, X_train_non_ts],
    y=y_train,
    batch_size=32,
    epochs=50,
    validation_split=0.2,
)

# Score the model on the held-out test set.
loss, accuracy = model.evaluate(x=[X_test_ts, X_test_non_ts], y=y_test)
print(f'Test Accuracy: {accuracy:.2f}')

# Predict probabilities and threshold them at 0.5 to obtain 0/1 labels.
y_pred = model.predict([X_test_ts, X_test_non_ts])
y_pred_classes = np.greater(y_pred, 0.5).astype(int)

print(confusion_matrix(y_test, y_pred_classes))
print(classification_report(y_test, y_pred_classes))