import os
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
os.environ['TF_DETERMINISTIC_OPS'] = '1'
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Dropout, BatchNormalization, Concatenate, Reshape, Conv1D, \
GlobalAveragePooling1D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
import tifffile
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import gc
import warnings
warnings.filterwarnings('ignore')
# Reset any Keras graph/session state left over in this interpreter.
tf.keras.backend.clear_session()

# GPU configuration: enable memory growth so TensorFlow allocates VRAM lazily
# instead of grabbing the whole card up front.
try:
    physical_gpus = tf.config.experimental.list_physical_devices('GPU')
    if not physical_gpus:
        print("⚠️ 未检测到 GPU,使用 CPU 训练")
    else:
        for device in physical_gpus:
            tf.config.experimental.set_memory_growth(device, True)
        print("✅ GPU 加速已启用")
except Exception as e:
    print(f"❌ GPU 配置失败: {str(e)}")
class MultiModalDataGenerator(tf.keras.utils.Sequence):
    """Batch generator pairing TIFF images with per-sample chemical features.

    Each image is normalized to shape ``(39, 7, 4)``; each chemical vector is
    expected to have shape ``(39,)``. Samples that cannot be read, have an
    unexpected shape, or contain NaN/constant data are replaced with mean
    placeholders and flagged in a per-batch validity mask, so batch sizes
    stay constant.

    NOTE(review): ``__getitem__`` returns ``(inputs, labels, valid_mask)``.
    If this Sequence were ever passed to ``Model.fit`` directly, Keras would
    treat the third element as sample weights; training here goes through
    ``to_dataset()``, which drops the mask.
    """

    def __init__(self, image_paths, chemical_data, labels, batch_size=16, shuffle=True):
        """
        Args:
            image_paths: sequence of TIFF file paths, one per sample.
            chemical_data: ``np.ndarray`` or ``pd.DataFrame`` of chemical
                features, one 39-value row per sample.
            labels: integer class labels aligned with ``image_paths``.
            batch_size: number of samples per batch.
            shuffle: reshuffle sample order in ``on_epoch_end``.
        """
        self.image_paths = image_paths
        self.chemical_data = chemical_data
        self.labels = labels
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.indices = np.arange(len(self.image_paths))
        # Mean placeholders that stand in for unreadable/malformed samples.
        self.image_mean = self._calculate_image_mean()
        self.chem_mean = self._calculate_chem_mean()
        self.on_epoch_end()

    def _calculate_image_mean(self):
        """Return the mean image, shape (39, 7, 4), over up to 100 readable samples."""
        sample_img = np.zeros((39, 7, 4), dtype=np.float32)
        count = 0
        for img_path in self.image_paths[:min(100, len(self.image_paths))]:
            try:
                img = tifffile.imread(img_path)
                # Normalize axis order to (39, 7, 4).
                if img.shape == (7, 4, 39):
                    img = np.moveaxis(img, -1, 0)
                elif img.shape == (39, 4, 7):
                    img = np.transpose(img, (0, 2, 1))
                if img.shape == (39, 7, 4):
                    sample_img += img.astype(np.float32)
                    count += 1
            except Exception:
                # Fix: was a bare ``except`` that also swallowed SystemExit /
                # KeyboardInterrupt. Unreadable files are simply skipped;
                # the mean is best-effort.
                continue
        return sample_img / max(count, 1) if count > 0 else np.zeros((39, 7, 4))

    def _calculate_chem_mean(self):
        """Return the per-feature mean chemical vector (NaN-aware for arrays)."""
        if isinstance(self.chemical_data, np.ndarray):
            return np.nanmean(self.chemical_data, axis=0)
        elif isinstance(self.chemical_data, pd.DataFrame):
            return self.chemical_data.mean().values
        else:
            # Unknown container type: fall back to a zero vector (39 features).
            return np.zeros(39)

    def __len__(self):
        """Number of batches per epoch; the last batch may be smaller."""
        return int(np.ceil(len(self.indices) / self.batch_size))

    def __getitem__(self, idx):
        """Assemble batch ``idx``.

        Returns:
            ``((X_img, X_chem), y_batch, valid_mask)`` where ``X_img`` has
            shape ``(B, 39, 7, 4)`` float32, ``X_chem`` shape ``(B, 39)``
            float32, ``y_batch`` int32 labels, and ``valid_mask`` a boolean
            array — ``False`` marks samples replaced by mean placeholders.
        """
        low = idx * self.batch_size
        high = min(low + self.batch_size, len(self.indices))
        batch_indices = self.indices[low:high]
        batch_images = []
        batch_chemical = []
        batch_labels = []
        batch_valid_mask = []  # True where the sample's real data was usable
        for i in batch_indices:
            valid_sample = True
            try:
                # Load and shape-normalize the image to (39, 7, 4).
                img = tifffile.imread(self.image_paths[i])
                if img.shape == (7, 4, 39):
                    img = np.moveaxis(img, -1, 0)
                elif img.shape == (39, 4, 7):
                    img = np.transpose(img, (0, 2, 1))
                elif img.shape != (39, 7, 4):
                    # Unexpected shape: substitute the mean image.
                    img = self.image_mean.copy()
                    valid_sample = False
                img = img.astype(np.float32)
                # Reject NaNs and constant (e.g. all-zero) images.
                if np.isnan(img).any() or img.max() == img.min():
                    img = self.image_mean.copy()
                    valid_sample = False
            except Exception:
                # Load failure (missing/corrupt file): use the mean image.
                img = self.image_mean.copy()
                valid_sample = False
            try:
                # Fetch this sample's chemical feature row.
                if isinstance(self.chemical_data, np.ndarray):
                    chem_feat = self.chemical_data[i].reshape(-1)
                else:
                    chem_feat = self.chemical_data.iloc[i].values.reshape(-1)
                if chem_feat.shape != (39,) or np.isnan(chem_feat).any():
                    chem_feat = self.chem_mean.copy()
                    valid_sample = False
            except Exception:
                # Fix: was a bare ``except``; any lookup/shape error falls
                # back to the mean vector.
                chem_feat = self.chem_mean.copy()
                valid_sample = False
            batch_images.append(img)
            batch_chemical.append(chem_feat)
            batch_labels.append(self.labels[i])
            batch_valid_mask.append(valid_sample)
        # Stack per-sample pieces into fixed-dtype batch arrays.
        X_img = np.stack(batch_images)
        X_chem = np.array(batch_chemical, dtype=np.float32)
        y_batch = np.array(batch_labels, dtype=np.int32)
        valid_mask = np.array(batch_valid_mask, dtype=bool)
        return (X_img, X_chem), y_batch, valid_mask

    def on_epoch_end(self):
        """Reshuffle the sample order when ``shuffle`` is enabled."""
        if self.shuffle:
            np.random.shuffle(self.indices)

    def to_dataset(self):
        """Wrap this generator in a ``tf.data.Dataset`` (validity mask dropped).

        NOTE(review): Keras never calls ``on_epoch_end`` on the underlying
        Sequence when iterating the Dataset, so the shuffled order is fixed
        for the Dataset's lifetime — confirm this is acceptable for training.
        """
        def gen():
            for i in range(len(self)):
                inputs, labels, _ = self[i]  # validity mask intentionally ignored
                yield inputs, labels

        # Explicit dtypes/shapes so Keras can trace the input pipeline.
        output_signature = (
            (
                tf.TensorSpec(shape=(None, 39, 7, 4), dtype=tf.float32),  # image input
                tf.TensorSpec(shape=(None, 39), dtype=tf.float32)          # chemical input
            ),
            tf.TensorSpec(shape=(None,), dtype=tf.int32)                   # labels
        )
        return tf.data.Dataset.from_generator(
            gen,
            output_signature=output_signature
        ).prefetch(tf.data.AUTOTUNE)
class MultiModalFusionModel:
    """End-to-end pipeline: load paired image/chemical samples, build a
    two-branch fusion network, train it, and evaluate with diagnostics."""

    def __init__(self, img_root="E:\\西北地区铜镍矿\\多模态测试\\图片训练",
                 data_path="E:\\西北地区铜镍矿\\数据\\训练数据.xlsx"):
        """
        Args:
            img_root: directory containing one sub-directory of TIFFs per class.
            data_path: Excel file with sample names, class labels and features.
        """
        self.img_root = img_root
        self.data_path = data_path
        self.scaler = StandardScaler()  # fitted on the training split in train()
        self.model = None
        self.history = None

    def load_data(self):
        """Read the Excel sheet and resolve each sample's image path.

        Returns:
            ``(image_paths, chemical_data, labels_array)``. Samples whose image
            file is missing keep a placeholder path so the data generator can
            substitute mean data later.

        Raises:
            ValueError: if the sheet lacks the 'name' or 'class' column.
        """
        print("🔍 正在加载数据...")
        df = pd.read_excel(self.data_path)
        print(f"原始数据形状: {df.shape}")
        required = ['name', 'class']
        for col in required:
            if col not in df.columns:
                raise ValueError(f"Excel 缺少必要列: {col}")
        # Columns 6..44 are assumed to hold the 39 chemical features —
        # TODO confirm against the spreadsheet layout.
        feature_cols = df.columns[6:45]
        chemical_data = df[feature_cols].select_dtypes(include=[np.number])
        label_map = {'positive': 0, 'neutral': 1, 'negative': 2}
        image_paths, labels_list = [], []
        for _, row in df.iterrows():
            name = row['name']
            cls = row['class']
            # Skip rows with a non-string name or an unknown class label.
            if not isinstance(name, str) or cls not in label_map:
                continue
            class_dir = os.path.join(self.img_root, cls)
            found = False
            for ext in ['', '.tif', '.tiff']:
                path = os.path.join(class_dir, f"{name}{ext}")
                if os.path.exists(path):
                    image_paths.append(path)
                    labels_list.append(label_map[cls])
                    found = True
                    break
            if not found:
                # Keep the sample even without an image; the generator will
                # fall back to mean placeholder data for this path.
                image_paths.append(os.path.join(class_dir, "placeholder"))
                labels_list.append(label_map[cls])
        labels_array = np.array(labels_list)
        print(f"✅ 加载 {len(image_paths)} 个样本")
        # Fix: minlength=3 prevents an IndexError below when a class is absent.
        counts = np.bincount(labels_array, minlength=3)
        print(f"📊 标签分布: positive={counts[0]}, neutral={counts[1]}, negative={counts[2]}")
        return image_paths, chemical_data, labels_array

    def build_model(self):
        """Build and compile the two-branch fusion network.

        Image branch: (39, 7, 4) → reshape to (39, 28) → Conv1D stack → dense.
        Chemical branch: (39,) → dense stack. Both 128-d embeddings are
        concatenated and classified into 3 softmax classes.
        """
        print("🧱 正在构建模型...")
        # Inputs
        image_input = Input(shape=(39, 7, 4), name='image_input')
        chem_input = Input(shape=(39,), name='chemical_input')
        # Image branch: flatten the (7, 4) spatial dims into 28 channels
        # so Conv1D can slide over the 39-step axis.
        x = Reshape((39, 28))(image_input)
        x = Conv1D(64, 3, activation='relu', padding='same')(x)
        x = BatchNormalization()(x)
        x = Conv1D(128, 3, activation='relu', padding='same')(x)
        x = GlobalAveragePooling1D()(x)
        x = Dense(256, activation='relu')(x)
        x = Dropout(0.3)(x)
        img_features = Dense(128, activation='relu')(x)
        # Chemical branch
        y = Dense(128, activation='relu')(chem_input)
        y = BatchNormalization()(y)
        y = Dropout(0.3)(y)
        y = Dense(256, activation='relu')(y)
        y = Dropout(0.3)(y)
        chem_features = Dense(128, activation='relu')(y)
        # Fusion head
        merged = Concatenate()([img_features, chem_features])
        z = Dense(256, activation='relu')(merged)
        z = Dropout(0.4)(z)
        z = Dense(128, activation='relu')(z)
        z = Dropout(0.3)(z)
        output = Dense(3, activation='softmax')(z)
        model = Model(inputs=[image_input, chem_input], outputs=output)
        # clipnorm guards against exploding gradients on noisy batches.
        optimizer = Adam(learning_rate=1e-4, clipnorm=1.0)
        model.compile(
            loss='sparse_categorical_crossentropy',
            optimizer=optimizer,
            metrics=['accuracy']
        )
        print("✅ 模型输入顺序: [图像输入, 化学输入]")
        print("✅ 模型输入形状:", [i.shape for i in model.inputs])
        print("✅ 模型输出形状:", model.output.shape)
        self.model = model
        return model

    def train(self, image_paths, chemical_data, labels, test_size=0.2, batch_size=16, epochs=50):
        """Split, scale, and train the compiled model.

        The StandardScaler is fitted on the training chemistry only, then
        applied to both splits (no test-set leakage).
        """
        print("🚀 开始训练...")
        # Stratified split keeps the class ratios in both subsets.
        X_train_img, X_test_img, X_train_chem, X_test_chem, y_train, y_test = train_test_split(
            image_paths, chemical_data, labels,
            test_size=test_size, stratify=labels, random_state=42
        )
        print("🔢 标准化化学数据...")
        self.scaler.fit(X_train_chem)
        X_train_chem_scaled = self.scaler.transform(X_train_chem)
        X_test_chem_scaled = self.scaler.transform(X_test_chem)
        print("🔄 创建数据生成器...")
        train_gen = MultiModalDataGenerator(X_train_img, X_train_chem_scaled, y_train, batch_size, shuffle=True)
        val_gen = MultiModalDataGenerator(X_test_img, X_test_chem_scaled, y_test, batch_size, shuffle=False)
        # Wrap both generators in tf.data pipelines (prefetch enabled).
        train_ds = train_gen.to_dataset()
        val_ds = val_gen.to_dataset()
        callbacks = [
            EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True, verbose=1),
            ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=8, min_lr=1e-6, verbose=1),
            ModelCheckpoint('best_multimodal_model.keras', save_best_only=True, monitor='val_accuracy', verbose=1)
        ]
        print("⏳ 训练中...")
        self.history = self.model.fit(
            train_ds,
            validation_data=val_ds,
            epochs=epochs,
            callbacks=callbacks,
            verbose=1
        )
        return self.history

    def evaluate(self, image_paths, chemical_data, labels):
        """Predict batch-by-batch, keeping only valid (non-placeholder) samples.

        Saves a confusion-matrix PNG and runs a per-class diagnostics pass.

        Returns:
            ``(accuracy, y_pred, y_pred_probs)`` over the valid samples.

        Raises:
            ValueError: if no valid sample survived filtering.
        """
        print("📈 开始评估...")
        chemical_data_scaled = self.scaler.transform(chemical_data)
        test_gen = MultiModalDataGenerator(image_paths, chemical_data_scaled, labels, batch_size=16, shuffle=False)
        all_preds = []
        all_labels = []
        for i in range(len(test_gen)):
            (batch_img, batch_chem), batch_label, valid_mask = test_gen[i]
            batch_pred = self.model.predict([batch_img, batch_chem], verbose=0)
            # Drop placeholder samples — they carry mean data, not real inputs.
            valid_indices = np.where(valid_mask)[0]
            if len(valid_indices) > 0:
                all_preds.append(batch_pred[valid_indices])
                all_labels.append(batch_label[valid_indices])
            # Release batch memory; collect periodically to bound peak RSS.
            del batch_img, batch_chem, batch_label, batch_pred
            if i % 10 == 0:
                gc.collect()
        if not all_preds:
            raise ValueError("没有有效样本用于评估")
        y_pred_probs = np.vstack(all_preds)
        y_true = np.concatenate(all_labels)
        y_pred = np.argmax(y_pred_probs, axis=1)
        print(f"✅ 有效样本数量: {len(y_true)}/{len(labels)}")
        acc = accuracy_score(y_true, y_pred)
        print(f"🎯 准确率: {acc:.4f}")
        print("\n📋 分类报告:")
        print(classification_report(y_true, y_pred, target_names=['positive', 'neutral', 'negative']))
        # Confusion matrix — rendered non-interactively and saved to disk.
        cm = confusion_matrix(y_true, y_pred)
        plt.figure(figsize=(8, 6))
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                    xticklabels=['positive', 'neutral', 'negative'],
                    yticklabels=['positive', 'neutral', 'negative'])
        plt.title('混淆矩阵')
        plt.ylabel('真实标签')
        plt.xlabel('预测标签')
        plt.tight_layout()
        plt.savefig('confusion_matrix.png', dpi=300, bbox_inches='tight')
        plt.close()  # close the figure to free memory
        print("✅ 混淆矩阵已保存为 'confusion_matrix.png'")
        self._analyze_performance(y_true, y_pred, y_pred_probs)
        return acc, y_pred, y_pred_probs

    def _analyze_performance(self, y_true, y_pred, y_pred_probs):
        """Print per-class accuracy, heuristics-based suggestions, and save a
        hard-sample overview figure."""
        class_acc = []
        for cls in range(3):
            idx = (y_true == cls)
            # Guard: a class absent from y_true would crash accuracy_score.
            cls_acc = accuracy_score(y_true[idx], y_pred[idx]) if idx.any() else float('nan')
            class_acc.append(cls_acc)
        print("\n🔍 性能分析:")
        print(f"positive类准确率: {class_acc[0]:.4f}")
        print(f"neutral类准确率: {class_acc[1]:.4f}")
        print(f"negative类准确率: {class_acc[2]:.4f}")
        # Gap between the top predicted probability and the probability assigned
        # to the TRUE class; larger gap = confidently wrong = harder sample.
        max_prob_diff = np.max(y_pred_probs, axis=1) - np.take_along_axis(
            y_pred_probs, y_true.reshape(-1, 1), axis=1).flatten()
        # Fix: sort DESCENDING — the original ascending argsort[:20] picked the
        # 20 EASIEST samples while the figure claims to show the hardest.
        hard_indices = np.argsort(max_prob_diff)[::-1][:20]
        print("\n💡 模型改进建议:")
        if class_acc[1] < 0.5:  # neutral class underperforming
            print("1. neutral类识别困难,建议增加该类样本或使用数据增强")
        if abs(class_acc[0] - class_acc[2]) > 0.2:  # class imbalance signal
            print("2. 检测到类别不平衡问题,建议使用class_weight参数")
        if np.mean(max_prob_diff) > 0.3:  # high average uncertainty
            print("3. 模型对许多样本预测不确定性高,建议增加训练轮数或模型复杂度")
        # Hard-sample overview: one titled subplot per difficult sample.
        plt.figure(figsize=(10, 8))
        for i, idx in enumerate(hard_indices):
            plt.subplot(4, 5, i + 1)
            cls = y_true[idx]
            pred = y_pred[idx]
            prob = y_pred_probs[idx][pred]
            plt.title(f"T:{cls} P:{pred}\nProb:{prob:.2f}")
            # Sample visualization could be added here.
        plt.tight_layout()
        plt.savefig('hard_samples.png', dpi=150)
        plt.close()
        print("✅ 困难样本分析已保存为 'hard_samples.png'")
def main():
    """Entry point: load data, build, train, evaluate, and persist the model."""
    # Drop any state left over from earlier runs in this interpreter.
    tf.keras.backend.clear_session()

    pipeline = MultiModalFusionModel()
    image_paths, chemical_data, labels = pipeline.load_data()
    pipeline.build_model()

    # Train the fusion network.
    pipeline.train(image_paths, chemical_data, labels, batch_size=8, epochs=100)

    # Evaluate on the full sample set.
    acc, y_pred, probs = pipeline.evaluate(image_paths, chemical_data, labels)
    print(f"\n🎉 最终准确率: {acc:.4f}")

    # Persist the trained model for later reuse.
    pipeline.model.save('final_multimodal_model.keras')
    print("💾 模型已保存为 'final_multimodal_model.keras'")


if __name__ == "__main__":
    main()
# TODO: 想把三分类改成二分类——positive 为一类,negative、neutral 合为一类
# (change the task to binary classification: keep "positive" as one class and
# merge "negative" + "neutral" into the other; requires updating label_map,
# the final Dense(3) head, and the report/plot label lists.)