import numpy as np
import os
from sklearn import svm
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
from scipy import interpolate
# Resample a raw signal onto a fixed-length grid.
def data_std(vector_raw):
    """Resample *vector_raw* onto a fixed 1000-point grid via spline interpolation.

    Parameters
    ----------
    vector_raw : sequence of float
        Raw signal of arbitrary length (at least 2 samples).

    Returns
    -------
    numpy.ndarray
        Interpolated signal of exactly 1000 samples.
    """
    vector_raw = np.asarray(vector_raw, dtype=float)
    x_raw = np.linspace(1, 1000, len(vector_raw))
    x_new = np.linspace(1, 1000, 1000)
    # splrep defaults to a cubic spline (k=3), which requires >= 4 samples;
    # lower the degree for very short inputs instead of crashing.
    k = min(3, len(vector_raw) - 1)
    tck = interpolate.splrep(x_raw, vector_raw, k=k)
    return interpolate.splev(x_new, tck)
# Dataset loader + SVM model wrapper.
class dataset_model:
    """Loads spline-resampled signal files, then trains and evaluates an RBF SVM."""

    def __init__(self):
        self.train_data = []    # stacked training vectors; ndarray once loaded
        self.test_data = []     # stacked test vectors; ndarray once loaded
        self.train_label = []   # integer class labels (folder index - 1)
        self.test_label = []
        self.scaler = None      # StandardScaler fitted on the training data

    def _load_split(self, path, i_max, j_max):
        """Read ``path/<i>/test<j>.txt`` for i in 1..i_max, j in 0..j_max.

        Returns (data, labels): data is an (n_samples, 1000) ndarray, or []
        when no file was found; labels is a list of ints (folder number - 1).
        Missing files are silently skipped, matching the original behavior.
        """
        rows, labels = [], []
        for i in range(1, i_max + 1):
            for j in range(0, j_max + 1):
                file_path = os.path.join(path, f"{i}", f"test{j}.txt")
                if not os.path.exists(file_path):
                    continue
                # Resample every signal to a fixed 1000-point grid.
                rows.append(data_std(list(np.loadtxt(file_path))))
                labels.append(i - 1)
        # Stack once at the end instead of np.vstack per file (O(n) vs O(n^2)).
        data = np.vstack(rows) if rows else []
        return data, labels

    def train_data_load(self, path, i_max, j_max):
        """Load the training split, replacing any previously loaded training set."""
        # Reset BOTH data and labels together: the original reset only the
        # labels, so a second call left data and labels inconsistent.
        self.train_data, self.train_label = self._load_split(path, i_max, j_max)

    def test_data_load(self, path, i_max, j_max):
        """Load the test split, replacing any previously loaded test set."""
        self.test_data, self.test_label = self._load_split(path, i_max, j_max)

    def train_model(self):
        """Standardize the training data and grid-search an RBF-kernel SVM.

        Returns
        -------
        GridSearchCV
            The fitted search object (usable directly as a classifier).

        Raises
        ------
        ValueError
            If training data and labels have different lengths.
        """
        if len(self.train_data) != len(self.train_label):
            raise ValueError(
                f"Training data and labels have inconsistent lengths: "
                f"{len(self.train_data)} vs {len(self.train_label)}")
        # Fit the scaler on the training data only; the test split reuses it.
        self.scaler = StandardScaler()
        self.train_data_std = self.scaler.fit_transform(self.train_data)
        # Hyper-parameter grid for the RBF kernel, tuned by 5-fold CV.
        param_grid = {'C': [1e1, 1e2, 1e3], 'gamma': [0.0001, 0.0005, 0.001]}
        clf = GridSearchCV(svm.SVC(kernel='rbf', class_weight='balanced'),
                           param_grid, cv=5)
        clf.fit(self.train_data_std, self.train_label)
        print("Best parameters:", clf.best_params_)
        return clf

    def _report(self, clf, data, labels, acc_name, title):
        """Print accuracy + classification report and plot the confusion matrix."""
        print(acc_name, clf.score(data, labels))
        pred = clf.predict(data)
        # sklearn's contract is (y_true, y_pred); the original passed them
        # swapped, which transposes the confusion matrix and exchanges
        # per-class precision with recall.
        print(classification_report(labels, pred, zero_division=0))
        cm = confusion_matrix(labels, pred)
        disp = ConfusionMatrixDisplay(confusion_matrix=cm,
                                      display_labels=np.unique(labels))
        disp.plot(cmap=plt.cm.Blues)
        plt.title(title)
        plt.show()

    def test_model(self, clf, testwho):
        """Evaluate *clf* on the 'train' or 'test' split and plot results.

        Raises ValueError if the required split has not been prepared
        (train_model not called, or test data/labels inconsistent).
        """
        if testwho == 'train':
            if getattr(self, 'train_data_std', None) is None:
                raise ValueError(
                    "Scaler has not been fitted yet. Please call 'train_model' first.")
            self._report(clf, self.train_data_std, self.train_label,
                         'Training accuracy:', "Confusion Matrix (Train)")
        elif testwho == 'test':
            if len(self.test_data) != len(self.test_label):
                raise ValueError(
                    f"Test data and labels have inconsistent lengths: "
                    f"{len(self.test_data)} vs {len(self.test_label)}")
            # Reuse the scaler fitted on the training data.
            if self.scaler is None:
                raise ValueError(
                    "Scaler has not been fitted yet. Please call 'train_model' first.")
            self.test_data_std = self.scaler.transform(self.test_data)
            self._report(clf, self.test_data_std, self.test_label,
                         'Test accuracy:', "Confusion Matrix (Test)")
# Script entry point: train on one directory tree, evaluate on another.
if __name__ == '__main__':
    train_root = 'D:\\PyCharm\\pythonProject\\SVMclassifierdata\\test'
    test_root = 'D:\\PyCharm\\pythonProject\\SVMclassifierdata\\testdata'
    n_classes, n_files = 2, 25

    height = dataset_model()
    height.train_data_load(train_root, n_classes, n_files)
    height.test_data_load(test_root, n_classes, n_files)

    classifier = height.train_model()
    # Report on both splits: training fit first, then held-out test data.
    height.test_model(classifier, 'train')
    height.test_model(classifier, 'test')
# NOTE: stray non-code prompt text, preserved as a comment so the file parses.
# Translation: "Modify the above code so the SVM training results are
# visualized, and output the complete modified code."