通过SMO算法,用python3写一个SVM的二分类器:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@version: py3.5 @license: Apache Licence
@author: 'Treamy' @contact: chenymcan@gmail.com
@file: my_svm.py @software: PyCharm
@time: 2018/1/26 13:17 @site: www.ymchen.cn
"""
"""
类似sklearn,先输入参数生成一个svm分类器,再通过SVM_training输入数据训练
"""
import numpy as np
import pickle
class SVM(object):
def __init__(self, C=1, toler=0.001, maxIter=500, kernel_option = ("",0)):
self.C = C # 惩罚参数
self.toler = toler # 迭代的终止条件之一
self.b = 0 # 阈值
self.max_iter = maxIter # 最大迭代次数
self.kernel_opt = kernel_option # 选用的核函数及其参数
def SVM_training(self, dataSet, labels, ):
# 1.输入数据集
# train_x_m, train_y_m = np.mat(train_x), np.mat(train_y)dataSet, labels,
self.train_x = np.mat(dataSet) # 训练数据集
self.train_y = np.mat(labels) # 测试数据集
self.train_y = self.train_y.T if np.shape(self.train_y)[0] == 1 else self.train_y # 将其转化为列向量
self.n_samples = np.shape(dataSet)[0] # 训练样本的个数
self.alphas = np.mat(np.zeros((self.n_samples, 1))) # 拉格朗日乘子(一个全0的列向量)
self.error_tmp = np.mat(np.zeros((self.n_samples, 2))) # 保存E的缓存
self.kernel_mat = self.calc_kernel(self.train_x, self.kernel_opt) # 核函数的输出
# 2.开始训练
entireSet = True
alpha_pairs_changed = 0
iteration = 0
while iteration<self.max_iter and (alpha_pairs_changed>0 or entireSet):
print("\t iteration: ",iteration)
alpha_pairs_changed = 0
if entireSet: # 对所有样本
for x in range(self.n_samples):
alpha_pairs_changed += self.choose_and_update(x)
iteration += 1
else: # 对非边界样本
bound_samples = []
for i in range(self.n_samples):
if self.alphas[i, 0] > 0 and self.alphas[i, 0] < self.C:
bound_samples.append(i)
for x in bound_samples:
alpha_pairs_changed += self.choose_and_update(x)
iteration += 1
if entireSet:
entireSet = False
elif alpha_pairs_changed == 0:
entireSet = True
return self
def cal_error(self, alpha_index_k):