下面是SVM的SMO实现,但是alpha的选择 没有按照启发式的方式,效率有点低下。
# coding=utf-8
from numpy import *
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.patches import Circle
#加载数据集
def loadDataSet(fileName):
dataMat=[]
labelMat=[]
fr=open(fileName)
for line in fr.readlines():
lineArr=line.strip().split('\t')
dataMat.append([float(lineArr[0]),float(lineArr[1])])
labelMat.append(float(lineArr[2]))
return dataMat,labelMat
#i为alpha_1在样本集合中的下标 ,需要在找一个不同的alpha_2 其下表不能为i
def selectJrand(i,m):
j=i
while(j==i):
j=int(random.uniform(0,m))
return j
#alpha需要满足一定的约束条件 0<=alpha<=C sum(alpha*y)=0
def clipAlpha(aj,H,L):
if aj>H:
aj=H
if L>aj:
aj=L
return aj
#简化版本的SMO算法
#dataMatIn:表示输入数据集 classLabel:类别标签 toler:容错率 maxIter:最大迭代次数
def smoSimple(dataMatIn,classLabel,C,toler,maxIter):
dataMatrix=mat(dataMatIn)
labelMat=mat(classLabel).transpose()
b=0
m,n=shape(dataMatrix)
alphas=mat(zeros((m,1)))#需要m个alpha变量
iter=0
while(iter<maxIter):
alphaPairsChanged=0#当alpha不在改变的时候就增加迭代次数
for i in range(m):
#计算结果fXi
fXi=float(multiply(alphas,labelMat).T*(dataMatrix*dataMatrix[i,:].T))+b
Ei=fXi-float(labelMat[i])#计算误差函数
#若误差足够大 则更新该alpha 误差的大小极限为toler
if((labelMat[i]*Ei<-toler) and(alphas[i]<C)) or ((labelMat[i]*Ei>toler) and(alphas[i]>0)):
#查看样本点xi是否满足KKT条件 1.若xi为支持向量,那么alphas(0,C) 若yi*g(xi)>=1 那么alpha=0 若yi*g(xi)<=1那么就会有容错率 ai=C
j=selectJrand(i,m)
fXj=float(multiply(alphas,labelMat).T*(dataMatrix*dataMatrix[j,:].T))+b
Ej=fXj-float(labelMat[j])
alphaIold=alphas[i].copy()
alphaJold=alphas[j].copy()
if (labelMat[i]!=labelMat[j]):
#若i和j的类别不同,需要裁减
L=max(0,alphas[j]-alphas[i])
H=min(C,C+alphas[j]-alphas[i])
else:
L=max(0,alphas[j]+alphas[i]-C)
H=min(C,alphas[j]+alphas[i])
#if L==H:
# print "L==H"
# continue
eta = 2.0 * dataMatrix[i,:]*dataMatrix[j,:].T - dataMatrix[i,:]*dataMatrix[i,:].T - dataMatrix[j,:]*dataMatrix[j,:].T
#if (eta>=0):continue
alphas[j]-=labelMat[j]*(Ei-Ej)/eta
alphas[j]=clipAlpha(alphas[j],H,L)
if (abs(alphas[j]-alphaJold)<0.0001):
continue
alphas[i]+=labelMat[j]*labelMat[i]*(alphaJold-alphas[j])
b1 = b - Ei- labelMat[i]*(alphas[i]-alphaIold)*dataMatrix[i,:]*dataMatrix[i,:].T - labelMat[j]*(alphas[j]-alphaJold)*dataMatrix[i,:]*dataMatrix[j,:].T
b2 = b - Ej- labelMat[i]*(alphas[i]-alphaIold)*dataMatrix[i,:]*dataMatrix[j,:].T - labelMat[j]*(alphas[j]-alphaJold)*dataMatrix[j,:]*dataMatrix[j,:].T
if (0 < alphas[i]) and (C > alphas[i]): b = b1
elif (0 < alphas[j]) and (C > alphas[j]): b = b2
else: b = (b1 + b2)/2.0
alphaPairsChanged += 1
if (alphaPairsChanged==0):iter+=1
else:iter=0
return b,alphas
def calcWs(alphas,dataArr,classLabels):
X=mat(dataArr);labelMat=mat(classLabels).transpose()
m,n=shape(X)
w=zeros((n,1))
for i in range(m):
w+=multiply(alphas[i]*labelMat[i],X[i,:].T)#对应的元素相乘
return w
C=0.6
dataArr,labelArr=loadDataSet("testSet.txt")
b,alphas=smoSimple(dataArr,labelArr,C,0.0001,40)
ws=calcWs(alphas,dataArr,labelArr)
from matplotlib.patches import Circle
xcord0=[]
ycord0=[]
xcord1=[]
ycord1=[]
markers=[]
colors=[]
fr=open("testSet.txt")
for line in fr.readlines():
lineSplit = line.strip().split('\t')
xPt = float(lineSplit[0])
yPt = float(lineSplit[1])
label = int(lineSplit[2])
if (label == -1):
xcord0.append(xPt)
ycord0.append(yPt)
else:
xcord1.append(xPt)
ycord1.append(yPt)
fr.close()
xd=[]
yd=[]
m=shape(dataArr)[0]
for i in range(100):
if alphas[i]>0.0 and alphas[i]!=C:
xd.append(dataArr[i])
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(xcord0,ycord0, marker='s', s=50)
ax.scatter(xcord1,ycord1, marker='o', s=50, c='red')
plt.title('SVM')
totalm=shape(xd)[0];
for i in range(totalm):
circle = Circle((xd[i][0], xd[i][1]), 0.3, facecolor='none', edgecolor=(0,0.8,0.8), linewidth=2, alpha=0.5)
ax.add_patch(circle)
w0=float(ws[0])
w1=float(ws[1])
x=arange(-2.0,12.0,0.1)
x=mat(x)
z=(-w0*x-b)/w1
z=mat(z)
ax.plot(x.tolist()[0],z.tolist()[0])
ax.axis([-2,12,-8,6])
plt.show()
截图为:.