主要参考:
https://blog.youkuaiyun.com/sjz_hahalala479/article/details/81003517
https://blog.youkuaiyun.com/qq_25037903/article/details/83861118
小菜鸡开始学习ML了。。。
15.
import numpy as np
def getDataset(l, s):
    """Parse the first ``l`` lines of ``s`` into a design matrix and labels.

    Each line is split on whitespace; the first four fields are the
    features and the fifth field is the label.  Any further fields are
    ignored.

    Args:
        l: number of lines to read from ``s``.
        s: indexable sequence of text lines.

    Returns:
        A tuple ``(X, Y)``: ``X`` has shape ``(l, 5)`` with column 0 fixed
        to 1 (the extra x0 input), ``Y`` has shape ``(l, 1)``.
    """
    X = np.ones((l, 5))  # column 0 keeps the value 1: the extra x0 input
    Y = np.zeros((l, 1))
    for i in range(l):
        fields = s[i].split()
        # Convert explicitly rather than relying on NumPy coercing strings.
        X[i, 1:] = [float(value) for value in fields[:4]]
        Y[i, 0] = float(fields[4])
    return X, Y
def sign(X, W):
    """Classify the sample ``X`` with the weight vector ``W``.

    Args:
        X: feature vector of one sample.
        W: weight vector of the same length.

    Returns:
        1 if the dot product of ``W`` and ``X`` is strictly positive,
        otherwise -1 (a score of exactly 0 is classified as -1).
    """
    return 1 if np.dot(W, X) > 0 else -1
def pla(l, s, W):
    """Run the perceptron learning algorithm, visiting samples in file order.

    The data set is parsed from the first ``l`` lines of ``s``.  Every pass
    visits all samples in order and corrects the weights on each
    misclassified sample; passes repeat until one completes without any
    correction.  The loop therefore only terminates once some pass
    classifies every sample correctly.

    Args:
        l: number of samples (lines of ``s``) to use.
        s: sequence of text lines holding the data set.
        W: initial weight vector; it is not modified in place.

    Returns:
        The total number of weight corrections performed.
    """
    X, Y = getDataset(l, s)
    updates = 0
    while True:
        corrections = 0
        # No early break: visit every sample in order, then start a new pass.
        for i in range(l):
            label = Y[i, 0]  # scalar label, not a 1-element array
            if sign(X[i], W) != label:
                W = W + label * X[i]
                corrections += 1
        if corrections == 0:
            return updates
        updates += corrections
# Load the training file and report how many corrections the ordered PLA
# performs when started from the zero weight vector.
with open('result.txt', 'r') as f:
    s = f.readlines()
l = len(s)  # number of rows
W = np.zeros(5)
print(pla(l, s, W))
16、17
import numpy as np
import random
def getDataset(l, s):
    """Build the design matrix and label column from the first ``l`` lines.

    Every line is split on whitespace.  Fields 0-3 are the features and
    field 4 is the label; fields beyond the fifth are ignored.

    Args:
        l: number of lines of ``s`` to parse.
        s: indexable sequence of text lines.

    Returns:
        ``(X, Y)`` where ``X`` has shape ``(l, 5)`` and its first column is
        the constant 1 (the extra x0 input), and ``Y`` has shape ``(l, 1)``.
    """
    X = np.ones((l, 5))  # first column is the constant x0 = 1
    Y = np.zeros((l, 1))
    for i in range(l):
        tokens = s[i].split()
        # Explicit float() instead of letting NumPy coerce the strings.
        X[i, 1:] = [float(tok) for tok in tokens[:4]]
        Y[i, 0] = float(tokens[4])
    return X, Y
def sign(X, W):
    """Return the perceptron prediction for sample ``X`` under weights ``W``.

    Args:
        X: feature vector of one sample.
        W: weight vector of the same length.

    Returns:
        1 when ``np.dot(W, X)`` is strictly greater than 0, else -1
        (so a score of exactly 0 yields -1).
    """
    score = np.dot(W, X)
    return 1 if score > 0 else -1
def pla(l, s, W, randomlist, eta=0.25):
    """Run the perceptron learning algorithm with randomly ordered passes.

    The data set is parsed from the first ``l`` lines of ``s``.  Before
    every pass ``randomlist`` is shuffled and the samples are visited in
    that order; each misclassified sample moves the weights by
    ``eta * y * x``.  Passes repeat until one completes with no correction,
    so the loop only terminates once a pass classifies every sample
    correctly.

    Args:
        l: number of samples (lines of ``s``) to use.
        s: sequence of text lines holding the data set.
        W: initial weight vector; it is not modified in place.
        randomlist: list of sample indices to visit.  It is shuffled in
            place, so the caller's list is reordered.
        eta: step size applied to every correction (defaults to 0.25).

    Returns:
        The total number of weight corrections performed.
    """
    X, Y = getDataset(l, s)
    updates = 0
    while True:
        random.shuffle(randomlist)
        corrections = 0
        # No early break: finish the whole shuffled pass before restarting.
        for i in randomlist:
            label = Y[i, 0]  # scalar label, not a 1-element array
            if sign(X[i], W) != label:
                W = W + label * X[i] * eta
                corrections += 1
        if corrections == 0:
            return updates
        updates += corrections
if __name__ == '__main__':
    # Average the number of PLA corrections over 2000 randomly ordered runs,
    # each started from the zero weight vector.
    with open('result.txt', 'r') as f:
        s = f.readlines()
    l = len(s)  # number of rows
    m = []
    rl = list(range(l))  # sample indices; pla() shuffles this list
    for _ in range(2000):
        W = np.zeros(5)
        m.append(pla(l, s, W, rl))
    mm = np.array(m)
    print(np.mean(mm))
18、19、20
import numpy as np
import random
def getDataset(l, s):
    """Read ``l`` text lines into a feature matrix and a label column.

    Lines are split on whitespace: the first four fields are features, the
    fifth is the label, and anything after the fifth field is ignored.

    Args:
        l: how many lines of ``s`` to parse.
        s: indexable sequence of text lines.

    Returns:
        ``(X, Y)``; ``X`` is ``(l, 5)`` with a leading column of ones
        (the extra input x0 = 1) and ``Y`` is ``(l, 1)``.
    """
    X = np.ones((l, 5))  # leading column of ones is x0 = 1
    Y = np.zeros((l, 1))
    for i in range(l):
        parts = s[i].split()
        # Parse the numbers explicitly instead of assigning raw strings.
        X[i, 1:] = [float(part) for part in parts[:4]]
        Y[i, 0] = float(parts[4])
    return X, Y
def sign(X, W):
    """Predict the label of sample ``X`` using weight vector ``W``.

    Args:
        X: feature vector of one sample.
        W: weight vector of the same length.

    Returns:
        1 if ``np.dot(W, X) > 0``; otherwise -1, including when the
        score is exactly 0.
    """
    return 1 if np.dot(W, X) > 0 else -1
def mistake(W, X, Y):
    """Return the fraction of samples that ``W`` misclassifies.

    A sample is predicted as 1 when its score ``np.dot(x, W)`` is strictly
    positive and as -1 otherwise; the prediction is compared with the
    sample's label.

    Args:
        W: weight vector.
        X: samples, one row per sample.
        Y: labels, one per sample (a flat sequence or a single column).

    Returns:
        The error rate as a float in ``[0, 1]``; 0.0 for an empty data set
        (instead of dividing by zero).
    """
    total = len(X)
    if total == 0:
        return 0.0
    scores = np.dot(np.asarray(X, dtype=float), np.asarray(W, dtype=float))
    predicted = np.where(scores > 0, 1, -1)
    labels = np.asarray(Y).reshape(total)  # flatten an (n, 1) label column
    return int(np.count_nonzero(predicted != labels)) / total
def pla(l, s, W, randomlist, max_updates=100):
    """Run the pocket variant of the perceptron learning algorithm.

    The data set is parsed from the first ``l`` lines of ``s``.  A running
    weight vector is corrected on every misclassified sample, visiting the
    samples in a freshly shuffled order on each pass.  After each
    correction the running weights replace the returned ("pocket") weights
    only if their error rate on this data set is strictly lower.

    Training stops after ``max_updates`` corrections, or earlier when a
    whole pass needs no correction (otherwise the loop would never end on
    data that is separated in fewer than ``max_updates`` corrections).

    Args:
        l: number of samples (lines of ``s``) to use.
        s: sequence of text lines holding the data set.
        W: initial weight vector; it is not modified in place.
        randomlist: list of sample indices to visit.  It is shuffled in
            place, so the caller's list is reordered.
        max_updates: maximum number of corrections (defaults to 100).

    Returns:
        The weight vector with the lowest error rate seen during training.
    """
    X, Y = getDataset(l, s)
    Wt = W  # running weights; W holds the best weights found so far
    best_error = mistake(W, X, Y)  # cached so it is not recomputed per update
    remaining = max_updates
    while remaining > 0:
        random.shuffle(randomlist)
        corrected = False
        for i in randomlist:
            label = Y[i, 0]  # scalar label, not a 1-element array
            # Test the running weights Wt, not the pocket weights W.
            if sign(X[i], Wt) == label:
                continue
            Wt = Wt + label * X[i]
            corrected = True
            remaining -= 1
            current_error = mistake(Wt, X, Y)
            if current_error < best_error:
                W, best_error = Wt, current_error
            if remaining == 0:
                break
        if not corrected:
            # A full pass without corrections: nothing left to learn.
            break
    return W
if __name__ == '__main__':
    # Train the pocket PLA 20 times on result.txt and print the mean error
    # rate of the returned weights on mytest.txt.
    with open('result.txt', 'r') as f:
        s = f.readlines()
    with open('mytest.txt', 'r') as ff:
        ss = ff.readlines()
    l = len(s)  # number of training rows
    ll = len(ss)  # number of test rows
    tstX, tstY = getDataset(ll, ss)
    m = []
    rl = list(range(l))  # training sample indices; pla() shuffles this list
    for _ in range(20):
        W = np.zeros(5)
        Wm = pla(l, s, W, rl)
        m.append(mistake(Wm, tstX, tstY))
    a = np.array(m)
    print(a.mean())
感悟:debug 到最后,我已经是个 dead person 了。