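Training and test data for the logistic regression exercise (Logistic的训练数据和测试数据); the first URL is the training set, the second is the test set: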
https://d396qusza40orc.cloudfront.net/ntumlone%2Fhw3%2Fhw3_train.dat
https://d396qusza40orc.cloudfront.net/ntumlone%2Fhw3%2Fhw3_test.dat
# -*- coding: utf-8 -*-
"""
__title__ = 'Linear.py'
__author__ = 'w1d2s'
__mtime__ = '2015/10/22'
"""
from numpy import *
from scipy import linalg
import random
def Data_Generator(size):
    """Generate a noisy 2-D data set labelled by a circular boundary.

    Each point is drawn uniformly from [-1, 1] x [-1, 1] and labelled with
    sign(x1^2 + x2^2 - 0.6).  The label is negated whenever an additional
    uniform draw from [0, 1] is <= 0.1.

    Args:
        size: number of points to generate.

    Returns:
        A tuple ``(X_, X, Y)`` where ``X`` is the ``size x 2`` array of
        points, ``X_`` is ``X`` with a leading column of ones, and ``Y`` is
        the length-``size`` array of labels.
    """
    points = zeros([size, 2])
    labels = zeros(size)
    for row in range(size):
        # Draw order (x1, x2, noise) is kept so seeded runs are reproducible.
        x1 = random.uniform(-1, 1)
        x2 = random.uniform(-1, 1)
        points[row, :] = [x1, x2]
        clean_label = int(sign(x1 * x1 + x2 * x2 - 0.6))
        flipped = random.uniform(0, 1) <= 0.1
        labels[row] = -1 * clean_label if flipped else clean_label
    # Prepend the constant feature x0 = 1.
    augmented = hstack([ones([size, 1]), points])
    return (augmented, points, labels)
def Err_Counter(X, Y, W):
    """Count the samples that the linear classifier ``W`` gets wrong.

    A sample ``i`` is counted as an error when ``Y[i] * dot(X[i, :], W)``
    is less than or equal to zero, so a score of exactly zero is an error.

    Args:
        X: 2-D array of samples, one per row.
        Y: sequence of labels, one per row of ``X``.
        W: weight vector.

    Returns:
        The number of misclassified samples as an int.
    """
    mistakes = 0
    for idx, label in enumerate(Y):
        margin = label * dot(X[idx, :], W)
        if margin <= 0:
            mistakes += 1
    return mistakes
def Feature_Transform(X):
    """Map 2-D points to the six second-order polynomial features.

    Args:
        X: 2-D array whose first two columns are x1 and x2.

    Returns:
        An array with one row per row of ``X`` and the columns
        ``(1, x1, x2, x1*x2, x1^2, x2^2)``.
    """
    x1 = X[:, 0]
    x2 = X[:, 1]
    bias = ones(len(X))
    return column_stack([bias, x1, x2, x1 * x2, x1 * x1, x2 * x2])
def Linear_Regression(X, Y):
    """Return the least-squares weights ``pinv(X) . Y``.

    Args:
        X: 2-D design matrix, one sample per row.
        Y: target values, one per row of ``X``.

    Returns:
        The weight vector obtained by multiplying the pseudo-inverse of
        ``X`` with ``Y``.
    """
    return dot(linalg.pinv(X), Y)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
__title__ = 'Logistic.py'
__author__ = 'w1d2s'
__mtime__ = '2015/10/22'
"""
from numpy import *
import random
def theta(s):
    """Return the logistic (sigmoid) function 1 / (1 + exp(-s)).

    The value is computed as ``exp(-log(1 + exp(-s)))`` through
    ``logaddexp``, which never forms ``exp(-s)`` directly.  The naive
    formula overflows for large negative ``s`` and emits a RuntimeWarning;
    this form returns the same value without overflowing.

    Args:
        s: a scalar or array of scores.

    Returns:
        The sigmoid of ``s``, element-wise for array input.
    """
    # logaddexp(0, -s) == log(exp(0) + exp(-s)) == log(1 + exp(-s))
    return exp(-logaddexp(0, -s))
def Logistic_Regression(X, Y, eta, T, isRandom):
    """Train logistic-regression weights with single-sample gradient steps.

    Starting from the zero vector, performs ``T`` updates of the form
    ``W <- W + eta * theta(-y * W.x) * y * x`` where ``(x, y)`` is one
    sample per step.

    Args:
        X: 2-D array of samples, one per row.
        Y: labels, one per row of ``X``.
        eta: step size.
        T: number of update steps.
        isRandom: if true, each step uses a row chosen uniformly at random
            with ``random.randint``; otherwise rows are visited cyclically
            in order, starting from row 0.

    Returns:
        The weight vector after ``T`` updates.
    """
    n_samples, n_features = X.shape
    W = zeros(n_features)
    for step in range(T):
        if isRandom:
            idx = random.randint(0, n_samples - 1)
        else:
            # Cyclic order 0, 1, ..., n_samples - 1, 0, 1, ...
            idx = step % n_samples
        x_i = X[idx, :]
        y_i = Y[idx]
        W = W + eta * theta(-y_i * dot(W, x_i)) * y_i * x_i
    return W
def Logistic_Err_Counter(X, Y, W):
    """Count the samples misclassified by the weight vector ``W``.

    A sample ``i`` is an error when ``dot(X[i, :], W) * Y[i] <= 0``.  A
    score of exactly zero predicts neither class, so it is counted as an
    error; the previous strict ``< 0`` test reported zero errors for an
    all-zero weight vector.

    Args:
        X: 2-D array of samples, one per row.
        Y: labels, one per row of ``X``.
        W: weight vector.

    Returns:
        The number of misclassified samples as an int.
    """
    Err = 0
    for i, row in enumerate(X):
        if dot(row, W) * Y[i] <= 0:
            Err += 1
    return Err
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
__title__ = 'HW 03 main.py'
__author__ = 'w1d2s'
__mtime__ = '2015/10/22'
"""
from numpy import *
from Linear import *
from Logistic import *
import sys
import string
def Data_Pretreatment(path):
    """Load a whitespace-separated data file into feature and label arrays.

    Every non-blank line holds the feature values followed by an integer
    label in the last column.  The number of features is taken from the
    first non-blank line.  Fields may be separated by any run of
    whitespace, and blank lines (such as a trailing newline) are skipped.

    Args:
        path: path of the data file to read.

    Returns:
        A tuple ``(X, Y)`` where ``X`` is the ``rows x features`` float
        array and ``Y`` is the length-``rows`` array of labels.

    Raises:
        ValueError: if the file contains no data rows, or a field cannot
            be parsed as a number.
    """
    # The context manager closes the file even if parsing fails later.
    with open(path) as dataFile:
        rows = [line.split() for line in dataFile if line.strip()]
    if not rows:
        raise ValueError('no data rows found in %r' % (path,))
    dataNum = len(rows)
    dataDim = len(rows[0]) - 1
    X = zeros([dataNum, dataDim])
    Y = zeros(dataNum)
    print(dataNum, dataDim)
    for dataIdx, fields in enumerate(rows):
        # int() replaces string.atoi, which no longer exists in Python 3.
        Y[dataIdx] = int(fields[dataDim])
        X[dataIdx, :] = [float(value) for value in fields[:dataDim]]
    return (X, Y)
if __name__ == '__main__':
    # Earlier experiment, kept for reference: average error count of plain
    # linear regression on the generated circular data set.
    #
    # AveErr = 0
    # for i in range(0, 1000):
    #     (X_, X, Y) = Data_Generator(1000)
    #     W_lin = Linear_Regression(X_, Y)
    #     ErrCnt = Err_Counter(X_, Y, W_lin)
    #     AveErr += ErrCnt
    #     print ErrCnt
    # print AveErr/1000

    # Earlier experiment, kept for reference: average error count of a fixed
    # weight vector on the second-order transformed features.
    #
    # AveErr = 0
    # W = [ -9.93766830e-01, 3.95748989e-04 , 1.60224660e-03, -4.92090403e-04, 1.55908941e+00, 1.56285848e+00]
    # for i in range(0, 1000):
    #     (X_, X, Y) = Data_Generator(1000)
    #     Z = Feature_Transform(X)
    #     ErrCnt = Err_Counter(Z, Y, W)
    #     AveErr += ErrCnt
    #     print ErrCnt
    # print AveErr/1000

    # Train logistic regression on train.dat 50 times (cyclic sample order),
    # average the weights, and count the errors on test.dat.
    X, Y = Data_Pretreatment('train.dat')
    X_t, Y_t = Data_Pretreatment('test.dat')
    # Size the accumulator from the data instead of hard-coding 20 features.
    W_ave = zeros(X.shape[1])
    for i in range(0, 50):
        W = Logistic_Regression(X, Y, 0.001, 2000, False)
        W_ave = W + W_ave
    W_ave = W_ave/50
    Err = Logistic_Err_Counter(X_t, Y_t, W_ave)
    # Single-argument print(...) gives the same output on Python 2 and 3.
    print(W_ave)
    print(Err)