Machine Learning Foundations, Homework 3: Logistic Regression, Linear Regression, and Feature Transforms

This post works through the logistic regression and linear regression parts of the assignment, links to the actual training and test data sets, and highlights the role of feature transforms in the model.


Training and test data for the logistic regression problems:

https://d396qusza40orc.cloudfront.net/ntumlone%2Fhw3%2Fhw3_train.dat 
https://d396qusza40orc.cloudfront.net/ntumlone%2Fhw3%2Fhw3_test.dat


# -*- coding: utf-8 -*-
"""
__title__ = 'Linear.py'
__author__ = 'w1d2s'
__mtime__ = '2015/10/22'
"""

from numpy import *
from scipy import linalg
import random  # stdlib random; imported after numpy so it is not shadowed

def Data_Generator(size):
    """Sample `size` points uniformly from [-1, 1]^2, label them with
    f(x) = sign(x1^2 + x2^2 - 0.6), and flip each label with probability 0.1."""
    X = zeros([size, 2])
    Y = zeros(size)
    for i in range(0, size):
        X[i, :] = [random.uniform(-1, 1), random.uniform(-1, 1)]
        tmp = random.uniform(0, 1)
        if tmp <= 0.1:  # 10% label noise: flip the true label
            Y[i] = -1 * int(sign(X[i, 0] * X[i, 0] + X[i, 1] * X[i, 1] - 0.6))
        else:
            Y[i] = int(sign(X[i, 0] * X[i, 0] + X[i, 1] * X[i, 1] - 0.6))
    (dataSize, dataDim) = X.shape
    X_ = ones([dataSize, dataDim + 1])  # prepend the constant feature x0 = 1
    X_[:, 1: dataDim + 1] = X
    return (X_, X, Y)
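# In formula form, the target implemented above is
#   f(x1, x2) = sign(x1^2 + x2^2 - 0.6)
# with each label flipped to -f(x1, x2) with probability 0.1
# (the `tmp <= 0.1` branch).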

def Err_Counter(X, Y, W):
    """Count 0/1 classification errors of sign(X w) against Y."""
    dataSize = len(Y)
    ErrCnt = 0
    for i in range(0, dataSize):
        if Y[i] * dot(X[i, :], W) <= 0:
            ErrCnt = ErrCnt + 1
    return ErrCnt

def Feature_Transform(X):
    """Map each (x1, x2) to the quadratic feature vector in Z-space."""
    dataSize = len(X)
    Z = ones([dataSize, 6])
    Z[:, 1: 3] = X[:, 0: 2]
    Z[:, 3] = X[:, 0] * X[:, 1]
    Z[:, 4] = X[:, 0] * X[:, 0]
    Z[:, 5] = X[:, 1] * X[:, 1]
    return Z
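# Written out, Feature_Transform is the quadratic transform
#   phi(x1, x2) = (1, x1, x2, x1*x2, x1^2, x2^2),
# so a linear hypothesis in Z-space can represent the circular
# boundary x1^2 + x2^2 = 0.6 of the target above.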

def Linear_Regression(X, Y):
    """Closed-form linear regression: w_lin = pinv(X) y."""
    X_pinv = linalg.pinv(X)
    W_lin = dot(X_pinv, Y)
    return W_lin
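
For reference, here is a minimal sketch of a single simulation run tying these functions together; it mirrors the commented-out experiment loops in main.py below, condensed to one run rather than the 1000-run average:

from Linear import Data_Generator, Feature_Transform, Linear_Regression, Err_Counter

# one experiment: 1000 noisy points, pseudo-inverse fit in the transformed
# space, then the in-sample 0/1 error rate
X_, X, Y = Data_Generator(1000)
Z = Feature_Transform(X)                  # (1, x1, x2, x1*x2, x1^2, x2^2)
W_lin = Linear_Regression(Z, Y)
print(Err_Counter(Z, Y, W_lin) / 1000.0)  # fraction of misclassified points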

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
__title__ = 'Logistic.py'
__author__ = 'w1d2s'
__mtime__ = '2015/10/22'
"""

from numpy import *
import random  # stdlib random, used when isRandom is True

def theta(s):
    """Logistic (sigmoid) function: theta(s) = 1 / (1 + e^{-s})."""
    return 1 / (1 + exp(-s))

def Logistic_Regression(X, Y, eta, T, isRandom):
    """Run T gradient steps of logistic regression with learning rate eta.

    isRandom=True picks a random example each step (SGD); isRandom=False
    cycles through the examples in order, which is fully deterministic."""
    (dataSize, dataDim) = X.shape
    W = zeros(dataDim)
    i = -1
    for t in range(0, T):
        if isRandom:
            i = random.randint(0, dataSize - 1)  # inclusive on both ends
        else:
            i = (i + 1) % dataSize
        # gradient step on the cross-entropy error of example i
        W = W + eta * theta(-Y[i] * dot(W, X[i, :])) * Y[i] * X[i, :]
    return W
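
# The update above is one SGD step on the cross-entropy error: for a
# single example (x_n, y_n),
#   e_n(w)      = ln(1 + exp(-y_n * w'x_n)),
#   grad e_n(w) = -theta(-y_n * w'x_n) * y_n * x_n,
# so stepping against the gradient with step size eta gives exactly
# W = W + eta * theta(-Y[i] * dot(W, X[i, :])) * Y[i] * X[i, :].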

def Logistic_Err_Counter(X, Y, W):
    """Count 0/1 errors of sign(X w) against Y on the given set."""
    Err = 0
    dataSize = len(X)
    for i in range(0, dataSize):
        if dot(X[i, :], W) * Y[i] < 0:
            Err += 1
    return Err



#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
__title__ = 'HW 03 main.py'
__author__ = 'w1d2s'
__mtime__ = '2015/10/22'
"""
from numpy import *
from Linear import *
from Logistic import *

def Data_Pretreatment(path):
    """Parse a whitespace-separated data file: each line holds the feature
    values followed by the label in the last column."""
    rawData = open(path).readlines()
    # print(rawData)
    dataNum = len(rawData)
    dataDim = len(rawData[0].strip().split()) - 1
    dataIdx = 0
    X = zeros([dataNum, dataDim])
    Y = zeros(dataNum)
    print(dataNum, dataDim)
    for line in rawData:
        tempList = line.strip().split()
        Y[dataIdx] = int(tempList[dataDim])  # label is the last field
        X[dataIdx, :] = [float(v) for v in tempList[0: dataDim]]
        dataIdx += 1
    return (X, Y)

if __name__ == '__main__':
    '''
    # Linear regression simulation: average E_in over 1000 experiments
    # of 1000 noisy points each, fitting in the raw (x1, x2) space.
    AveErr = 0
    for i in range(0, 1000):
        (X_, X, Y) = Data_Generator(1000)
        W_lin = Linear_Regression(X_, Y)
        ErrCnt = Err_Counter(X_, Y, W_lin)
        AveErr += ErrCnt
        print(ErrCnt)
    print(AveErr / 1000)
    '''
    '''
    # E_out estimate for a fixed hypothesis in the transformed feature space.
    AveErr = 0
    W = [-9.93766830e-01, 3.95748989e-04, 1.60224660e-03, -4.92090403e-04, 1.55908941e+00, 1.56285848e+00]
    for i in range(0, 1000):
        (X_, X, Y) = Data_Generator(1000)
        Z = Feature_Transform(X)
        ErrCnt = Err_Counter(Z, Y, W)
        AveErr += ErrCnt
        print(ErrCnt)
    print(AveErr / 1000)
    '''
    X, Y = Data_Pretreatment('train.dat')
    X_t, Y_t = Data_Pretreatment('test.dat')
    W_ave = zeros(X.shape[1])  # the hw3 data has 20 features per point
    for i in range(0, 50):
        W = Logistic_Regression(X, Y, 0.001, 2000, False)
        W_ave = W + W_ave
    W_ave = W_ave / 50
    Err = Logistic_Err_Counter(X_t, Y_t, W_ave)
    print(W_ave)
    print(Err)
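
One detail worth flagging: with isRandom=False the visiting order is fixed and W always starts from zeros, so all 50 runs in the loop above return identical weights and the average equals a single run. Averaging only changes anything with the stochastic variant. Here is a sketch of the comparison, reusing the same eta and T as the script (these values come from the code above, not from any requirement) and assuming it runs in the same context where X, Y, X_t, Y_t are already loaded:

# deterministic cyclic pass: any two runs return the same weights
W_cyc = Logistic_Regression(X, Y, 0.001, 2000, False)

# random-pick SGD differs run to run, so averaging 50 runs is meaningful
W_sgd = zeros(X.shape[1])
for _ in range(50):
    W_sgd = W_sgd + Logistic_Regression(X, Y, 0.001, 2000, True)
W_sgd = W_sgd / 50

print(Logistic_Err_Counter(X_t, Y_t, W_cyc))
print(Logistic_Err_Counter(X_t, Y_t, W_sgd))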


