机器学习

本文介绍使用Python实现留一法和十折交叉验证对iris和wine数据集进行三分类的过程。主要讨论了算法实现细节,特别是在十折交叉验证中采用分层抽样的方法,并分享了在调试过程中遇到的问题及解决策略。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

用python实现了留一法和十折交叉检验来对iris和wine数据集进行三分类。算法实现起来不难,但是由于第一次写,迭代函数的封装性不好(因为假设了类别在最后一列)。主要需要注意的是十折交叉检验时的分层抽样。因为两个数据集都比较整齐,一二三类依次分为三块,所以这里用数据集每一行元组的下标除以10的余数作为其所处的块号。其余就没有什么了。只是因为先做的iris,数据集数量特别好,就特别蠢地假设总数是10和类别数的倍数。故对wine进行分类的时候,又重写了这个方法。
另外迭代次数只是简单设置为5次就得到了很好的模型,就没有再对其进行调整。
在调试过程中遇到了最后模型出现nan的问题,主要是因为迭代次数太多,模型中的数值在不断迭代中越来越小;由于计算机浮点精度有限,这些数值最终下溢为0,导致迭代过程中出现0作为除数的情况,因而产生nan。
对wine数据集分类代码如下:

import os
import numpy
import string
import math

def xz(x):
    """Return *x* with a constant 1 appended as an extra last row.

    ``x`` is a column vector (shape ``(n, 1)``); the result has shape
    ``(n + 1, 1)``. The trailing 1 lets the last component of the weight
    vector act as the bias term.
    """
    # numpy.row_stack is a deprecated alias of numpy.vstack (NumPy 2.0).
    return numpy.vstack((x, 1))

def betaInit(m):
    """Build the all-zero starting weight vector for the data matrix *m*.

    The result is a column vector with one entry per column of ``m``
    (shape ``(m.shape[1], 1)``).
    """
    n_params = m.shape[1]
    return numpy.zeros((n_params, 1))
def p1(xz,beta):
    """Return the logistic probability of the positive class.

    Computes ``sigmoid(beta^T . xz)`` where both arguments are column
    vectors of the same length.

    The value is evaluated in a numerically stable way: ``math.exp`` is only
    ever called with a non-positive argument, so very large logits no longer
    raise ``OverflowError``.
    """
    mul = numpy.dot(beta.T, xz)[0][0]
    if mul >= 0:
        return 1 / (1 + math.exp(-mul))
    t = math.exp(mul)
    return t / (1 + t)
def p0(xz,beta):
    """Return the logistic probability of the negative class.

    Computes ``1 - sigmoid(beta^T . xz)`` where both arguments are column
    vectors of the same length.

    ``math.exp`` is only called with a non-positive argument so that very
    large logits no longer raise ``OverflowError``.
    """
    mul = numpy.dot(beta.T, xz)[0][0]
    if mul > 0:
        t = math.exp(-mul)
        return t / (1 + t)
    return 1 / (1 + math.exp(mul))
def arrayToMatrix(a):
    """Convert the 1-D sequence *a* into a float column vector.

    Returns a new ``float64`` array of shape ``(len(a), 1)``; an empty input
    yields shape ``(0, 1)``.
    """
    # One reshape instead of growing the array row by row, which copied the
    # whole result on every element.
    return numpy.array(a, dtype=float).reshape(-1, 1)
def iteration(beta,m,start,end,rPos):
    """Perform one Newton step of binary logistic regression.

    Parameters:
        beta:  current weights, column vector of shape ``(d, 1)``; the last
               entry is the bias.
        m:     2-D data matrix, one sample per row.
        start: first feature column (inclusive).
        end:   last feature column (exclusive); clamped to the row width by
               normal slicing rules.
        rPos:  index of the column holding the 0/1 label.

    Returns:
        The updated weight vector ``beta - H^-1 . g`` where ``g`` and ``H``
        are the gradient and Hessian of the negative log-likelihood.

    Raises:
        numpy.linalg.LinAlgError: if the Hessian is singular.
    """
    features = numpy.asarray(m[:, start:end], dtype=float)
    # Append a constant-1 column so the last weight acts as the bias.
    design = numpy.hstack((features, numpy.ones((features.shape[0], 1))))
    labels = numpy.asarray(m[:, rPos], dtype=float).reshape(-1, 1)

    logits = numpy.dot(design, beta)
    # sigmoid(z) == 0.5 * (1 + tanh(z / 2)); this form cannot overflow.
    prob = 0.5 * (1.0 + numpy.tanh(0.5 * logits))

    gradient = -numpy.dot(design.T, labels - prob)
    hessian = numpy.dot(design.T, design * (prob * (1.0 - prob)))
    # Solve H . step = g directly instead of forming the explicit inverse.
    return beta - numpy.linalg.solve(hessian, gradient)
def y(beta,x):
    """Return the model output ``sigmoid(beta^T . x)`` for one sample.

    ``beta`` and ``x`` are column vectors of equal length (``x`` already
    carries the trailing constant 1 for the bias).

    ``math.exp`` is only called with a non-positive argument so that very
    negative logits no longer raise ``OverflowError``.
    """
    t = numpy.dot(beta.T, x)[0][0]
    if t >= 0:
        return 1 / (1 + math.exp(-t))
    e = math.exp(t)
    return e / (1 + e)
def train(m,start,end,rPos,iterations=5):
    """Fit a binary logistic-regression model with Newton's method.

    Parameters:
        m:          2-D data matrix, one sample per row.
        start, end: half-open column range holding the features.
        rPos:       index of the column holding the 0/1 label.
        iterations: number of Newton steps to run. Defaults to 5, the value
                    that was previously hard-coded.

    Returns:
        The fitted weight column vector (bias last).
    """
    beta = betaInit(m)
    for _ in range(iterations):
        beta = iteration(beta, m, start, end, rPos)
    return beta
def trainNum(m,num,start,end,rPos):
    """Train a one-vs-rest classifier for the class labelled *num*.

    The label column ``rPos`` is rewritten to 1 where it equals ``num`` and
    0 elsewhere, and the resulting binary problem is passed to ``train``.
    The relabelling happens on a copy, so the caller's matrix is untouched.
    """
    binary = m.copy()
    binary[:, rPos] = numpy.where(binary[:, rPos] != num, 0, 1)
    return train(binary, start, end, rPos)
def newKFold(m,k):
    """Reorder the rows of *m* for a k-fold split.

    Rows are grouped by ``row_index % k``: first rows 0, k, 2k, ..., then
    rows 1, k+1, 2k+1, ..., and so on. Taking contiguous slices of the
    result therefore draws evenly from every region of the original matrix.

    Returns a new ``float64`` array with the same shape as ``m``. For
    ``k <= 0`` the result is empty with shape ``(0, m.shape[1])``.
    """
    # The leading empty float block keeps the float64 result dtype and makes
    # the k <= 0 case return an empty matrix instead of failing.
    parts = [numpy.empty((0, m.shape[1]))]
    parts.extend(m[offset::k] for offset in range(k))
    # One concatenate instead of re-copying the result for every row.
    return numpy.concatenate(parts)



# Load the comma-separated data file into the matrix `m`, one sample per row.
# The code below treats column 0 as the class label and the remaining 13
# columns as features. Reading stops at the first line without a comma.
rows = []
with open('E:\\wine.txt') as file:  # `with` closes the file handle afterwards
    mylist = file.readlines()
for line in mylist:
    if line.find(",") < 0:
        break
    rows.append([float(arg) for arg in line.split(",")])
# Stack once at the end; the empty (0, 14) block fixes the expected width, so
# a row with a different number of fields raises instead of being accepted.
m = numpy.vstack([numpy.empty((0, 14))] + rows)
for a in m:
    print (a)
# K-fold cross-validation.

k=10
c=3  # number of classes; the three one-vs-rest models below are hard-coded
# Average number of rows per fold. This may be fractional, so the fold
# boundaries below are truncated with int().
numeach=m.shape[0]/k
mt=newKFold(m,k).copy()
resultAray1 = numpy.zeros(shape=(mt.shape[0],1))
E=0
err=0
for i in range(k):
    lo = int(i*numeach)
    # Pin the last fold to the end of the data so that floating-point
    # rounding of k*numeach can never drop the final row.
    hi = mt.shape[0] if i == k-1 else int((i+1)*numeach)
    testm1 = mt[lo:hi]
    trainm1 = numpy.vstack((mt[0:lo], mt[hi:mt.shape[0]]))

    # Features live in columns 1..end of each row, label in column 0.
    # The end index is the column count (it used to pass the row count and
    # only worked because slicing clamps to the row width).
    beta1=trainNum(trainm1,1,1,mt.shape[1],0)
    beta2=trainNum(trainm1,2,1,mt.shape[1],0)
    beta3=trainNum(trainm1,3,1,mt.shape[1],0)

    for j in range(testm1.shape[0]):
        idx = lo + j
        x = xz(arrayToMatrix(testm1[j, 1:mt.shape[1]]))
        r1 = y(beta1, x)
        r2 = y(beta2, x)
        r3 = y(beta3, x)
        # Pick the class whose one-vs-rest score is largest. The original
        # called an `isMax` helper that is not defined anywhere in this
        # script; it is assumed to have meant "first argument is the
        # maximum" -- TODO confirm tie-breaking if that helper resurfaces.
        if r1 >= r2 and r1 >= r3:
            predicted, score = 1, r1
        elif r2 >= r1 and r2 >= r3:
            predicted, score = 2, r2
        else:
            predicted, score = 3, r3
        # NOTE(review): this compares a probability with the raw class label
        # (1/2/3), kept as in the original -- confirm the intended error
        # measure.
        E = E + (score - mt[idx, 0]) ** 2
        resultAray1[idx] = predicted
for i in range(mt.shape[0]):
    if mt[i,0]!=resultAray1[i]:
        err=err+1
    print(mt[i],resultAray1[i])
print("k折检验法,E:",E/mt.shape[0],"错误率: ",err/mt.shape[0])

# Leave-one-out cross-validation.

resultAray2 = numpy.zeros(shape=(m.shape[0],1))
E=0
err=0
for i in range(m.shape[0]):
    # Row i is the single test sample; every other row is used for training.
    testm2 = m[i:i+1]
    trainm2 = numpy.vstack((m[0:i], m[i+1:m.shape[0]]))

    # Features live in columns 1..end of each row, label in column 0.
    beta1 = trainNum(trainm2, 1,1,m.shape[1],0)
    beta2 = trainNum(trainm2, 2,1,m.shape[1],0)
    beta3 = trainNum(trainm2, 3,1,m.shape[1],0)

    for j in range(testm2.shape[0]):
        x = xz(arrayToMatrix(testm2[j, 1:m.shape[1]]))
        r1 = y(beta1, x)
        r2 = y(beta2, x)
        r3 = y(beta3, x)
        # Pick the class whose one-vs-rest score is largest. The original
        # called an `isMax` helper that is not defined anywhere in this
        # script; it is assumed to have meant "first argument is the
        # maximum" -- TODO confirm tie-breaking if that helper resurfaces.
        if r1 >= r2 and r1 >= r3:
            predicted, score = 1, r1
        elif r2 >= r1 and r2 >= r3:
            predicted, score = 2, r2
        else:
            predicted, score = 3, r3
        # NOTE(review): this compares a probability with the raw class label
        # (1/2/3), kept as in the original -- confirm the intended error
        # measure.
        E = E + (score - m[i][0]) ** 2
        resultAray2[i] = predicted
for i in range(m.shape[0]):
    if m[i,0]!=resultAray2[i]:
        err=err+1
    print(m[i], resultAray2[i])
print("留一法,E:",E/m.shape[0],"错误率: ",err/m.shape[0])

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值