PLA code

最新推荐文章于 2022-05-09 20:28:36 发布

原创 · 最新推荐文章于 2022-05-09 20:28:36 发布 · 515 阅读

1 ·

CC 4.0 BY-SA版权

文章标签：

#python #机器学习

机器学习同时被 2 个专栏收录

30 篇文章

订阅专栏

python

29 篇文章

订阅专栏

本文介绍了一个基于Python实现的感知机学习算法，通过随机梯度下降法进行权重更新，并使用训练数据集验证了算法的有效性。此外，还展示了如何用该算法处理测试数据并评估其错误率。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

感知机

参考自机器学习基石

# /usr/bin/env python2.7
# encoding=utf-8
import numpy as np
import random,os


def verify(weight, array_x, array_y):
    """
    Check whether ``weight`` classifies every labelled sample correctly.

    A sample counts as correct when ``sum(weight * x) * y`` is strictly
    positive, so a sample whose score is exactly zero is treated as
    misclassified.  The number of correct samples is printed on every call.

    :param weight: weight vector, multiplied element-wise with each sample
    :param array_x: samples, one row per sample
    :param array_y: labels; only the first ``len(array_y)`` rows of
        ``array_x`` are evaluated
    :return: True when every evaluated sample is classified correctly (this
        includes an empty label set), otherwise False
    :raises IndexError: when ``array_x`` has fewer rows than ``array_y`` has
        labels
    """
    labels = np.asarray(array_y)
    num_samples = len(labels)
    if num_samples == 0:
        # Nothing to score; an empty sample array has no feature axis to
        # reduce over, so skip the vectorised path entirely.
        sum_ok = 0
    else:
        features = np.asarray(array_x)[:num_samples]
        if features.shape[0] < num_samples:
            raise IndexError("array_x has fewer rows than array_y has labels")
        # One vectorised pass replaces the per-row Python loop.
        scores = (features * weight).sum(axis=1)
        sum_ok = int(np.count_nonzero(scores * labels > 0))
    print("%d data is classified ok!" % sum_ok)
    return sum_ok == num_samples

# Directory that contains this script; the data files are opened relative
# to it.
ROOT_PATH = os.path.dirname(os.path.realpath(__file__))

# Read the training set.  Every non-blank line is parsed as
# "<f1> <f2> ... <fn>\t<label>": whitespace-separated float features, a tab,
# then an integer label.
x = []
y = []
path_trainfile = os.path.join(ROOT_PATH, 'traindata')
with open(path_trainfile, 'r') as infile:
    for n, raw_line in enumerate(infile):
        if n % 100 == 0:
            print("%d lines readed !" % n)
        stripped = raw_line.strip()
        if not stripped:
            # A blank line (e.g. trailing newline padding) carries no sample
            # and would otherwise fail on the missing label field.
            continue
        line = stripped.split('\t')
        y.append(int(line[1]))
        # split() without an argument tolerates repeated spaces between
        # feature values, which split(' ') turns into empty strings.
        num_x = [float(ss) for ss in line[0].split()]
        x.append(num_x)

# ======================================PLA=================================================
# init
array_x = np.array(x)
array_y = np.array(y)
num_datasets = len(array_y)
if num_datasets == 0:
    # Fail with a clear message; without any sample the loop below never
    # binds `iteration` and the summary prints would raise NameError.
    raise ValueError("no training samples were loaded")

# One weight per feature column instead of a hard-coded length of 4.
# NOTE(review): no bias feature is appended here; unless the data already
# carries a constant column, the separator is forced through the origin.
weight = np.zeros(array_x.shape[1])
sign_init = -1  # not read anywhere in the visible code; kept for compatibility
learning_rate = 0.5
update = 0

# Sample indices are drawn with replacement, one per training step.
# For a deterministic in-order pass use list(range(num_datasets)) instead.
num_random = [random.randint(0, num_datasets - 1) for _ in range(num_datasets)]

# train
# The loop performs at most num_datasets steps and stops early as soon as
# verify() reports that every training sample is classified correctly.
print ("we has %d datasets!" % num_datasets)
for iteration in range(num_datasets):
    random_iter = num_random[iteration]
    sample_x = array_x[random_iter]
    sample_y = array_y[random_iter]
    # `<= 0` mirrors verify(), which only accepts a strictly positive score.
    # It also lets the all-zero initial weight trigger the first update, so
    # no special-cased first step is needed.
    if np.dot(weight, sample_x) * sample_y <= 0:
        # w = w + 0.5 * y * x
        weight = weight + learning_rate * sample_y * sample_x
        update += 1
        if verify(weight, array_x, array_y):
            break


print ("iter : %d " % (iteration+1))
print ("update : %d " % (update))

# test
# Read the test set.  Every non-blank line is parsed as
# "<f1> <f2> ... <fn>\t<label>": whitespace-separated float features, a tab,
# then an integer label.
x = []
y = []

path_testfile = os.path.join(ROOT_PATH, 'testdata')
with open(path_testfile, 'r') as infile:
    for n, raw_line in enumerate(infile):
        if n % 100 == 0:
            print("%d lines readed !" % n)
        stripped = raw_line.strip()
        if not stripped:
            # Skip blank lines instead of failing on the missing label field.
            continue
        line = stripped.split('\t')
        y.append(int(line[1]))
        # split() without an argument tolerates repeated spaces.
        num_x = [float(ss) for ss in line[0].split()]
        x.append(num_x)


array_x = np.array(x)
array_y = np.array(y)

num_datasets = len(array_y)
if num_datasets == 0:
    # An empty test set has no error rate; say so instead of dividing by zero.
    raise ValueError("no test samples were loaded")

# A sample is counted as correct only when its score has the same sign as
# its label, i.e. score * label is strictly positive.
count = int(np.count_nonzero(np.dot(array_x, weight) * array_y > 0))

rate_error = (num_datasets-count) / float(num_datasets)
print("The error rate is %f" % rate_error)