import numpy as np
import random
def genData(pointCont, bias, variance):
    """Generate noisy sample points scattered around the line y = x + bias.

    Row ``i`` of ``x`` is ``[1, i]``: a constant intercept feature followed by
    the sample index.  The matching target is
    ``(i + bias) + u + variance`` where ``u`` is drawn from
    ``random.uniform(0, 1)``.  Despite its name, ``variance`` is a constant
    offset added to every target, not a statistical variance.

    :param pointCont: number of points to generate
    :param bias: intercept of the underlying line
    :param variance: constant offset added to every target on top of the
        random noise
    :return: tuple ``(x, y)``; ``x`` has shape ``(pointCont, 2)`` and ``y``
        has shape ``(pointCont,)``
    """
    # Column 0 stays at 1 (intercept term); column 1 holds the index 0..n-1.
    x = np.ones(shape=(pointCont, 2))
    x[:, 1] = np.arange(pointCont)
    # Noise is drawn one value per point, in index order, from the stdlib
    # generator so that random.seed() keeps the output reproducible.
    y = np.array(
        [(i + bias) + random.uniform(0, 1) + variance
         for i in range(pointCont)],
        dtype=float,
    )
    return x, y
def gradientDescent(x, y, theta, alpha, itemsCont, iters):
    """Fit linear-regression weights with batch gradient descent.

    With ``m = itemsCont`` the minimised cost is::

        cost = sum((x . theta - y) ** 2) / (2 * m)

    and its gradient with respect to ``theta`` is::

        grad = x^T . (x . theta - y) / m

    Each iteration applies ``theta <- theta - alpha * grad``.  The ``theta``
    argument is not modified in place; a new array is returned.

    :param x: design matrix, one sample per row
    :param y: target values, one per row of ``x``
    :param theta: initial weights
    :param alpha: learning rate
    :param itemsCont: data-set size used to average the gradient
    :param iters: number of iterations to run
    :return: the updated weights
    """
    # The transpose is loop-invariant, so compute it once up front.
    xTran = np.transpose(x)
    for _ in range(iters):
        loss = np.dot(x, theta) - y
        gradient = np.dot(xTran, loss) / itemsCont
        theta = theta - alpha * gradient
    return theta
# Demo run: build 100 noisy samples, then fit the two weights
# (intercept and slope) starting from an all-ones vector.
x, y = genData(pointCont=100, bias=25, variance=10)
print(x, y)
theta = gradientDescent(
    x, y, np.ones(2), alpha=0.0005, itemsCont=100, iters=10000
)
print(theta)