Linear regression model
$$\hat{y}=f_{w,b}(x)=wx+b$$
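As a quick worked example with illustrative values (not from any dataset): for $w=2$, $b=1$, and input $x=3$, the model predicts

$$\hat{y}=f_{2,1}(3)=2\cdot 3+1=7$$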
Cost function
We use the mean squared error (squared error) cost function; the extra factor of $\frac{1}{2}$ is there to cancel the 2 produced by differentiation:
$$J(w,b)=\frac{1}{2m}\sum_{i=1}^{m}(\hat{y}^{(i)}-y^{(i)})^2$$
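As a sketch, the same cost can be computed without an explicit loop, assuming `x` and `y` are equal-length 1-D NumPy arrays (the name `compute_cost_vectorized` is mine; the loop-based `compute_cost` appears in the implementation below):

```python
import numpy as np

def compute_cost_vectorized(x, y, w, b):
    # J(w, b) = 1/(2m) * sum((w*x + b - y)^2), computed with array operations
    m = x.shape[0]
    errors = w * x + b - y  # per-example prediction errors
    return np.sum(errors ** 2) / (2 * m)
```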
Computing the partial derivatives
$$
\begin{aligned}
\frac{\partial}{\partial w}J(w,b)
&=\frac{\partial}{\partial w}\frac{1}{2m}\sum_{i=1}^{m}(wx^{(i)}+b-y^{(i)})^2\\
&=\frac{1}{2m}\sum_{i=1}^{m}2(wx^{(i)}+b-y^{(i)})x^{(i)}\\
&=\frac{1}{m}\sum_{i=1}^{m}x^{(i)}(wx^{(i)}+b-y^{(i)})
\end{aligned}
$$
$$
\begin{aligned}
\frac{\partial}{\partial b}J(w,b)
&=\frac{\partial}{\partial b}\frac{1}{2m}\sum_{i=1}^{m}(wx^{(i)}+b-y^{(i)})^2\\
&=\frac{1}{2m}\sum_{i=1}^{m}2(wx^{(i)}+b-y^{(i)})\\
&=\frac{1}{m}\sum_{i=1}^{m}(wx^{(i)}+b-y^{(i)})
\end{aligned}
$$
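These closed-form gradients can be sanity-checked numerically. Here is a sketch using central finite differences against the `compute_cost` function implemented below (the helper name `numeric_gradient` and the step size `eps` are illustrative choices):

```python
def numeric_gradient(x, y, w, b, eps=1e-6):
    # Central differences: dJ/dw ≈ (J(w+eps, b) - J(w-eps, b)) / (2*eps),
    # and analogously for b; both should match the analytic formulas above
    dj_dw = (compute_cost(x, y, w + eps, b) - compute_cost(x, y, w - eps, b)) / (2 * eps)
    dj_db = (compute_cost(x, y, w, b + eps) - compute_cost(x, y, w, b - eps)) / (2 * eps)
    return dj_dw, dj_db
```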
Gradient descent
$$w = w-\alpha\frac{\partial J}{\partial w},\qquad b = b-\alpha\frac{\partial J}{\partial b}$$

where $\alpha$ is the learning rate. Both parameters are updated simultaneously: the two gradients are evaluated at the current $(w,b)$ before either parameter changes.
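A single update step in isolation looks like this (a sketch, assuming `compute_gradient` and the variables `x`, `y`, `w`, `b`, `alpha` are defined as in the implementation below):

```python
# One gradient-descent step: both gradients come from the current (w, b),
# so neither update sees a partially-updated parameter
dj_dw, dj_db = compute_gradient(x, y, w, b)
w, b = w - alpha * dj_dw, b - alpha * dj_db
```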
The following NumPy implementation puts these pieces together:

```python
import math
import numpy as np

# Cost function
def compute_cost(x, y, w, b):
    m = x.shape[0]
    cost = 0
    for i in range(m):
        f_wb = w * x[i] + b               # prediction for example i
        cost = cost + (f_wb - y[i]) ** 2  # accumulate squared error
    total_cost = 1 / (2 * m) * cost
    return total_cost

# Gradient function
def compute_gradient(x, y, w, b):
    m = x.shape[0]
    dj_dw = 0
    dj_db = 0
    for i in range(m):
        f_wb = w * x[i] + b
        dj_dw_i = (f_wb - y[i]) * x[i]  # contribution of example i to dJ/dw
        dj_db_i = f_wb - y[i]           # contribution of example i to dJ/db
        dj_db += dj_db_i
        dj_dw += dj_dw_i
    dj_dw = dj_dw / m
    dj_db = dj_db / m
    return dj_dw, dj_db

# Gradient descent function
def gradient_descent(x, y, w_in, b_in, alpha, num_iters, cost_function, gradient_function):
    '''
    x: input vector, numpy.ndarray
    y: target vector, numpy.ndarray
    w_in: initial w
    b_in: initial b
    alpha: learning rate
    num_iters: number of iterations
    cost_function: function that computes the cost
    gradient_function: function that computes the gradients
    '''
    J_history = []  # record the cost at every iteration
    p_history = []  # record every (w, b) visited during training
    b = b_in
    w = w_in
    for i in range(num_iters):
        # Compute the partial derivatives, then update the parameters w, b
        dj_dw, dj_db = gradient_function(x, y, w, b)
        b = b - alpha * dj_db
        w = w - alpha * dj_dw
        # Save the current cost J and parameters (w, b) for later visualization
        J_history.append(cost_function(x, y, w, b))
        p_history.append([w, b])
        # Print progress ten times over the course of training
        if i % math.ceil(num_iters / 10) == 0:
            print(f"Iteration {i}: Cost {J_history[-1]} ",
                  f"dj_dw: {dj_dw}, dj_db: {dj_db} ",
                  f"w: {w}, b: {b}")
    print(f'final w: {w}, b: {b}')
    # Compare target values with predictions
    y_hat = w * x + b
    for i in range(x.shape[0]):
        print(f'target value: {y[i]}, predicted value: {y_hat[i]}, error: {y[i] - y_hat[i]}')
    return w, b, J_history, p_history
```
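A minimal end-to-end run on synthetic data (the training set, `alpha`, and `num_iters` here are illustrative choices, not from the original):

```python
import numpy as np

x_train = np.array([1.0, 2.0, 3.0, 4.0])
y_train = np.array([3.0, 5.0, 7.0, 9.0])  # generated from y = 2x + 1

w_final, b_final, J_history, p_history = gradient_descent(
    x_train, y_train, w_in=0.0, b_in=0.0, alpha=0.01, num_iters=10000,
    cost_function=compute_cost, gradient_function=compute_gradient)
# With these settings, w_final and b_final should converge toward 2 and 1
```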