2 Logistic Regression
2.1 Logistic Regression
$h_{w,b}(x)=\sigma(g(x))=\frac{1}{1+e^{-(w^Tx+b)}}$

Absorbing the bias $b$ into the weight vector and prepending a constant 1 to the input:

$w=(b,w_1,w_2,...,w_n)^T$

$x=(1,x_1,x_2,...,x_n)^T$

$z=g(x)=w^Tx$

$P(y=1\mid x;w)=h_w(x)$

$P(y=0\mid x;w)=1-h_w(x)$
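As a quick illustration, the hypothesis can be evaluated directly with NumPy. This is only a minimal sketch; the helper names `sigmoid` and `hypothesis` and the numbers are my own, not part of the implementation later in the post.

import numpy as np

def sigmoid(z):
    # logistic function: maps any real score to (0, 1)
    return 1.0 / (1.0 + np.exp(-z))

def hypothesis(w, x):
    # x is assumed to already carry a leading 1, so w[0] plays the role of the bias b
    return sigmoid(np.dot(w, x))

w = np.array([0.5, -1.0, 2.0])   # (b, w1, w2)
x = np.array([1.0, 0.3, 0.8])    # (1, x1, x2)
print(hypothesis(w, x))          # P(y=1 | x; w)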
Likelihood function:
$L(w)=P(y\mid x;w)=\prod_{i=1}^m P(y_i\mid x_i;w)=\prod_{i=1}^m h_w(x_i)^{y_i}\,(1-h_w(x_i))^{1-y_i}$
Loss function (average negative log-likelihood):
$J(w)=-\frac{1}{m}\ln L(w)=-\frac{1}{m}\sum_{i=1}^m\left(y_i\ln h_w(x_i)+(1-y_i)\ln(1-h_w(x_i))\right)$
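A minimal NumPy sketch of this loss, with a small clip so the logarithm stays finite (the clipping constant `eps` is my own choice, not part of the derivation):

import numpy as np

def cross_entropy(y, y_pre, eps=1e-12):
    # binary cross-entropy averaged over the m samples
    y_pre = np.clip(y_pre, eps, 1 - eps)   # avoid log(0)
    return -np.mean(y * np.log(y_pre) + (1 - y) * np.log(1 - y_pre))

y     = np.array([1, 0, 1, 1])
y_pre = np.array([0.9, 0.2, 0.7, 0.6])
print(cross_entropy(y, y_pre))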
Derivative with respect to $w_j$:
$\frac{\partial J(w)}{\partial w_j}=-\frac{1}{m}\frac{\partial}{\partial w_j}\sum_{i=1}^m\left(y_i\ln h_w(x_i)+(1-y_i)\ln(1-h_w(x_i))\right)$

$=-\frac{1}{m}\sum_{i=1}^m\left(y_i\frac{\partial \ln h_w(x_i)}{\partial w_j}+(1-y_i)\frac{\partial \ln(1-h_w(x_i))}{\partial w_j}\right)$

$=-\frac{1}{m}\sum_{i=1}^m\left(y_i\frac{1}{h_w(x_i)}\frac{\partial h_w(x_i)}{\partial z_i}\frac{\partial z_i}{\partial w_j}-(1-y_i)\frac{1}{1-h_w(x_i)}\frac{\partial h_w(x_i)}{\partial z_i}\frac{\partial z_i}{\partial w_j}\right)$

Using $\frac{\partial h_w(x_i)}{\partial z_i}=h_w(x_i)(1-h_w(x_i))$ and $\frac{\partial z_i}{\partial w_j}=x_{ij}$:

$=-\frac{1}{m}\sum_{i=1}^m\left(y_i\frac{h_w(x_i)(1-h_w(x_i))}{h_w(x_i)}-(1-y_i)\frac{h_w(x_i)(1-h_w(x_i))}{1-h_w(x_i)}\right)\frac{\partial z_i}{\partial w_j}$

$=-\frac{1}{m}\sum_{i=1}^m\left(y_i-h_w(x_i)\right)x_{ij}$

$=\frac{1}{m}\sum_{i=1}^m\left(h_w(x_i)-y_i\right)x_{ij}$
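Stacking the samples into a design matrix $X$ (one row per sample, with a leading column of ones) gives the equivalent vectorized form, which the implementation below uses with the $\frac{1}{m}$ factor folded into the learning rate:

$\nabla_w J(w)=\frac{1}{m}X^T\left(h_w(X)-y\right)$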
$w_j := w_j-\eta\,\frac{\partial J(w)}{\partial w_j}$
'''
Logistic Regression
Gradient of the loss: X^T (y_pre - y); the 1/m factor is absorbed into the learning rate eta.
'''
import numpy as np


class LogisticRegression:
    def __init__(self, n_iter=500, eta=1e-3, tol=None):
        self.n_iter = n_iter   # maximum number of gradient-descent iterations
        self.eta = eta         # learning rate
        self.tol = tol         # optional early-stopping threshold on the loss decrease
        self.w = None

    def _process_data(self, X):
        # prepend a column of ones so the bias is absorbed into w
        m, n = X.shape
        X_ = np.ones([m, n + 1])
        X_[:, 1:] = X
        return X_

    def _sigmoid(self, z):
        return 1.0 / (1.0 + np.exp(-z))

    def _predict_prob(self, X, w):
        # linear score followed by the sigmoid activation
        z = np.matmul(X, w)     # [m, n+1] @ [n+1, 1] -> [m, 1]
        return self._sigmoid(z)

    def predict(self, X):
        X = self._process_data(X)
        o = self._predict_prob(X, self.w)
        return np.where(o > 0.5, 1, 0)

    def _loss(self, y, y_pre):
        # binary cross-entropy averaged over the samples
        return -np.sum(y * np.log(y_pre) + (1 - y) * np.log(1 - y_pre)) / y.size

    def gradient(self, X, y, w):
        # batch gradient descent: w -= eta * X^T (y_pre - y)
        if self.tol:
            loss_old = np.inf
        loss_list = []
        for i in range(self.n_iter):
            y_pre = self._predict_prob(X, w)
            loss = self._loss(y, y_pre)
            loss_list.append(loss)
            if i % 100 == 0:
                print(loss)
            if self.tol:
                if loss_old - loss < self.tol:
                    break
                loss_old = loss
            w -= self.eta * np.matmul(X.T, (y_pre - y))

    def train(self, X_train, y_train):
        X_train = self._process_data(X_train)
        m, n = X_train.shape                             # e.g. [20, 3]
        self.w = np.random.random(n).reshape([-1, 1])    # [3, 1]
        self.gradient(X_train, y_train, self.w)


if __name__ == '__main__':
    # toy data: 10 samples per class, class 0 around [2, 4]^2, class 1 around [5, 9]^2
    x = np.random.random(10)
    x1 = np.array([x * 1 + 2, x * 1 + 3])
    x2 = np.array([x * 2 + 5, x * 2 + 7])
    X_train = np.concatenate([x1, x2], axis=0).reshape([-1, 2])
    y_train = np.zeros(20).reshape([-1, 1])
    y_train[10:] = 1
    log = LogisticRegression(n_iter=3000, eta=1e-3, tol=1e-5)
    log.train(X_train, y_train)
    w = log.w
    y_pre = log.predict(X_train)
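    # Sanity check (my addition, not part of the original script): on this
    # well-separated toy set the predictions should match the labels almost exactly.
    print(np.mean(y_pre == y_train))   # fraction of correct predictions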
2.2 Softmax Regression
Softmax regression handles multi-class classification problems. For each of the K classes, a linear score is computed:
$z_j=g_j(x)=w_j^Tx$
$W=\begin{pmatrix} w_1^T \\ w_2^T \\ \vdots \\ w_K^T \end{pmatrix}$

$z=g(x)=Wx=\begin{pmatrix} z_1 \\ z_2 \\ \vdots \\ z_K \end{pmatrix}$

$\sigma(z)=\begin{pmatrix} \sigma(z)_1 \\ \sigma(z)_2 \\ \vdots \\ \sigma(z)_K \end{pmatrix}$

$\sigma(z)_j=\frac{e^{z_j}}{\sum_{k=1}^K e^{z_k}}$
$h_w(x)=\sigma(g(x))=\frac{1}{\sum_{k=1}^K e^{w_k^Tx}}\begin{pmatrix} e^{w_1^Tx} \\ e^{w_2^Tx} \\ \vdots \\ e^{w_K^Tx} \end{pmatrix}$
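A minimal NumPy sketch of the softmax itself (the maximum score is subtracted first for numerical stability; the shift cancels in the ratio, so the result is unchanged):

import numpy as np

def softmax(z):
    # shift by the max so np.exp does not overflow; probabilities are unaffected
    z = z - np.max(z)
    e = np.exp(z)
    return e / np.sum(e)

z = np.array([2.0, 1.0, 0.1])
print(softmax(z))   # probabilities over the K classes, summing to 1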
$J(w)=-\frac{1}{m}\sum_{i=1}^m\sum_{j=1}^K I(y_i=j)\ln h_w(x_i)_j$, where $I(\cdot)$ is the indicator function (1 if the condition holds, 0 otherwise).
$\nabla_{w_j}J(w)=\frac{1}{m}\sum_{i=1}^m\left(h_w(x_i)_j-I(y_i=j)\right)x_i$
$w_j := w_j-\eta\,\nabla_{w_j}J(w)$
$W := W-\eta\begin{pmatrix} \nabla_{w_1}J(w)^T \\ \nabla_{w_2}J(w)^T \\ \vdots \\ \nabla_{w_K}J(w)^T \end{pmatrix}$
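Putting the pieces together, one gradient-descent step for softmax regression might look like the following sketch; the function names, variable names, and toy shapes are assumptions for illustration, not an implementation from the post.

import numpy as np

def softmax_rows(Z):
    # row-wise softmax for a batch of score vectors
    Z = Z - Z.max(axis=1, keepdims=True)
    E = np.exp(Z)
    return E / E.sum(axis=1, keepdims=True)

def softmax_step(W, X, Y, eta=0.1):
    # X: [m, n+1] with a leading column of ones, W: [K, n+1], Y: [m, K] one-hot labels
    m = X.shape[0]
    P = softmax_rows(X @ W.T)      # h_w(x_i)_j for every sample i and class j
    grad = (P - Y).T @ X / m       # stacked gradients, one row per w_j
    return W - eta * grad

# toy example: m=4 samples, n=2 features, K=3 classes
X = np.hstack([np.ones((4, 1)), np.random.rand(4, 2)])
Y = np.eye(3)[[0, 1, 2, 1]]        # one-hot labels
W = np.zeros((3, 3))
W = softmax_step(W, X, Y)
print(W.shape)                     # (3, 3)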
This post examined logistic regression for binary classification, covering the sigmoid activation, the loss function, gradient descent, and a Python implementation. It then turned to softmax regression for multi-class problems, again covering the loss function and the update rule, and illustrated the training process of both models with examples.