Logistic Regression is essentially a linear regression whose output is normalized by the logistic (sigmoid) function.
For a binary classification problem, the input is a vector x = (x1, x2, ..., xn), Θ = (θ0, θ1, θ2, ..., θn) are the parameters learned by our algorithm, and the class labels are 0 and 1. Let

z = θ0 + θ1*x1 + θ2*x2 + ... + θn*xn,

which can be written compactly as

z = Θ^T x  (with the convention x0 = 1).

We then map z through the sigmoid function

g(z) = 1 / (1 + e^(-z)).
If g(z) is close to 0, the input sample is assigned to class 0; otherwise it is assigned to class 1. What remains is to train the parameters Θ = (θ0, θ1, θ2, ..., θn), which we do with gradient descent, as in the sketch below.
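As a minimal illustration (not from the original post; the toy data, learning rate, and variable names here are made up for the example), binary logistic regression trained by batch gradient descent on the cross-entropy loss looks like this in plain NumPy:

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

# toy linearly separable data: 100 samples, 2 features, labels in {0, 1}
rng = np.random.RandomState(0)
X = rng.randn(100, 2)
y = (X[:, 0] + X[:, 1] > 0).astype(np.float64)

theta = np.zeros(2)  # weights (θ1, θ2)
b = 0.0              # bias (θ0)
lr = 0.1             # learning rate

for _ in range(1000):
    p = sigmoid(X.dot(theta) + b)          # predicted P(y = 1 | x)
    theta -= lr * X.T.dot(p - y) / len(y)  # gradient of the cross-entropy loss
    b -= lr * np.mean(p - y)

pred = (sigmoid(X.dot(theta) + b) > 0.5).astype(np.float64)
print("train accuracy:", np.mean(pred == y))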
Logistic Regression itself only handles binary classification; softmax regression, derived from it, extends to multiple classes. Both are linear classifiers, so the data must be linearly separable for them to fit well. The softmax function itself is sketched below.
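For reference (this helper is illustrative, not part of the original code), softmax turns a vector of raw class scores into a probability distribution; subtracting the maximum score first is a common trick to avoid overflow and does not change the result:

import numpy as np

def softmax(scores):
    # shift by the max for numerical stability; the output is unchanged
    exps = np.exp(scores - np.max(scores))
    return exps / np.sum(exps)

print(softmax(np.array([2.0, 1.0, 0.1])))  # ~[0.659, 0.242, 0.099]

The TensorFlow script below trains this kind of softmax classifier end to end on a synthetic three-class spiral dataset.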
#coding:utf-8
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
# training data: K spiral arms, one class per arm
N = 300 # number of points per class
D = 2 # dimensionality
K = 3 # number of classes
X = np.zeros((N*K,D)) # data matrix (each row = single example)
Y = np.zeros(N*K, dtype='uint8') # class labels
for j in range(K):
    ix = range(N*j, N*(j+1))
    r = np.linspace(0.0, 1, N)  # radius
    t = np.linspace(j*4, (j+1)*4, N) + np.random.randn(N)*0.2  # theta
    X[ix] = np.c_[r*np.sin(t), r*np.cos(t)]
    Y[ix] = j
# test data, generated the same way
NN = 100  # number of test points per class
X_test = np.zeros((NN*K,D)) # data matrix (each row = single example)
Y_test = np.zeros(NN*K, dtype='uint8') # class labels
for j in range(K):
    ix = range(NN*j, NN*(j+1))
    r = np.linspace(0.0, 1, NN)  # radius
    t = np.linspace(j*4, (j+1)*4, NN) + np.random.randn(NN)*0.2  # theta
    X_test[ix] = np.c_[r*np.sin(t), r*np.cos(t)]
    Y_test[ix] = j
# let's visualize the training and test data:
plt.scatter(X[:, 0], X[:, 1], c=Y, s=40, cmap=plt.cm.Spectral)
plt.show()
plt.scatter(X_test[:, 0], X_test[:, 1], c=Y_test, s=40, cmap=plt.cm.Spectral)
plt.show()
def dense_to_one_hot(labels_dense, num_classes=3):
    """Convert class labels from scalars to one-hot vectors."""
    num_labels = labels_dense.shape[0]
    index_offset = np.arange(num_labels) * num_classes
    labels_one_hot = np.zeros((num_labels, num_classes))
    labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1
    return labels_one_hot
Y = dense_to_one_hot(Y, 3)
Y_test = dense_to_one_hot(Y_test, 3)
n_input = 2   # feature dimensionality
n_output = 3  # number of classes
net_input = tf.placeholder(tf.float32, [None, n_input])
# %% The model is a linear map followed by softmax: y = softmax(W*x + b)
W = tf.Variable(tf.random_normal((n_input, n_output), stddev=0.01))
b = tf.Variable(tf.zeros([n_output]))
net_output = tf.nn.softmax(tf.matmul(net_input, W) + b)
# placeholder for the true one-hot labels
y_true = tf.placeholder(tf.float32, [None, n_output])
# cross-entropy loss (tf.log can underflow if a predicted probability
# hits exactly 0; acceptable for this small demo)
cross_entropy = -tf.reduce_sum(y_true * tf.log(net_output))
# %% A prediction is correct when the argmax of the softmax output
# matches the argmax of the one-hot label
correct_prediction = tf.equal(tf.argmax(net_output, 1), tf.argmax(y_true, 1))
# %% And now we can look at the mean of our network's correct guesses
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
# %% Train the graph with gradient descent on our loss,
# using learning rate 0.01
optimizer = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)
# %% Create a session and initialize the variables:
sess = tf.Session()
sess.run(tf.global_variables_initializer())
n_epochs = 600
for epoch_i in range(n_epochs):
    sess.run(optimizer, feed_dict={net_input: X, y_true: Y})
# %% Print final test accuracy:
print(sess.run(accuracy, feed_dict={net_input: X_test, y_true: Y_test}))
Because the spiral data are not linearly separable, the accuracy only reaches about 57%.