莫烦 2017 TensorFlow 教程(使用更简便版):https://github.com/MorvanZhou/Tensorflow-Tutorial
17.分类学习
https://morvanzhou.github.io/tutorials/machine-learning/tensorflow/5-01-classifier/
之前的是使用TensorFlow解决Regression (回归)问题。这次我们会介绍如何使用TensorFlow解决Classification(分类)问题。
MNIST库是手写体数字库,数据中包含55000张训练图片,每张图片的分辨率是28×28,所以我们的训练网络输入应该是28×28=784个像素数据。调用add_layer函数搭建一个最简单的训练网络结构,只有输入层和输出层,其中输入数据是784个特征,输出数据是10个特征(对应数字0到9),激励函数采用softmax(参考:https://blog.csdn.net/bitcarmanlee/article/details/82320853)。
loss函数(即最优化目标函数)选用交叉熵函数Cross entropy loss。交叉熵用来衡量预测值和真实值的相似程度,如果完全相同,它们的交叉熵等于零。train方法(最优化算法)采用梯度下降法。
数据训练时每次只取100张图片,免得数据太多训练太慢,每训练50次输出一下预测精度。
"""
Please note, this code is only for python 3+. If you are using python 2+, please modify the code accordingly.
"""
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
# Load the MNIST handwritten digits (classes 0-9) with one-hot encoded labels.
# read_data_sets uses the 'MNIST_data' directory and tries to download the
# archives when they are not already there, which requires network access.
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
def add_layer(inputs, in_size, out_size, activation_function=None,):
    """Add one fully connected layer to the graph and return its output tensor.

    Args:
        inputs: Tensor that is multiplied by the layer's weight matrix.
        in_size: Number of rows of the weight matrix (input features).
        out_size: Number of columns of the weight matrix (units in the layer).
        activation_function: Optional callable applied to ``inputs * W + b``.
            When ``None`` the layer stays linear.

    Returns:
        The output tensor of the layer (activated if an activation was given).
    """
    # Weights are drawn from a normal distribution; biases start at 0.1.
    Weights = tf.Variable(tf.random_normal([in_size, out_size]))
    biases = tf.Variable(tf.zeros([1, out_size]) + 0.1,)
    Wx_plus_b = tf.matmul(inputs, Weights) + biases
    if activation_function is None:
        return Wx_plus_b
    return activation_function(Wx_plus_b,)
def compute_accuracy(v_xs, v_ys):
    """Return the fraction of samples in ``v_xs`` that are classified correctly.

    Args:
        v_xs: Input samples fed to the ``xs`` placeholder.
        v_ys: One-hot labels for ``v_xs``, one row per sample.

    Returns:
        The mean accuracy as a float32 scalar.

    The comparison is done in NumPy on the fetched predictions. Building
    ``tf.equal`` / ``tf.argmax`` / ``tf.reduce_mean`` here would add new ops
    to the graph on every call, so the graph would keep growing during
    training.
    """
    import numpy as np  # local import: numpy is not imported at file level

    # Reads the module-level session, prediction tensor and xs placeholder.
    y_pre = sess.run(prediction, feed_dict={xs: v_xs})
    # A sample is correct when the most probable class matches the label.
    correct_prediction = np.argmax(y_pre, 1) == np.argmax(v_ys, 1)
    return np.mean(correct_prediction.astype(np.float32))
# define placeholder for inputs to network
xs = tf.placeholder(tf.float32, [None, 784])  # 28x28
ys = tf.placeholder(tf.float32, [None, 10])

# add output layer
prediction = add_layer(xs, 784, 10, activation_function=tf.nn.softmax)

# the error between prediction and real data
# Clip the softmax output before the log: a probability that underflows to 0
# would otherwise give log(0) = -inf and turn the loss into NaN.
cross_entropy = tf.reduce_mean(
    -tf.reduce_sum(ys * tf.log(tf.clip_by_value(prediction, 1e-10, 1.0)),
                   reduction_indices=[1]))  # loss
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

sess = tf.Session()
# important step: tf.initialize_all_variables() is deprecated in favour of
# tf.global_variables_initializer()
sess.run(tf.global_variables_initializer())

for i in range(1000):
    # train on a mini-batch of 100 images per step
    batch_xs, batch_ys = mnist.train.next_batch(100)
    sess.run(train_step, feed_dict={xs: batch_xs, ys: batch_ys})
    if i % 50 == 0:
        # report the accuracy on the test set every 50 steps
        print(compute_accuracy(mnist.test.images, mnist.test.labels))
这里因为网络原因(需要翻墙,下载 MNIST 数据集时连接超时),未能如愿得出越来越高的精确度结果。显示如下错误:
WARNING:tensorflow:From F:/BaiduNetdiskDownload/04-深度学习课程/TensorFlow教程/Tensorflow视频教程/tensorflowTUT源码/tf16_classification/full_code.py:9: read_data_sets (from tensorflow.contrib.learn.python.learn.datasets.mnist) is deprecated and will be removed in a future version.
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
WARNING:tensorflow:From D:\应用软件\python\lib\site-packages\tensorflow\contrib\learn\python\learn\datasets\mnist.py:260: maybe_download (from tensorflow.contrib.learn.python.learn.datasets.base) is deprecated and will be removed in a future version.
Instructions for updating:
Please write your own downloading logic.
WARNING:tensorflow:From D:\应用软件\python\lib\site-packages\tensorflow\contrib\learn\python\learn\datasets\base.py:252: _internal_retry.<locals>.wrap.<locals>.wrapped_fn (from tensorflow.contrib.learn.python.learn.datasets.base) is deprecated and will be removed in a future version.
Instructions for updating:
Please use urllib or similar directly.
Traceback (most recent call last):
File "F:/BaiduNetdiskDownload/04-深度学习课程/TensorFlow教程/Tensorflow视频教程/tensorflowTUT源码/tf16_classification/full_code.py", line 9, in <module>
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
File "D:\应用软件\python\lib\site-packages\tensorflow\python\util\deprecation.py", line 250, in new_func
return func(*args, **kwargs)
File "D:\应用软件\python\lib\site-packages\tensorflow\contrib\learn\python\learn\datasets\mnist.py", line 260, in read_data_sets
source_url + TRAIN_IMAGES)
File "D:\应用软件\python\lib\site-packages\tensorflow\python\util\deprecation.py", line 250, in new_func
return func(*args, **kwargs)
File "D:\应用软件\python\lib\site-packages\tensorflow\contrib\learn\python\learn\datasets\base.py", line 252, in maybe_download
temp_file_name, _ = urlretrieve_with_retry(source_url)
File "D:\应用软件\python\lib\site-packages\tensorflow\python\util\deprecation.py", line 250, in new_func
return func(*args, **kwargs)
File "D:\应用软件\python\lib\site-packages\tensorflow\contrib\learn\python\learn\datasets\base.py", line 205, in wrapped_fn
return fn(*args, **kwargs)
File "D:\应用软件\python\lib\site-packages\tensorflow\contrib\learn\python\learn\datasets\base.py", line 233, in urlretrieve_with_retry
return urllib.request.urlretrieve(url, filename)
File "D:\应用软件\python\lib\urllib\request.py", line 187, in urlretrieve
with contextlib.closing(urlopen(url, data)) as fp:
File "D:\应用软件\python\lib\urllib\request.py", line 162, in urlopen
return opener.open(url, data, timeout)
File "D:\应用软件\python\lib\urllib\request.py", line 465, in open
response = self._open(req, data)
File "D:\应用软件\python\lib\urllib\request.py", line 483, in _open
'_open', req)
File "D:\应用软件\python\lib\urllib\request.py", line 443, in _call_chain
result = func(*args)
File "D:\应用软件\python\lib\urllib\request.py", line 1283, in https_open
context=self._context, check_hostname=self._check_hostname)
File "D:\应用软件\python\lib\urllib\request.py", line 1243, in do_open
r = h.getresponse()
File "D:\应用软件\python\lib\http\client.py", line 1174, in getresponse
response.begin()
File "D:\应用软件\python\lib\http\client.py", line 282, in begin
version, status, reason = self._read_status()
File "D:\应用软件\python\lib\http\client.py", line 243, in _read_status
line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
File "D:\应用软件\python\lib\socket.py", line 575, in readinto
return self._sock.recv_into(b)
File "D:\应用软件\python\lib\ssl.py", line 924, in recv_into
return self.read(nbytes, buffer)
File "D:\应用软件\python\lib\ssl.py", line 786, in read
return self._sslobj.read(len, buffer)
File "D:\应用软件\python\lib\ssl.py", line 570, in read
v = self._sslobj.read(len, buffer)
TimeoutError: [WinError 10060] 由于连接方在一段时间后没有正确答复或连接的主机没有反应,连接尝试失败。
尝试1:参考 https://www.cnblogs.com/8023spz/p/9545986.html 关闭防火墙,仍未能得出正确结果。
尝试2:最后,参考https://morvanzhou.github.io/tutorials/machine-learning/tensorflow/5-01-classifier/中评论的方法,直接将http://yann.lecun.com/exdb/mnist/中的压缩文件下载到MNIST_data目录下,就可以啦,完美~~~
0.1524
0.6469
0.7477
0.7853
0.8095
0.8255
0.839
0.8389
0.8495
0.8495
0.8551
0.8595
0.865
0.8658
0.866
0.8672
0.8688
0.8725
0.8776
0.8779
18.dropout(随机失活)解决overfitting问题
https://morvanzhou.github.io/tutorials/machine-learning/tensorflow/5-02-A-overfitting/
解决过拟合问题的方法:
- 增加数据量:大部分过拟合产生的原因是因为数据量太少了,数据量增多,拟合线可被拉直
- 运用正规化:L1、L2 regularization 等等,这些方法适用于大多数的机器学习,保证学出来的线条不会过于扭曲
- dropout:专门用在神经网络的正规化方法。在训练的时候,我们随机忽略掉一些神经元和神经联结,使这个神经网络变得“不完整”,然后用这个不完整的神经网络训练一次;下一次训练时再随机忽略另一些神经元。Dropout 的做法是从根本上让神经网络没机会过度依赖某一部分特定的神经元。
dropout解决overfitting方法:
本节内容需要用到 sklearn 数据集当中的数据,先导入所需模块并定义 keep_prob:
import tensorflow as tf
from sklearn.datasets import load_digits
# sklearn.cross_validation was removed in scikit-learn 0.20; train_test_split
# now lives in sklearn.model_selection. Fall back for older installations.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import LabelBinarizer
# keeping probability for dropout, supplied through feed_dict at run time
keep_prob = tf.placeholder(tf.float32)
...
...
# inside add_layer: apply dropout to the layer's pre-activation values
Wx_plus_b = tf.nn.dropout(Wx_plus_b, keep_prob)
这里的 keep_prob 是保留概率,即我们要保留的结果所占比例。它作为一个 placeholder,在 run 时传入;当 keep_prob=1 的时候,相当于 100% 保留,也就是 dropout 没有起作用。
# Load the digits dataset shipped with scikit-learn
digits = load_digits()
X = digits.data
y = digits.target
# Binarize the integer labels so they can be fed to the 10-wide ys placeholder
y = LabelBinarizer().fit_transform(y)
# Split into training and test data; test_size=.3 holds out 30% for testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.3)
其中 X_train 是训练数据,X_test 是测试数据。然后添加隐含层和输出层:
# add a hidden layer (64 -> 50, tanh) followed by the output layer (50 -> 10, softmax)
l1 = add_layer(xs, 64, 50, 'l1', activation_function=tf.nn.tanh)
prediction = add_layer(l1, 50, 10, 'l2', activation_function=tf.nn.softmax)
loss函数(即最优化目标函数)选用交叉熵函数。交叉熵用来衡量预测值和真实值的相似程度,如果完全相同,交叉熵就等于零。
# Cross-entropy loss: sum -ys * log(prediction) along axis 1, then average the per-row sums
cross_entropy = tf.reduce_mean(-tf.reduce_sum(ys * tf.log(prediction),
reduction_indices=[1])) # loss
train方法(最优化算法)采用梯度下降法。
# Gradient-descent op (learning rate 0.5) that minimizes the cross-entropy loss
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
最后开始train,总共训练500次。
# Training step with dropout active: 0.5 is fed to the keep_prob placeholder
sess.run(train_step, feed_dict={xs: X_train, ys: y_train, keep_prob: 0.5})
# Alternative without dropout (keep_prob 1), kept for comparison:
#sess.run(train_step, feed_dict={xs: X_train, ys: y_train, keep_prob: 1})
"""
Please note, this code is only for python 3+. If you are using python 2+, please modify the code accordingly.
"""
import tensorflow as tf
from sklearn.datasets import load_digits
# sklearn.cross_validation was removed in scikit-learn 0.20; train_test_split
# now lives in sklearn.model_selection. Fall back for older installations.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import LabelBinarizer

# load data
digits = load_digits()
X = digits.data
y = digits.target
# binarize the integer labels so they can be fed to the 10-wide ys placeholder
y = LabelBinarizer().fit_transform(y)
# test_size=.3 holds out 30% of the samples as test data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.3)
def add_layer(inputs, in_size, out_size, layer_name, activation_function=None, ):
    """Add one fully connected layer with dropout and return its output tensor.

    Args:
        inputs: Tensor that is multiplied by the layer's weight matrix.
        in_size: Number of rows of the weight matrix (input features).
        out_size: Number of columns of the weight matrix (units in the layer).
        layer_name: Prefix of the histogram summary recorded for the outputs.
        activation_function: Optional callable applied after dropout.
            When ``None`` the layer stays linear.

    Returns:
        The output tensor of the layer.

    Note:
        Dropout reads the module-level ``keep_prob`` placeholder, so that
        placeholder has to exist before this function is called.
    """
    # Weights are drawn from a normal distribution; biases start at 0.1.
    Weights = tf.Variable(tf.random_normal([in_size, out_size]))
    biases = tf.Variable(tf.zeros([1, out_size]) + 0.1, )
    Wx_plus_b = tf.matmul(inputs, Weights) + biases
    # here to dropout
    Wx_plus_b = tf.nn.dropout(Wx_plus_b, keep_prob)
    if activation_function is None:
        outputs = Wx_plus_b
    else:
        outputs = activation_function(Wx_plus_b, )
    # record the layer outputs for TensorBoard (tf.summary API, TensorFlow >= 0.12)
    tf.summary.histogram(layer_name + '/outputs', outputs)
    return outputs
# define placeholder for inputs to network
keep_prob = tf.placeholder(tf.float32)  # dropout keeping probability, fed at run time
xs = tf.placeholder(tf.float32, [None, 64])  # 8x8
ys = tf.placeholder(tf.float32, [None, 10])

# add a hidden layer followed by the output layer
l1 = add_layer(xs, 64, 50, 'l1', activation_function=tf.nn.tanh)
prediction = add_layer(l1, 50, 10, 'l2', activation_function=tf.nn.softmax)

# the loss between prediction and real data
# Clip the softmax output before the log: a probability that underflows to 0
# would otherwise give log(0) = -inf and turn the loss (and summaries) into NaN.
cross_entropy = tf.reduce_mean(
    -tf.reduce_sum(ys * tf.log(tf.clip_by_value(prediction, 1e-10, 1.0)),
                   reduction_indices=[1]))  # loss
tf.summary.scalar('loss', cross_entropy)  # tensorflow >= 0.12
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

sess = tf.Session()
merged = tf.summary.merge_all()  # tensorflow >= 0.12

# summary writers: one log directory each for the training and the test curves
train_writer = tf.summary.FileWriter("logs/train", sess.graph)  # tensorflow >= 0.12
test_writer = tf.summary.FileWriter("logs/test", sess.graph)  # tensorflow >= 0.12

# tf.initialize_all_variables() is deprecated in favour of
# tf.global_variables_initializer()
sess.run(tf.global_variables_initializer())

for i in range(500):
    # here to determine the keeping probability
    sess.run(train_step, feed_dict={xs: X_train, ys: y_train, keep_prob: 0.5})
    if i % 50 == 0:
        # record loss; keep_prob is 1 so dropout is switched off while measuring
        train_result = sess.run(merged, feed_dict={xs: X_train, ys: y_train, keep_prob: 1})
        test_result = sess.run(merged, feed_dict={xs: X_test, ys: y_test, keep_prob: 1})
        train_writer.add_summary(train_result, i)
        test_writer.add_summary(test_result, i)

# close the writers so that all pending events are flushed to disk
train_writer.close()
test_writer.close()