Code: https://github.com/kratzert/finetune_alexnet_with_tensorflow (Jupyter notebook)
AlexNet: code walkthrough
1. First, define a class:
class AlexNet(object):
2. Next, define the class initializer:
Note: 'bvlc_alexnet.npy' holds AlexNet weights pretrained by someone else, x is the input image tensor, keep_prob is the dropout keep probability used in the fully connected layers, and num_classes is the number of classes.
def __init__(self, x, keep_prob, num_classes, skip_layer,
             weights_path='DEFAULT'):
    """Create the graph of the AlexNet model.

    Args:
        x: Placeholder for the input tensor.
        keep_prob: Dropout probability.
        num_classes: Number of classes in the dataset.
        skip_layer: List of names of the layers that should be trained from
            scratch.
        weights_path: Complete path to the pretrained weight file, if it
            isn't in the same folder as this code.
    """
    # Parse input arguments into class variables
    self.X = x
    self.NUM_CLASSES = num_classes
    self.KEEP_PROB = keep_prob
    self.SKIP_LAYER = skip_layer

    if weights_path == 'DEFAULT':
        self.WEIGHTS_PATH = 'bvlc_alexnet.npy'
    else:
        self.WEIGHTS_PATH = weights_path

    # Call the create function to build the computational graph of AlexNet
    self.create()
3. The initializer calls create():
def create(self):
    """Create the network graph."""
    # 1st Layer: Conv (w ReLu) -> Lrn -> Pool
    conv1 = conv(self.X, 11, 11, 96, 4, 4, padding='VALID', name='conv1')
    norm1 = lrn(conv1, 2, 2e-05, 0.75, name='norm1')
    pool1 = max_pool(norm1, 3, 3, 2, 2, padding='VALID', name='pool1')

    # 2nd Layer: Conv (w ReLu) -> Lrn -> Pool with 2 groups
    conv2 = conv(pool1, 5, 5, 256, 1, 1, groups=2, name='conv2')
    norm2 = lrn(conv2, 2, 2e-05, 0.75, name='norm2')
    pool2 = max_pool(norm2, 3, 3, 2, 2, padding='VALID', name='pool2')

    # 3rd Layer: Conv (w ReLu)
    conv3 = conv(pool2, 3, 3, 384, 1, 1, name='conv3')

    # 4th Layer: Conv (w ReLu) split into two groups
    conv4 = conv(conv3, 3, 3, 384, 1, 1, groups=2, name='conv4')

    # 5th Layer: Conv (w ReLu) -> Pool split into two groups
    conv5 = conv(conv4, 3, 3, 256, 1, 1, groups=2, name='conv5')
    pool5 = max_pool(conv5, 3, 3, 2, 2, padding='VALID', name='pool5')

    # 6th Layer: Flatten -> FC (w ReLu) -> Dropout
    flattened = tf.reshape(pool5, [-1, 6*6*256])
    fc6 = fc(flattened, 6*6*256, 4096, name='fc6')
    dropout6 = dropout(fc6, self.KEEP_PROB)

    # 7th Layer: FC (w ReLu) -> Dropout
    fc7 = fc(dropout6, 4096, 4096, name='fc7')
    dropout7 = dropout(fc7, self.KEEP_PROB)

    # 8th Layer: FC and return unscaled activations
    self.fc8 = fc(dropout7, 4096, self.NUM_CLASSES, relu=False, name='fc8')
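As a quick sanity check of the 6*6*256 used in the flatten step, here is the spatial-size arithmetic for a 227x227 input (a small sketch; the valid() helper is just for this illustration, not part of the repository):

# Trace the spatial size through the layers defined in create(), assuming a 227x227 input.
def valid(size, k, s):
    """Output size of a VALID convolution/pooling with kernel k and stride s."""
    return (size - k) // s + 1

s = 227
s = valid(s, 11, 4)    # conv1 (VALID, 11x11, stride 4) -> 55
s = valid(s, 3, 2)     # pool1 (VALID, 3x3, stride 2)   -> 27
                       # conv2 (SAME, stride 1)         -> 27
s = valid(s, 3, 2)     # pool2 (VALID, 3x3, stride 2)   -> 13
                       # conv3/conv4/conv5 (SAME)       -> 13
s = valid(s, 3, 2)     # pool5 (VALID, 3x3, stride 2)   -> 6
print(s, s * s * 256)  # 6 9216, i.e. 6*6*256 as used for fc6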
4. The conv layer:
Note: because we reuse weights trained by someone else, the layer structure must match the original AlexNet, which was trained across two GPUs; that is why some convolutions are split into groups.
Syntax used in this function (a short illustration follows this list):
1. for i, k in zip(input_groups, weight_groups)
   for (a, b, c) in zip(ta, tb, tc) takes one element from each of several equal-length sequences at a time and pairs them up.
2. tf.split
   tf.split(value, num_or_size_splits, axis=0, num=None, name='split')
   value: the tensor to split
   num_or_size_splits: how many pieces to split it into (or their sizes)
   axis: the dimension along which to split
   There are two splitting modes:
   1. If num_or_size_splits is an integer, the tensor is split evenly into that many smaller tensors along the given axis.
   2. If num_or_size_splits is a vector (its elements must sum to the size of that dimension), the tensor is split into one piece per vector element, with the given sizes.
3. tf.concat() is the TensorFlow function for concatenating tensors along a given axis.
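A minimal, self-contained illustration of tf.split and tf.concat (assuming TensorFlow 1.x, as in the rest of the code; the tensor here is made up for the example):

import tensorflow as tf

x = tf.ones([1, 13, 13, 256])                              # an NHWC tensor
halves = tf.split(axis=3, num_or_size_splits=2, value=x)   # two tensors of shape [1, 13, 13, 128]
parts = tf.split(x, [64, 192], axis=3)                     # sizes must sum to 256
back = tf.concat(axis=3, values=halves)                    # [1, 13, 13, 256] again

print(halves[0].get_shape())  # (1, 13, 13, 128)
print(parts[1].get_shape())   # (1, 13, 13, 192)
print(back.get_shape())       # (1, 13, 13, 256)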
def conv(x, filter_height, filter_width, num_filters, stride_y, stride_x, name,
         padding='SAME', groups=1):
    """Create a convolution layer.

    Adapted from: https://github.com/ethereon/caffe-tensorflow
    """
    # Get number of input channels
    input_channels = int(x.get_shape()[-1])

    # Create lambda function for the convolution
    convolve = lambda i, k: tf.nn.conv2d(i, k,
                                         strides=[1, stride_y, stride_x, 1],
                                         padding=padding)

    with tf.variable_scope(name) as scope:
        # Create tf variables for the weights and biases of the conv layer
        weights = tf.get_variable('weights', shape=[filter_height,
                                                    filter_width,
                                                    input_channels/groups,
                                                    num_filters])
        biases = tf.get_variable('biases', shape=[num_filters])

    if groups == 1:
        conv = convolve(x, weights)

    # In the case of multiple groups, split inputs & weights and convolve per group
    else:
        # Split input and weights and convolve them separately
        input_groups = tf.split(axis=3, num_or_size_splits=groups, value=x)
        weight_groups = tf.split(axis=3, num_or_size_splits=groups,
                                 value=weights)
        output_groups = [convolve(i, k) for i, k in zip(input_groups, weight_groups)]

        # Concat the convolved output together again
        conv = tf.concat(axis=3, values=output_groups)

    # Add biases
    bias = tf.reshape(tf.nn.bias_add(conv, biases), tf.shape(conv))

    # Apply relu function
    relu = tf.nn.relu(bias, name=scope.name)

    return relu
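Concretely, for conv2 above (96 input channels, 256 filters, groups=2) the shape bookkeeping works out as follows (a rough sketch, not code from the repository):

# Shape bookkeeping for conv2 with groups=2; the pool1 output has 96 channels.
in_channels, num_filters, groups = 96, 256, 2
weights_shape = [5, 5, in_channels // groups, num_filters]  # [5, 5, 48, 256], one shared variable
group_input_channels = in_channels // groups                # 48 channels per input group
group_filters = num_filters // groups                       # 128 filters per group after tf.split on axis 3
# Each group: conv([?, 27, 27, 48], [5, 5, 48, 128]) -> [?, 27, 27, 128]
# tf.concat over the two group outputs on axis 3     -> [?, 27, 27, 256]
print(weights_shape, group_input_channels, group_filters)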
5. The fully connected layer:
Note that this computes xW + b, not Wx + b:
def fc(x, num_in, num_out, name, relu=True):
    """Create a fully connected layer."""
    with tf.variable_scope(name) as scope:
        # Create tf variables for the weights and biases
        weights = tf.get_variable('weights', shape=[num_in, num_out],
                                  trainable=True)
        biases = tf.get_variable('biases', [num_out], trainable=True)

        # Matrix multiply weights and inputs and add bias
        act = tf.nn.xw_plus_b(x, weights, biases, name=scope.name)

    if relu:
        # Apply ReLu non linearity
        relu = tf.nn.relu(act)
        return relu
    else:
        return act
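tf.nn.xw_plus_b(x, weights, biases) is just tf.matmul(x, weights) + biases; because x has shape [batch, num_in] and weights has shape [num_in, num_out], the product is xW rather than Wx. A tiny check (the values are made up for the example):

import numpy as np
import tensorflow as tf

x = tf.constant(np.random.rand(2, 3), dtype=tf.float32)  # [batch, num_in]
w = tf.constant(np.random.rand(3, 4), dtype=tf.float32)  # [num_in, num_out]
b = tf.constant(np.random.rand(4), dtype=tf.float32)     # [num_out]

with tf.Session() as sess:
    a, m = sess.run([tf.nn.xw_plus_b(x, w, b), tf.matmul(x, w) + b])
print(np.allclose(a, m))  # True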
6. The pooling, dropout and LRN layers (Andrew Ng has remarked that LRN adds little benefit):
def max_pool(x, filter_height, filter_width, stride_y, stride_x, name,
             padding='SAME'):
    """Create a max pooling layer."""
    return tf.nn.max_pool(x, ksize=[1, filter_height, filter_width, 1],
                          strides=[1, stride_y, stride_x, 1],
                          padding=padding, name=name)


def lrn(x, radius, alpha, beta, name, bias=1.0):
    """Create a local response normalization layer."""
    return tf.nn.local_response_normalization(x, depth_radius=radius,
                                              alpha=alpha, beta=beta,
                                              bias=bias, name=name)


def dropout(x, keep_prob):
    """Create a dropout layer."""
    return tf.nn.dropout(x, keep_prob)
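For reference, tf.nn.local_response_normalization normalizes each activation by the sum of squares of its neighbours along the channel axis: out[..., d] = in[..., d] / (bias + alpha * sum of in[..., j]^2 for j in [d-radius, d+radius]) ** beta. A small numpy check of this formula against the TF op, using the norm1/norm2 parameters from above:

import numpy as np
import tensorflow as tf

radius, alpha, beta, bias = 2, 2e-05, 0.75, 1.0
x = np.random.rand(1, 3, 3, 8).astype(np.float32)

# Manual LRN: normalize each channel by its neighbourhood along the depth axis
out = np.zeros_like(x)
for d in range(x.shape[-1]):
    lo, hi = max(0, d - radius), min(x.shape[-1], d + radius + 1)
    sqr_sum = np.sum(x[..., lo:hi] ** 2, axis=-1)
    out[..., d] = x[..., d] / (bias + alpha * sqr_sum) ** beta

with tf.Session() as sess:
    tf_out = sess.run(tf.nn.local_response_normalization(
        x, depth_radius=radius, alpha=alpha, beta=beta, bias=bias))
print(np.allclose(out, tf_out, atol=1e-5))  # True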
7. Loading the pretrained weights:
def load_initial_weights(self, session):
    """Load weights from file into network.

    As the weights from http://www.cs.toronto.edu/~guerzhoy/tf_alexnet/
    come as a dict of lists (e.g. weights['conv1'] is a list) and not as
    a dict of dicts (e.g. weights['conv1'] is a dict with keys 'weights' &
    'biases') we need a special load function.
    """
    # Load the weights into memory
    weights_dict = np.load(self.WEIGHTS_PATH, encoding='bytes').item()

    # Loop over all layer names stored in the weights dict
    for op_name in weights_dict:

        # Check if the layer should be trained from scratch
        if op_name not in self.SKIP_LAYER:

            with tf.variable_scope(op_name, reuse=True):

                # Assign weights/biases to their corresponding tf variable
                for data in weights_dict[op_name]:

                    # Biases
                    if len(data.shape) == 1:
                        var = tf.get_variable('biases', trainable=False)
                        session.run(var.assign(data))

                    # Weights
                    else:
                        var = tf.get_variable('weights', trainable=False)
                        session.run(var.assign(data))
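This is where skip_layer/SKIP_LAYER matters: any layer listed there is skipped by load_initial_weights, so its variables keep their random initialization and can be retrained. A sketch of how this is typically wired up for finetuning (the 2-class setup and the train_layers list are illustrative, not from the original post):

import tensorflow as tf
from alexnet import AlexNet

train_layers = ['fc8']                               # layers to train from scratch
x = tf.placeholder(tf.float32, [None, 227, 227, 3])
keep_prob = tf.placeholder(tf.float32)

model = AlexNet(x, keep_prob, 2, train_layers)       # fc8 now has 2 output units

# Collect only the variables of the layers we want to retrain
var_list = [v for v in tf.trainable_variables()
            if v.name.split('/')[0] in train_layers]

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    model.load_initial_weights(sess)                 # every layer except fc8 gets pretrained weights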
8. AlexNet is now fully defined, so we can instantiate and use the model.
The last step is a softmax layer: in model = AlexNet(x, keep_prob, 1000, []), the 1000 is the number of output units of the final fully connected layer (fc8); applying softmax on top of fc8 gives the class probabilities.
import tensorflow as tf

from alexnet import AlexNet
from caffe_classes import class_names

# placeholder for input and dropout rate
x = tf.placeholder(tf.float32, [1, 227, 227, 3])
keep_prob = tf.placeholder(tf.float32)

# create model with default config (== no skip_layer and 1000 units in the last layer)
model = AlexNet(x, keep_prob, 1000, [])

# define activation of last layer as score
score = model.fc8

# create op to calculate softmax
softmax = tf.nn.softmax(score)
9. Finally, test on your own images:
Note that each image must be resized to 227x227x3, because that is the input size the AlexNet graph defined above expects.
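The loop below also assumes that imgs (a list of BGR images loaded with OpenCV) and imagenet_mean (the BGR channel mean subtracted before feeding the network) have already been defined, roughly like this (the file names are placeholders):

import cv2
import numpy as np
import matplotlib.pyplot as plt

# BGR mean of the ImageNet training images, as used in the accompanying notebook
imagenet_mean = np.array([104., 117., 124.], dtype=np.float32)

# cv2.imread returns images in BGR order; the file names here are placeholders
imgs = [cv2.imread(p) for p in ['test1.jpg', 'test2.jpg']]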
with tf.Session() as sess:

    # Initialize all variables
    sess.run(tf.global_variables_initializer())

    # Load the pretrained weights into the model
    model.load_initial_weights(sess)

    # Create figure handle
    fig2 = plt.figure(figsize=(15, 6))

    # Loop over all images
    for i, image in enumerate(imgs):

        # Convert image to float32 and resize to (227x227)
        img = cv2.resize(image.astype(np.float32), (227, 227))

        # Subtract the ImageNet mean
        img -= imagenet_mean

        # Reshape as needed to feed into model
        img = img.reshape((1, 227, 227, 3))

        # Run the session and calculate the class probability
        probs = sess.run(softmax, feed_dict={x: img, keep_prob: 1})

        # Get the class name of the class with the highest probability
        class_name = class_names[np.argmax(probs)]

        # Plot image with class name and prob in the title
        fig2.add_subplot(3, 2, i+1)
        plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        plt.title("Class: " + class_name + ", probability: %.4f" % probs[0, np.argmax(probs)])
        plt.axis('off')