1:设置python,numpy,matplotlib
#设置Python环境:numpy用于数值计算,matplotlib用于画图
# set up Python environment: numpy for numerical routines, and matplotlib for plotting
import numpy as np
import matplotlib.pyplot as plt
# Configure matplotlib's display defaults in a single call.
plt.rcParams.update({
    'figure.figsize': (10, 10),        # large images
    'image.interpolation': 'nearest',  # don't interpolate: show square pixels
    'image.cmap': 'gray',              # grayscale rather than a (potentially misleading) color heatmap
})
2:加载caffe
# The caffe module needs to be on the Python path; add the pycaffe
# directory explicitly before importing it.
import sys
# Absolute path of the Caffe checkout on this machine -- edit it to match
# your installation. Keep the trailing slash: the path is extended below by
# plain string concatenation.
caffe_root = '/home/amax/Downloads/caffe/'
# Insert at position 0 so this directory is searched first.
sys.path.insert(0, caffe_root + 'python')
import caffe
# If you get "No module named _caffe", either you have not built pycaffe or you have the wrong path.
3:加载网络,设置为CPU模式
# Run on the CPU and build the reference CaffeNet in test mode.
caffe.set_mode_cpu()

# model_def defines the structure of the model; model_weights contains the trained weights.
model_def = caffe_root + 'models/bvlc_reference_caffenet/deploy.prototxt'
model_weights = caffe_root + 'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel'

# caffe.TEST selects test mode (e.g., dropout is not performed).
net = caffe.Net(model_def, model_weights, caffe.TEST)
4:图像预处理
# load the mean ImageNet image (as distributed with Caffe) for subtraction
mu = np.load(caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy')
mu = mu.mean(1).mean(1) # average over pixels to obtain the mean (BGR) pixel values
print 'mean-subtracted values:', zip('BGR', mu)
输出为:mean-subtracted values: [('B', 104.0069879317889), ('G', 116.66876761696767), ('R', 122.6789143406786)]
# Build the preprocessing transformer for the network input called 'data'.
input_shape = net.blobs['data'].data.shape
transformer = caffe.io.Transformer({'data': input_shape})

transformer.set_transpose('data', (2, 0, 1))     # move image channels to the outermost dimension
transformer.set_mean('data', mu)                 # subtract the dataset-mean value in each channel
transformer.set_raw_scale('data', 255)           # rescale from [0, 1] to [0, 255]
transformer.set_channel_swap('data', (2, 1, 0))  # swap channels from RGB to BGR
# Set the size of the input. This can be skipped if the default is fine, and
# it can be changed again later, e.g. for different batch sizes.
# Arguments: batch size, 3 channels (BGR), 227x227 image.
net.blobs['data'].reshape(50, 3, 227, 227)

# Load the example image and run it through the preprocessing transformer.
image = caffe.io.load_image(caffe_root + 'examples/images/cat.jpg')
transformed_image = transformer.preprocess('data', image)
plt.imshow(image)  # show the untransformed image

# Copy the image data into the memory allocated for the net.
net.blobs['data'].data[...] = transformed_image
5:执行分类
output = net.forward()
output_prob = output['prob'][0] # the output probability vector for the first image in the batch
print 'predicted class is:', output_prob.argmax()
输出为:predicted class is: 281
#推断分类标签
# load ImageNet labels
labels_file = caffe_root + 'data/ilsvrc12/synset_words.txt'
if not os.path.exists(labels_file):
!../data/ilsvrc12/get_ilsvrc_aux.sh
labels = np.loadtxt(labels_file, str, delimiter='\t')
print 'output label:', labels[output_prob.argmax()]
输出为:output label: n02123045 tabby, tabby cat
#输出前5的分类推断
# sort top five predictions from softmax output
top_inds = output_prob.argsort()[::-1][:5] # reverse sort and take five largest items
print 'probabilities and labels:'
zip(output_prob[top_inds], labels[top_inds])
输出为:probabilities and labels:
[(0.31243637, 'n02123045 tabby, tabby cat'),
(0.2379719, 'n02123159 tiger cat'),
(0.12387239, 'n02124075 Egyptian cat'),
(0.10075711, 'n02119022 red fox, Vulpes vulpes'),
(0.070957087, 'n02127052 lynx, catamount')]
6:使用GPU运行我们的程序
# Switch Caffe to GPU mode and run one forward pass.
caffe.set_device(0) # if we have multiple GPUs, pick the first one
caffe.set_mode_gpu()
net.forward() # run once before timing to set up memory
7:查看网络的卷积层
首先查看每一层的激活值,储存在net.blobs中
# For each layer, show the shape of its output blob.
# items() and a single-argument print(...) behave the same on Python 2 and 3.
for layer_name, blob in net.blobs.items():
    print(layer_name + '\t' + str(blob.data.shape))
net.blobs是一个有序字典,结构如下所示:
OrderedDict([('data', <caffe._caffe.Blob object at 0x7116ed8>), ('conv1', <caffe._caffe.Blob object at 0x7116e60>), ('pool1', <caffe._caffe.Blob object at 0x7116de8>), ('norm1', <caffe._caffe.Blob object at 0x7116d70>), ('conv2', <caffe._caffe.Blob object at 0x7116cf8>), ('pool2', <caffe._caffe.Blob object at 0x7116c80>), ('norm2', <caffe._caffe.Blob object at 0x7116f50>), ('conv3', <caffe._caffe.Blob object at 0x663d050>), ('conv4', <caffe._caffe.Blob object at 0x663d0c8>), ('conv5', <caffe._caffe.Blob object at 0x663d140>), ('pool5', <caffe._caffe.Blob object at 0x663d1b8>), ('fc6', <caffe._caffe.Blob object at 0x663d230>), ('fc7', <caffe._caffe.Blob object at 0x663d2a8>), ('fc8', <caffe._caffe.Blob object at 0x663d320>), ('prob', <caffe._caffe.Blob object at 0x663d398>)])
输出为:
data (50, 3, 227, 227)
conv1 (50, 96, 55, 55)
pool1 (50, 96, 27, 27)
norm1 (50, 96, 27, 27)
conv2 (50, 256, 27, 27)
pool2 (50, 256, 13, 13)
norm2 (50, 256, 13, 13)
conv3 (50, 384, 13, 13)
conv4 (50, 384, 13, 13)
conv5 (50, 256, 13, 13)
pool5 (50, 256, 6, 6)
fc6 (50, 4096)
fc7 (50, 4096)
fc8 (50, 1000)
prob (50, 1000)
代表的是(batch_size, channel_dim, height, width)
接着查看每一层卷积核的值,储存在net.params中
net.params是一个有序字典:结构如下图所示:
OrderedDict([('conv1', <caffe._caffe.BlobVec object at 0x69943d0>), ('conv2', <caffe._caffe.BlobVec object at 0x6994c90>), ('conv3', <caffe._caffe.BlobVec object at 0x69946e0>), ('conv4', <caffe._caffe.BlobVec object at 0x69947c0>), ('conv5', <caffe._caffe.BlobVec object at 0x6994830>), ('fc6', <caffe._caffe.BlobVec object at 0x6994600>), ('fc7', <caffe._caffe.BlobVec object at 0x6994910>), ('fc8', <caffe._caffe.BlobVec object at 0x6994980>)])
遍历net.params,打印每一层的 param[0].data.shape(权重)和 param[1].data.shape(偏置),输出为:
conv1 (96, 3, 11, 11) (96,)
conv2 (256, 48, 5, 5) (256,)
conv3 (384, 256, 3, 3) (384,)
conv4 (384, 192, 3, 3) (384,)
conv5 (256, 192, 3, 3) (256,)
fc6 (4096, 9216) (4096,)
fc7 (4096, 4096) (4096,)
fc8 (1000, 4096) (1000,)
代表的是(output_channels, input_channels, filter_height, filter_width),(output_channels)(偏置项)
#定义一个函数来查看特征图
def vis_square(data):
    """Show a stack of 2-D maps (or 3-channel patches) as one tiled image.

    Take an array of shape (n, height, width) or (n, height, width, 3) and
    display each (height, width) slice in a grid of size approximately
    sqrt(n) by sqrt(n) using plt.imshow, with the axes turned off.
    Nothing is returned.
    """
    # Normalize the data to [0, 1] for display. A constant input has a zero
    # value range; it is shown as all zeros instead of dividing by zero.
    lowest = data.min()
    value_range = data.max() - lowest
    data = data - lowest
    if value_range != 0:
        data = data / value_range

    # Force the number of tiles to be a perfect square.
    n = int(np.ceil(np.sqrt(data.shape[0])))
    padding = (((0, n ** 2 - data.shape[0]),   # extra blank tiles to fill the grid
                (0, 1), (0, 1))                # add some space between filters
               + ((0, 0),) * (data.ndim - 3))  # don't pad the last dimension (if there is one)
    data = np.pad(data, padding, mode='constant', constant_values=1)  # pad with ones (white)

    # Tile the filters into an image: split the first axis into an (n, n)
    # grid, move each grid axis next to its pixel axis, then merge the pairs.
    padded_ndim = data.ndim
    data = data.reshape((n, n) + data.shape[1:])
    data = data.transpose((0, 2, 1, 3) + tuple(range(4, padded_ndim + 1)))
    data = data.reshape((n * data.shape[1], n * data.shape[3]) + data.shape[4:])

    plt.imshow(data)
    plt.axis('off')
# Visualize the conv1 filters.
# The parameters of a layer are a list of [weights, biases]; take the weights.
conv1_weights = net.params['conv1'][0]
filters = conv1_weights.data
# Move axis 1 to the end so each filter is passed as (height, width, channels).
vis_square(filters.transpose(0, 2, 3, 1))
有96个卷积核,同时显示
# Visualize the conv1 activations: index 0 along the first axis, first 36 entries of the second.
conv1_activations = net.blobs['conv1'].data
feat = conv1_activations[0, :36]
vis_square(feat)
显示前36个通道的激活特征图
# Visualize the pool5 activations at index 0 along the first axis.
pool5_activations = net.blobs['pool5'].data
feat = pool5_activations[0]
vis_square(feat)
显示全部256个通道的激活特征图
# Plot the fc6 output values and a histogram of the positive ones.
feat = net.blobs['fc6'].data[0]

# Top panel: the raw output values.
plt.subplot(2, 1, 1)
plt.plot(feat.flat)

# Bottom panel: histogram of the strictly positive values only.
positive_mask = feat.flat > 0
positive_values = feat.flat[positive_mask]
plt.subplot(2, 1, 2)
_ = plt.hist(positive_values, bins=100)  # the return value is not used
# Plot the output values of the 'prob' blob at index 0 (a line plot, not a histogram).
feat = net.blobs['prob'].data[0]
plt.figure(figsize=(15, 3))
plt.plot(feat.flat)