Image classification with pycaffe (the Python interface to Caffe): visualizing convolution kernels, activation feature maps, and fully connected layer histograms

This post walks through image classification with Caffe's Python interface and shows how to visualize the convolution kernels, the activation feature maps, and histograms of the fully connected layer outputs, to help build intuition for how the deep model works.

1: Set up Python, numpy and matplotlib
# set up Python environment: numpy for numerical routines, and matplotlib for plotting
import numpy as np
import matplotlib.pyplot as plt
# set display defaults for pyplot
plt.rcParams['figure.figsize'] = (10, 10)        # large images
plt.rcParams['image.interpolation'] = 'nearest'  # don't interpolate: show square pixels
plt.rcParams['image.cmap'] = 'gray'  # use grayscale output rather than a (potentially misleading) color heatmap

2: Load caffe
# The caffe module needs to be on the Python path;
#  we'll add it here explicitly.
import os
import sys
caffe_root = '/home/amax/Downloads/caffe/'  # this file should be run from {caffe_root}/examples (otherwise change this line)
sys.path.insert(0, caffe_root + 'python')
import caffe
# If you get "No module named _caffe", either you have not built pycaffe or you have the wrong path.
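If the import fails, a quick sanity check on the build may help (a minimal sketch; it assumes the standard "make pycaffe" build, which places the compiled extension at python/caffe/_caffe.so):
# check that the compiled pycaffe extension actually exists under caffe_root
pycaffe_ext = caffe_root + 'python/caffe/_caffe.so'
if not os.path.isfile(pycaffe_ext):
    print 'pycaffe extension not found at', pycaffe_ext
    print 'run "make pycaffe" inside', caffe_root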

3: Load the network and set CPU mode
caffe.set_mode_cpu()
model_def = caffe_root + 'models/bvlc_reference_caffenet/deploy.prototxt'
model_weights = caffe_root + 'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel'
net = caffe.Net(model_def,      # defines the structure of the model
                model_weights,  # contains the trained weights
                caffe.TEST)     # use test mode (e.g., don't perform dropout)
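If constructing the net fails because the .caffemodel file is missing, fetch the pretrained weights first with Caffe's model-download script (a sketch following the original Caffe example notebook; the line starting with ! is IPython shell magic and assumes the notebook is run from {caffe_root}/examples):
# download the pretrained CaffeNet weights if they are not present yet
if not os.path.isfile(model_weights):
    print 'Downloading pre-trained CaffeNet model...'
    !../scripts/download_model_binary.py ../models/bvlc_reference_caffenet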

4: Image preprocessing
# load the mean ImageNet image (as distributed with Caffe) for subtraction
mu = np.load(caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy')
mu = mu.mean(1).mean(1)  # average over pixels to obtain the mean (BGR) pixel values
print 'mean-subtracted values:', zip('BGR', mu)
Output: mean-subtracted values: [('B', 104.0069879317889), ('G', 116.66876761696767), ('R', 122.6789143406786)]
# create transformer for the input called 'data'
transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
transformer.set_transpose('data', (2,0,1))  # move image channels to outermost dimension
transformer.set_mean('data', mu)            # subtract the dataset-mean value in each channel
transformer.set_raw_scale('data', 255)      # rescale from [0, 1] to [0, 255]
transformer.set_channel_swap('data', (2,1,0))  # swap channels from RGB to BGR
# set the size of the input (we can skip this if we're happy
#  with the default; we can also change it later, e.g., for different batch sizes)
net.blobs['data'].reshape(50,        # batch size
                          3,         # 3-channel (BGR) images
                          227, 227)  # image size is 227x227
image = caffe.io.load_image(caffe_root + 'examples/images/cat.jpg')
transformed_image = transformer.preprocess('data', image)
plt.imshow(image)

# copy the image data into the memory allocated for the net
net.blobs['data'].data[...] = transformed_image
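To confirm the preprocessing is set up correctly, the Transformer can also undo it: deprocess reverses the channel swap, mean subtraction and scaling, so the result should look like the original image (a minimal sanity-check sketch):
# invert the preprocessing to sanity-check the Transformer settings
recovered = transformer.deprocess('data', transformed_image)
plt.imshow(recovered)  # should look like the original cat image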

5: Run the classification
output = net.forward()
output_prob = output['prob'][0]  # the output probability vector for the first image in the batch
print 'predicted class is:', output_prob.argmax()
Output: predicted class is: 281
# map the predicted class index to an ImageNet label
# load ImageNet labels
labels_file = caffe_root + 'data/ilsvrc12/synset_words.txt'
if not os.path.exists(labels_file):
    !../data/ilsvrc12/get_ilsvrc_aux.sh  # IPython shell escape; from a plain shell, run data/ilsvrc12/get_ilsvrc_aux.sh inside {caffe_root}
labels = np.loadtxt(labels_file, str, delimiter='\t')
print 'output label:', labels[output_prob.argmax()]
Output: output label: n02123045 tabby, tabby cat
# output the top-5 predictions
# sort top five predictions from softmax output
top_inds = output_prob.argsort()[::-1][:5]  # reverse sort and take five largest items
print 'probabilities and labels:'
zip(output_prob[top_inds], labels[top_inds])
Output: probabilities and labels:
[(0.31243637, 'n02123045 tabby, tabby cat'),
 (0.2379719, 'n02123159 tiger cat'),
 (0.12387239, 'n02124075 Egyptian cat'),
 (0.10075711, 'n02119022 red fox, Vulpes vulpes'),
 (0.070957087, 'n02127052 lynx, catamount')]
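The same top-5 list can be printed in a more readable form (a small sketch reusing top_inds from above):
# print the top-5 predictions with their probabilities
for p, label in zip(output_prob[top_inds], labels[top_inds]):
    print '%.4f  %s' % (p, label)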

6: Run the program on the GPU
caffe.set_device(0)  # if we have multiple GPUs, pick the first one
caffe.set_mode_gpu()
net.forward()  # run once before timing to set up memory
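The Caffe example notebook times the forward pass with the %timeit magic; outside IPython, a rough CPU-vs-GPU comparison can be made with the time module (a minimal sketch; the absolute numbers depend entirely on your hardware):
import time
start = time.time()
net.forward()
print 'GPU forward pass: %.3f s' % (time.time() - start)
caffe.set_mode_cpu()
start = time.time()
net.forward()
print 'CPU forward pass: %.3f s' % (time.time() - start)
caffe.set_mode_gpu()  # switch back to the GPU for the rest of the post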

7: Inspect the network's layers and parameters
First, look at the activations of each layer after the forward pass; they are stored in net.blobs.
# for each layer, show the output shape
for layer_name, blob in net.blobs.iteritems():
    print layer_name + '\t' + str(blob.data.shape)
net.blobs is an OrderedDict; its structure looks like this:
OrderedDict([('data', <caffe._caffe.Blob object at 0x7116ed8>), ('conv1', <caffe._caffe.Blob object at 0x7116e60>), ('pool1', <caffe._caffe.Blob object at 0x7116de8>), ('norm1', <caffe._caffe.Blob object at 0x7116d70>), ('conv2', <caffe._caffe.Blob object at 0x7116cf8>), ('pool2', <caffe._caffe.Blob object at 0x7116c80>), ('norm2', <caffe._caffe.Blob object at 0x7116f50>), ('conv3', <caffe._caffe.Blob object at 0x663d050>), ('conv4', <caffe._caffe.Blob object at 0x663d0c8>), ('conv5', <caffe._caffe.Blob object at 0x663d140>), ('pool5', <caffe._caffe.Blob object at 0x663d1b8>), ('fc6', <caffe._caffe.Blob object at 0x663d230>), ('fc7', <caffe._caffe.Blob object at 0x663d2a8>), ('fc8', <caffe._caffe.Blob object at 0x663d320>), ('prob', <caffe._caffe.Blob object at 0x663d398>)])
Output:
data	(50, 3, 227, 227)
conv1	(50, 96, 55, 55)
pool1	(50, 96, 27, 27)
norm1	(50, 96, 27, 27)
conv2	(50, 256, 27, 27)
pool2	(50, 256, 13, 13)
norm2	(50, 256, 13, 13)
conv3	(50, 384, 13, 13)
conv4	(50, 384, 13, 13)
conv5	(50, 256, 13, 13)
pool5	(50, 256, 6, 6)
fc6	(50, 4096)
fc7	(50, 4096)
fc8	(50, 1000)
prob	(50, 1000)
These shapes are (batch_size, channel_dim, height, width).

Next, look at the learned parameters (the convolution kernels and fully connected weights), which are stored in net.params.
net.params is an OrderedDict; its structure looks like this:
OrderedDict([('conv1', <caffe._caffe.BlobVec object at 0x69943d0>), ('conv2', <caffe._caffe.BlobVec object at 0x6994c90>), ('conv3', <caffe._caffe.BlobVec object at 0x69946e0>), ('conv4', <caffe._caffe.BlobVec object at 0x69947c0>), ('conv5', <caffe._caffe.BlobVec object at 0x6994830>), ('fc6', <caffe._caffe.BlobVec object at 0x6994600>), ('fc7', <caffe._caffe.BlobVec object at 0x6994910>), ('fc8', <caffe._caffe.BlobVec object at 0x6994980>)])
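The shape listing below can be produced with a loop analogous to the one used for net.blobs; each entry in net.params holds the weights at index 0 and the biases at index 1:
# for each layer with parameters, show the weight and bias shapes
for layer_name, param in net.params.iteritems():
    print layer_name + '\t' + str(param[0].data.shape), str(param[1].data.shape)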
Output:
conv1	(96, 3, 11, 11) (96,)
conv2	(256, 48, 5, 5) (256,)
conv3	(384, 256, 3, 3) (384,)
conv4	(384, 192, 3, 3) (384,)
conv5	(256, 192, 3, 3) (256,)
fc6	(4096, 9216) (4096,)
fc7	(4096, 4096) (4096,)
fc8	(1000, 4096) (1000,)
These shapes are (output_channels, input_channels, filter_height, filter_width) for the weights and (output_channels,) for the biases.

# define a helper function to visualize sets of filters / feature maps
def vis_square(data):
    """Take an array of shape (n, height, width) or (n, height, width, 3)
       and visualize each (height, width) thing in a grid of size approx. sqrt(n) by sqrt(n)"""
    # normalize data for display
    data = (data - data.min()) / (data.max() - data.min())
    # force the number of filters to be square
    n = int(np.ceil(np.sqrt(data.shape[0])))
    padding = (((0, n ** 2 - data.shape[0]),
               (0, 1), (0, 1))                 # add some space between filters
               + ((0, 0),) * (data.ndim - 3))  # don't pad the last dimension (if there is one)
    data = np.pad(data, padding, mode='constant', constant_values=1)  # pad with ones (white)
    # tile the filters into an image
    data = data.reshape((n, n) + data.shape[1:]).transpose((0, 2, 1, 3) + tuple(range(4, data.ndim + 1)))
    data = data.reshape((n * data.shape[1], n * data.shape[3]) + data.shape[4:])
    plt.imshow(data); plt.axis('off')

# visualize the conv1 convolution kernels
# the parameters are a list of [weights, biases]
filters = net.params['conv1'][0].data
vis_square(filters.transpose(0, 2, 3, 1))
There are 96 conv1 kernels, all displayed together in one grid.

# visualize the conv1 activation feature maps
feat = net.blobs['conv1'].data[0, :36]
vis_square(feat)
This shows the first 36 feature maps of conv1.


# visualize the pool5 activation feature maps
feat = net.blobs['pool5'].data[0]
vis_square(feat)
This shows all 256 feature maps of pool5.
# plot the fc6 (fully connected layer) outputs and a histogram of the positive values
feat = net.blobs['fc6'].data[0]
plt.subplot(2, 1, 1)
plt.plot(feat.flat)
plt.subplot(2, 1, 2)
_ = plt.hist(feat.flat[feat.flat > 0], bins=100)
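Because the ReLU after fc6 is applied in place, the fc6 blob holds post-ReLU values and a large fraction of them are exactly zero; that sparsity can be quantified directly (a small sketch):
# fraction of fc6 units that are still active (nonzero) after the in-place ReLU
print 'active fc6 units: %.1f%%' % (100.0 * (feat > 0).mean())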

# plot the final probability output (prob layer)
feat = net.blobs['prob'].data[0]
plt.figure(figsize=(15, 3))
plt.plot(feat.flat)
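The single dominant spike in this plot sits at the predicted class index, which can be checked directly (a minimal sketch; 281 is the tabby-cat class found earlier):
# the peak of the probability vector is the predicted class
print 'peak at index %d with probability %.4f' % (feat.argmax(), feat.max())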


