先上公式
Tensorflow中对卷积的定义:
padding=‘SAME’:
$$Output_{h,w} = \mathrm{ceil}\left(\frac{Input_{h,w}}{Stride_{h,w}}\right)$$
padding=‘VALID’:
$$Output_{h,w} = \mathrm{ceil}\left(\frac{Input_{h,w} - Filter_{h,w} + 1}{Stride_{h,w}}\right)$$
以上Output对应输出feature map、Input对应输入的原图、Filter对应卷积核、Stride对应步长
通过input、output、filter来计算padding
$$2P_{h,w} = (Output_{h,w} - 1) \times Stride_{h,w} - Input_{h,w} + Filter_{h,w}$$
PS:这里需要注意的是,2P 算出来可能为奇数。比如 padding='SAME' 时,Input = [350,500],Filter = [5,5],Stride = [2,2],Output = [175,250],最终计算的 2P_h = (175-1)*2-350+5 = 348-350+5 = 3。一开始我给上、左分配 2,给下、右分配 1,导致计算结果与 TensorFlow 不匹配;后来发现应该是上、左分配较小的一份(即 2P 整除 2 的结果),下、右分配剩下的部分,也就是分别为 1 和 2,这样才通过了比对测试。
实现代码和注释
案例来自csdn人工智能课程第十周的进阶作业基本代码、自己实现conv2d,并通过测试
# 声明一些用到的库
import base64
from io import BytesIO
import math
import numpy as np
import tensorflow as tf
from matplotlib import pyplot as plt
from PIL import Image
from const import img
import os
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
# 自己实现的卷积函数
def conv2d(input, filter, stride, padding):
    """Compute a 2-D convolution with NumPy, following tf.nn.conv2d conventions.

    The filter is applied without flipping (cross-correlation), every window
    is reduced over height, width and input channels, and each batch sample
    is processed independently.

    Args:
        input: array of shape (batch, height, width, in_channels).
        filter: array of shape
            (filter_height, filter_width, in_channels, out_channels).
        stride: integer step used for both the height and the width axis.
        padding: 'SAME' (zero-pad so that out = ceil(in / stride)) or
            'VALID' (no padding, out = ceil((in - filter + 1) / stride)).

    Returns:
        A float64 array of shape (batch, out_height, out_width, out_channels).

    Raises:
        AssertionError: if the ranks, the channel counts, the stride or the
            padding mode are not acceptable.
    """
    input = np.asarray(input)
    filter = np.asarray(filter)
    # batch x height x width x channels
    in_s = input.shape
    # height x width x in_channels x out_channels
    f_s = filter.shape
    assert len(in_s) == 4, 'input size rank 4 required!'
    assert len(f_s) == 4, 'filter size rank 4 required!'
    assert f_s[2] == in_s[3], 'intput channels not match filter channels.'
    assert f_s[0] >= stride and f_s[1] >= stride, 'filter should not be less than stride!'
    assert padding in ['SAME', 'VALID'], 'padding value[{0}] not allowded!!'.format(padding)

    in_hw = np.array(in_s[1:3])
    f_hw = np.array(f_s[:2])

    # Output size as defined by TensorFlow; -(-a // b) is an exact integer
    # ceil(a / b), which avoids going through floating point.
    if padding == 'SAME':
        out_hw = -(-in_hw // stride)
    else:
        out_hw = -(-(in_hw - f_hw + 1) // stride)
    out_h, out_w = (int(v) for v in out_hw)
    output = np.zeros((in_s[0], out_h, out_w, f_s[3]))

    if padding == 'SAME':
        # From out = (in - f + 2p) / stride + 1:
        #     2p = (out - 1) * stride - in + f
        # Each axis is handled on its own, so an axis still gets padded when
        # the other axis (or its own leading side) needs no padding.
        total = np.maximum((out_hw - 1) * stride - in_hw + f_hw, 0)
        # When 2p is odd TensorFlow puts the smaller half on the top/left and
        # the remainder on the bottom/right.
        before = total // 2
        after = total - before
        padded = np.pad(
            input,
            ((0, 0),
             (int(before[0]), int(after[0])),
             (int(before[1]), int(after[1])),
             (0, 0)),
            'constant')
    else:
        # VALID never pads; windows that would run past the border are simply
        # not produced by the output-size formula above.
        padded = input

    f_h, f_w = f_s[0], f_s[1]
    for out_r in range(out_h):
        row = out_r * stride
        for out_c in range(out_w):
            col = out_c * stride
            # Region of the (padded) image covered by the filter window.
            cover = padded[:, row:row + f_h, col:col + f_w, :]
            # Contract height, width and in_channels against the filter for
            # all kernels at once; the batch axis is kept, giving a
            # (batch, out_channels) result.
            output[:, out_r, out_c, :] = np.tensordot(
                cover, filter, axes=([1, 2, 3], [0, 1, 2]))
    return output
if __name__ == "__main__":
    # Decode the base64 string imported from const into a uint8 pixel array.
    encoded_image = img
    image = np.asarray(
        Image.open(BytesIO(base64.b64decode(encoded_image))), dtype=np.uint8)
    print(image.mean())
    # Rescale the 8-bit pixel values by 1/255.
    image = image / 255
    print(image.mean())
    # Prepend a batch axis so the image becomes a batch of one.
    image = image[np.newaxis, ...]

    # Build the TensorFlow graph that serves as the reference implementation.
    input_tensor = tf.placeholder(
        tf.float32, shape=[None] * 4, name='input')
    filter_tensor = tf.placeholder(
        tf.float32, shape=[None] * 4, name='filter')
    same_stride2 = tf.nn.conv2d(
        input_tensor, filter_tensor, padding='SAME', strides=[1, 2, 2, 1])
    valid_stride3 = tf.nn.conv2d(
        input_tensor, filter_tensor, padding='VALID', strides=[1, 3, 3, 1])

    def run_reference(target, kernels):
        # Evaluate one of the reference tensors in a fresh session.
        with tf.Session() as sess:
            return sess.run(
                target,
                feed_dict={input_tensor: image, filter_tensor: kernels})

    try:
        final_score = 0  # accumulated score

        # Test 1: SAME padding, stride 2.
        kernels = np.random.uniform(size=[5, 5, 3, 8])
        ours = conv2d(image, kernels, 2, 'SAME')
        reference = run_reference(same_stride2, kernels)
        assert ours.shape == reference.shape, 'shape mismatch [{}] vs [{}]'.format(
            ours.shape, reference.shape)
        final_score += 20  # 20 points for a correct shape
        print(final_score)
        print(ours.shape)
        print(reference.shape)
        print(np.mean(ours))
        print(np.mean(reference))
        mean_abs_err = np.mean(np.abs(ours - reference))
        # If this assertion holds the implementation is considered correct.
        assert mean_abs_err < 1e-5, 'value mismatch [{}]'.format(mean_abs_err)
        final_score += 30  # 30 points for correct values
        print('test 1 passed...')

        # Test 2: VALID padding, stride 3.
        kernels = np.random.uniform(size=[5, 5, 3, 8])
        ours = conv2d(image, kernels, 3, 'VALID')
        reference = run_reference(valid_stride3, kernels)
        print(ours.shape)
        print(reference.shape)
        assert ours.shape == reference.shape, 'shape mismatch [{}] vs [{}]'.format(
            ours.shape, reference.shape)
        final_score += 20  # 20 points for a correct shape
        mean_abs_err = np.mean(np.abs(ours - reference))
        # If this assertion holds the implementation is considered correct.
        assert mean_abs_err < 1e-5, 'value mismatch [{}]'.format(mean_abs_err)
        final_score += 30  # 30 points for correct values
        print('test 2 passed...')
    except Exception as ex:
        print(ex)
    print('Your final score:[{}]'.format(final_score))

    # Chain three layers to check the output shapes.
    # input 1 x 350 x 500 x 3
    layer1_filter = np.random.uniform(size=[3, 3, 3, 8])
    layer1_out = conv2d(image, layer1_filter, 2, 'SAME')
    print(layer1_out.shape)
    # output 1 x 175 x 250 x 8

    # input 1 x 175 x 250 x 8
    layer2_filter = np.random.uniform(size=[5, 5, 8, 16])
    layer2_out = conv2d(layer1_out, layer2_filter, 2, 'SAME')
    print(layer2_out.shape)
    # output 1 x 88 x 125 x 16

    # input 1 x 88 x 125 x 16
    layer3_filter = np.random.uniform(size=[3, 3, 16, 24])
    layer3_out = conv2d(layer2_out, layer3_filter, 3, 'VALID')
    print(layer3_out.shape)
    # output 1 x 29 x 41 x 24
输出:
171.61454476190477
0.6729982147525674
20
(1, 175, 250, 8)
(1, 175, 250, 8)
25.769462842538342
25.76946
test 1 passed...
(1, 116, 166, 8)
(1, 116, 166, 8)
test 2 passed...
Your final score:[100]
(1, 175, 250, 8)
(1, 88, 125, 16)
(1, 29, 41, 24)