Usage of tf.extract_image_patches

Original source: https://stackoverflow.com/questions/40731433/understanding-tf-extract-image-patches-for-extracting-patches-from-an-image?newreg=2f7aa12c4b6d4870a979698318d457a4

tf.extract_image_patches extracts small patches (sub-regions) from an image.

The signature is as follows:

extract_image_patches(
    images,
    ksizes,
    strides,
    rates,
    padding,
    name=None
)

images: the input image tensor; must be 4-D with shape [batch, in_rows, in_cols, depth].

ksizes: the size of the sliding window; must be a list of length 4 of the form [1, ksize_rows, ksize_cols, 1].

strides: the distance between the centers of consecutive patches in the original image; must be of the form [1, stride_rows, stride_cols, 1].

rates: within a single patch of the original image, how far apart the sampled pixels are (a dilation factor: one pixel is taken every rate pixels); must be of the form [1, rate_rows, rate_cols, 1]. With kernel size k and rate r, a patch effectively spans k + (k - 1)(r - 1) input pixels.

padding: one of "VALID" or "SAME". "VALID" means every patch must fit entirely within the original image; "SAME" allows patches to extend past the image boundary, with the out-of-bounds portion filled with zeros.

For example:

import tensorflow as tf  # TensorFlow 1.x API

n = 10
# images is a 1 x 10 x 10 x 1 array that contains the numbers 1 through 100 in order
images = [[[[x * n + y + 1] for y in range(n)] for x in range(n)]]

# We generate four outputs as follows:
# 1. 3x3 patches with stride length 5
# 2. Same as above, but the rate is increased to 2
# 3. 4x4 patches with stride length 7; only one patch should be generated
# 4. Same as above, but with padding set to 'SAME'
with tf.Session() as sess:
    print(tf.extract_image_patches(images=images, ksizes=[1, 3, 3, 1], strides=[1, 5, 5, 1], rates=[1, 1, 1, 1], padding='VALID').eval(), '\n\n')
    print(tf.extract_image_patches(images=images, ksizes=[1, 3, 3, 1], strides=[1, 5, 5, 1], rates=[1, 2, 2, 1], padding='VALID').eval(), '\n\n')
    print(tf.extract_image_patches(images=images, ksizes=[1, 4, 4, 1], strides=[1, 7, 7, 1], rates=[1, 1, 1, 1], padding='VALID').eval(), '\n\n')
    print(tf.extract_image_patches(images=images, ksizes=[1, 4, 4, 1], strides=[1, 7, 7, 1], rates=[1, 1, 1, 1], padding='SAME').eval())

Output:

[[[[ 1  2  3 11 12 13 21 22 23]
   [ 6  7  8 16 17 18 26 27 28]]
 
  [[51 52 53 61 62 63 71 72 73]
   [56 57 58 66 67 68 76 77 78]]]]
 
 
[[[[  1   3   5  21  23  25  41  43  45]
   [  6   8  10  26  28  30  46  48  50]]
 
  [[ 51  53  55  71  73  75  91  93  95]
   [ 56  58  60  76  78  80  96  98 100]]]]
 
 
[[[[ 1  2  3  4 11 12 13 14 21 22 23 24 31 32 33 34]]]]
 
 
[[[[  1   2   3   4  11  12  13  14  21  22  23  24  31  32  33  34]
   [  8   9  10   0  18  19  20   0  28  29  30   0  38  39  40   0]]
 
  [[ 71  72  73  74  81  82  83  84  91  92  93  94   0   0   0   0]
   [ 78  79  80   0  88  89  90   0  98  99 100   0   0   0   0   0]]]]
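Why these shapes come out: the spatial dimensions follow the usual convolution arithmetic. Below is a small illustrative helper (out_dim is a hypothetical name, not a TensorFlow function) that reproduces the grid sizes of the four outputs above; the last dimension of each output is always ksize_rows * ksize_cols * depth (9 for the 3x3 cases, 16 for the 4x4 cases):

```
import math

def out_dim(in_dim, ksize, stride, rate, padding):
    # With kernel size k and rate r, a patch spans k + (k - 1) * (r - 1) input pixels.
    effective_k = ksize + (ksize - 1) * (rate - 1)
    if padding == 'VALID':
        return (in_dim - effective_k) // stride + 1
    return math.ceil(in_dim / stride)  # 'SAME'

print(out_dim(10, 3, 5, 1, 'VALID'))  # 2 -> output 1 is 2x2 patches of 9 values each
print(out_dim(10, 3, 5, 2, 'VALID'))  # 2 -> output 2 is also 2x2 (each patch spans 5 pixels)
print(out_dim(10, 4, 7, 1, 'VALID'))  # 1 -> output 3 is a single patch
print(out_dim(10, 4, 7, 1, 'SAME'))   # 2 -> output 4 is 2x2, zero-padded at the edges
```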

The first output can be read off the original 10x10 image as follows (the `*` entries mark the pixels that end up in some patch):

 *  *  *  4  5  *  *  *  9 10 
 *  *  * 14 15  *  *  * 19 20 
 *  *  * 24 25  *  *  * 29 30 
31 32 33 34 35 36 37 38 39 40 
41 42 43 44 45 46 47 48 49 50 
 *  *  * 54 55  *  *  * 59 60 
 *  *  * 64 65  *  *  * 69 70 
 *  *  * 74 75  *  *  * 79 80 
81 82 83 84 85 86 87 88 89 90 
91 92 93 94 95 96 97 98 99 100 
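Note that tf.extract_image_patches is the TensorFlow 1.x API. In TensorFlow 2.x the same op is exposed as tf.image.extract_patches, with ksizes renamed to sizes and no session needed. A minimal sketch of the first call above under TF 2.x (eager execution):

```
import tensorflow as tf  # TensorFlow 2.x

n = 10
images = tf.constant([[[[x * n + y + 1] for y in range(n)] for x in range(n)]])

patches = tf.image.extract_patches(
    images=images,
    sizes=[1, 3, 3, 1],       # 'ksizes' is called 'sizes' in the TF 2 API
    strides=[1, 5, 5, 1],
    rates=[1, 1, 1, 1],
    padding='VALID')
print(patches.shape)     # (1, 2, 2, 9)
print(patches[0, 0, 0])  # [ 1  2  3 11 12 13 21 22 23]
```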
A reader comment on this post reports an error when feeding MNIST into tf.image.extract_patches (code lightly reformatted, comments translated):

```
import tensorflow as tf
from keras import datasets, layers, models
import matplotlib.pyplot as plt

# Load MNIST: train images, train labels, test images, test labels
(train_images, train_labels), (test_images, test_labels) = datasets.mnist.load_data()

# Normalize pixel values to [0, 1] (grayscale pixels range 0-255, so dividing by 255 suffices)
train_images, test_images = train_images / 255.0, test_images / 255.0

# Inspect the shapes
print(train_images.shape, test_images.shape, train_labels.shape, test_labels.shape)

# Reshape to the format we need
train_images = train_images.reshape((60000, 28, 28, 1))
test_images = test_images.reshape((10000, 28, 28, 1))
print(train_images.shape, test_images.shape, train_labels.shape, test_labels.shape)

train_images = train_images.astype("float32") / 255.0

def image_to_patches(images, patch_size=4):
    batch_size = tf.shape(images)[0]
    patches = tf.image.extract_patches(
        images=images[:, :, :, tf.newaxis],
        sizes=[1, patch_size, patch_size, 1],
        strides=[1, patch_size, patch_size, 1],
        rates=[1, 1, 1, 1],
        padding="VALID")
    return tf.reshape(patches, [batch_size, -1, patch_size * patch_size * 1])

class TransformerBlock(tf.keras.layers.Layer):
    def __init__(self, embed_dim, num_heads):
        super().__init__()
        self.att = tf.keras.layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = tf.keras.Sequential([
            tf.keras.layers.Dense(embed_dim * 4, activation="relu"),
            tf.keras.layers.Dense(embed_dim)
        ])
        self.layernorm1 = tf.keras.layers.LayerNormalization()
        self.layernorm2 = tf.keras.layers.LayerNormalization()

    def call(self, inputs):
        attn_output = self.att(inputs, inputs)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        return self.layernorm2(out1 + ffn_output)

class PositionEmbedding(tf.keras.layers.Layer):
    def __init__(self, max_len, embed_dim):
        super().__init__()
        self.pos_emb = tf.keras.layers.Embedding(input_dim=max_len, output_dim=embed_dim)

    def call(self, x):
        positions = tf.range(start=0, limit=tf.shape(x)[1], delta=1)
        return x + self.pos_emb(positions)

def build_transformer_model():
    inputs = tf.keras.Input(shape=(49, 16))              # 49 patches of 4x4 pixels
    x = tf.keras.layers.Dense(64)(inputs)                # embedding dimension 64
    x = PositionEmbedding(max_len=49, embed_dim=64)(x)   # add positional encoding
    x = TransformerBlock(embed_dim=64, num_heads=4)(x)   # stacked Transformer blocks
    x = TransformerBlock(embed_dim=64, num_heads=4)(x)
    x = tf.keras.layers.GlobalAveragePooling1D()(x)      # classification head
    outputs = tf.keras.layers.Dense(10, activation="softmax")(x)
    return tf.keras.Model(inputs=inputs, outputs=outputs)

model = build_transformer_model()
model.compile(optimizer="adam",
              loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])

# Data preprocessing
train_images_pt = image_to_patches(train_images[..., tf.newaxis])  # expected shape (60000, 49, 16)
test_images_pt = image_to_patches(test_images[..., tf.newaxis])

history = model.fit(
    train_images_pt, train_labels,
    validation_data=(test_images_pt, test_labels),
    epochs=10,
    batch_size=128)
```

which fails with:

```
Exception has occurred: InvalidArgumentError
input must be 4-dimensional [60000,28,28,1,1] [Op:ExtractImagePatches]
  File "D:\source\test3\transform.py", line 32, in image_to_patches
    patches = tf.image.extract_patches(
  File "D:\source\test3\transform.py", line 118, in <module>
    train_images_pt = image_to_patches(train_images[..., tf.newaxis])
tensorflow.python.framework.errors_impl.InvalidArgumentError: input must be 4-dimensional [60000,28,28,1,1] [Op:ExtractImagePatches]
```
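The error is consistent with the code shown: train_images is already reshaped to (60000, 28, 28, 1), the call site adds another axis with train_images[..., tf.newaxis], and image_to_patches adds yet another via images[:, :, :, tf.newaxis], so the op receives a 5-D tensor [60000, 28, 28, 1, 1] while ExtractImagePatches requires exactly 4-D input. A minimal fix, assuming the rest of the script is kept as-is, is to expand the channel axis only once (note also that train_images is divided by 255 twice in the original, which should likewise be done only once):

```
def image_to_patches(images, patch_size=4):
    # images: 4-D tensor [batch, 28, 28, 1] -> patches: [batch, 49, 16]
    batch_size = tf.shape(images)[0]
    patches = tf.image.extract_patches(
        images=images,  # already 4-D; do NOT add tf.newaxis again
        sizes=[1, patch_size, patch_size, 1],
        strides=[1, patch_size, patch_size, 1],
        rates=[1, 1, 1, 1],
        padding="VALID")
    return tf.reshape(patches, [batch_size, -1, patch_size * patch_size])

# train_images/test_images were already reshaped to (N, 28, 28, 1) above,
# so pass them directly instead of adding another axis:
train_images_pt = image_to_patches(train_images)  # (60000, 49, 16)
test_images_pt = image_to_patches(test_images)    # (10000, 49, 16)
```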