图像常用的数据增强技术(based on TensorFlow)

本文介绍了基于TensorFlow的四种图像数据增强技术,强调了数据预处理在深度学习模型性能中的关键作用。通过Python和TensorFlow,可以有效地构建图像数据输入管道,提升模型的泛化能力。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

数据作为深度学习的基础,其对模型性能的重要性不言而喻。在本文中,我们将梳理常用的图像数据处理技术;至于具体的编程工具,选择 Python + TensorFlow:

Tip:

如果你使用 tf.data 来组织图像数据输入管道,那么恭喜你:可以直接在 map 中调用本文的代码。

第一种

在这里插入图片描述

#coding:utf-8
# preprocess_lenet.py

"""
 Train
    step1: resize image with crop or pad
    step2: subtract 128.0
    step3: div 128.0
 Eval
    step1: resize image with crop or pad
    step2: subtract 128.0
    step3: div 128.0
"""

import tensorflow as tf


def preprocess_image(image, label, is_training,
                     out_height=28, out_width=28):
  """Preprocesses the given image for LeNet-style training or evaluation.

  Training and evaluation share one pipeline: cast to float, centrally crop
  or pad to `[out_height, out_width]`, then rescale with `(x - 128) / 128`.

  Args:
    image: A `Tensor` representing an image of arbitrary size.
    label: The label that belongs to `image`; returned unchanged so the
      function can be used directly in `tf.data.Dataset.map`.
    is_training: `True` if we're preprocessing the image for training and
      `False` otherwise. Currently unused because both modes apply the same
      steps.
    out_height: The height of the image after preprocessing.
    out_width: The width of the image after preprocessing.

  Returns:
    A tuple `(image, label)` holding the preprocessed float image and the
    untouched label.
  """
  image = tf.cast(image, tf.float32)  # (0.0~255.0)
  # The API expects (image, target_height, target_width) in that order;
  # passing width first would transpose any non-square target size.
  image = tf.image.resize_image_with_crop_or_pad(
      image, out_height, out_width)
  # Shift and scale the pixel values from [0, 255] to roughly [-1, 1).
  image = tf.subtract(image, 128.0)
  image = tf.div(image, 128.0)
  return image, label

第二种

在这里插入图片描述

#coding:utf-8
#preprocess_cifar.py

"""
 Train
    step1: pad each side of the image (only if padding > 0)
    step2: random crop
    step3: random flip left right
    step4: random brightness
    step5: random contrast
    step6: per image standardization
 Eval
    step1: resize image with crop or pad
    step2: per image standardization
"""

import tensorflow as tf

# Default number of pixels added before and after the height and width
# dimensions of an image prior to the random crop in `preprocess_for_train`.
_PADDING = 4


def preprocess_image(image, label, is_training,
                     out_height=32, out_width=32,
                     add_image_summaries=False):
  """Runs the train or eval preprocessing pipeline on one example.

  Args:
    image: A `Tensor` representing an image of arbitrary size.
    label: The label that belongs to `image`; passed through unchanged.
    is_training: `True` selects `preprocess_for_train`, anything falsy
      selects `preprocess_for_eval`.
    out_height: The height of the image after preprocessing.
    out_width: The width of the image after preprocessing.
    add_image_summaries: Enable image summaries.

  Returns:
    A tuple `(image, label)` with the preprocessed image and the original
    label.
  """
  # Pick the pipeline once, then call it with the shared arguments.
  pipeline = preprocess_for_train if is_training else preprocess_for_eval
  processed = pipeline(image, out_height, out_width,
                       add_image_summaries=add_image_summaries)
  return processed, label


def preprocess_for_train(image,
                         output_height,
                         output_width,
                         padding=_PADDING,
                         add_image_summaries=True):
  """Applies the random training-time augmentations to a single image.

  The steps are, in order: optional padding, random crop to
  `[output_height, output_width, 3]`, random horizontal flip, random
  brightness, random contrast and per-image standardization.

  Args:
    image: A `Tensor` representing an image of arbitrary size.
    output_height: The height of the image after preprocessing.
    output_width: The width of the image after preprocessing.
    padding: The amount of padding before and after the height and width
      dimensions of the image. No padding is applied when it is not
      positive.
    add_image_summaries: Enable image summaries.

  Returns:
    A preprocessed image.
  """
  if add_image_summaries:
    tf.summary.image('image', tf.expand_dims(image, 0))

  # Work on float pixel values (0.0~255.0) from here on.
  float_image = tf.cast(image, tf.float32)
  if padding > 0:
    # Pad height and width only; the channel dimension is left alone.
    pad_spec = [[padding, padding], [padding, padding], [0, 0]]
    float_image = tf.pad(float_image, pad_spec)

  # Random spatial augmentations: crop first, then a horizontal flip.
  crop_shape = [output_height, output_width, 3]
  augmented = tf.random_crop(float_image, crop_shape)
  augmented = tf.image.random_flip_left_right(augmented)

  if add_image_summaries:
    tf.summary.image('distorted_image', tf.expand_dims(augmented, 0))

  # Photometric augmentations. Brightness and contrast do not commute, so
  # the order below is part of the behavior.
  augmented = tf.image.random_brightness(augmented, max_delta=63)
  augmented = tf.image.random_contrast(augmented, lower=0.2, upper=1.8)

  # Normalize each image by its own pixel statistics.
  return tf.image.per_image_standardization(augmented)


def preprocess_for_eval(image, output_height, output_width,
                        add_image_summaries=True):
  """Preprocesses the given image for evaluation.

  The image is cast to float, centrally cropped or padded to
  `[output_height, output_width]` and then standardized per image. No
  random augmentation is applied.

  Args:
    image: A `Tensor` representing an image of arbitrary size.
    output_height: The height of the image after preprocessing.
    output_width: The width of the image after preprocessing.
    add_image_summaries: Enable image summaries.

  Returns:
    A preprocessed image.
  """
  if add_image_summaries:
    tf.summary.image('image', tf.expand_dims(image, 0))
  # Transform the image to floats (same cast as the training pipeline).
  image = tf.cast(image, tf.float32)

  # Crop or pad centrally if needed. The API expects
  # (image, target_height, target_width); passing width first would
  # transpose any non-square output size.
  resized_image = tf.image.resize_image_with_crop_or_pad(image,
                                                         output_height,
                                                         output_width)
  if add_image_summaries:
    tf.summary.image('resized_image', tf.expand_dims(resized_image, 0))

  # Normalize the image by its own pixel statistics.
  return tf.image.per_image_standardization(resized_image)

第三种

在这里插入图片描述

#coding:utf-8
#preprocess_vgg.py

"""
 Train
    step1: _aspect_preserving_resize
           # 在不改变图像宽高比的基础上,
           # 将图像的窄边缩放到_RESIZE_SIDE_MIN和_RESIZE_SIDE_MAX之间
    step2: _random_crop
           # 将图像随机裁剪到 out_height, out_width
           # 如果out_height, out_width比缩放后的图像大,报错。
    step3: random flip left right
    step4: _mean_image_subtraction
           # RGB每个通道减去整个数据集的RGB通道的均值
 Eval
    step1: _aspect_preserving_resize
    step2: _central_crop
    step3: _mean_image_subtraction
"""

import tensorflow as tf

# Per-channel (R, G, B) means of the whole dataset; per the module docstring
# they are subtracted from each image in the mean-image-subtraction step.
_R_MEAN = 123.68
_G_MEAN = 116.78
_B_MEAN = 103.94

# Default bounds for the length of the image's shorter side after the
# aspect-preserving resize.
_RESIZE_SIDE_MIN = 256
_RESIZE_SIDE_MAX = 512


def preprocess_image(image, label, is_training,
                     out_height=224, out_width=224,
                     resize_side_min=_RESIZE_SIDE_MIN,
                     resize_side_max=_RESIZE_SIDE_MAX):
  """Runs the VGG-style train or eval preprocessing on one example.

  Args:
    image: A `Tensor` representing an image of arbitrary size.
    label: The label that belongs to `image`; passed through unchanged.
    is_training: `True` if we're preprocessing the image for training and
      `False` otherwise.
    out_height: The height of the image after preprocessing.
    out_width: The width of the image after preprocessing.
    resize_side_min: The lower bound for the smallest side of the image for
      aspect-preserving resizing. If `is_training` is `False`, then this value
      is used for rescaling.
    resize_side_max: The upper bound for the smallest side of the image for
      aspect-preserving resizing. If `is_training` is `False`, this value is
      ignored. Otherwise, the resize side is sampled from
        [resize_side_min, resize_side_max].

  Returns:
    A tuple `(image, label)` with the preprocessed image and the original
    label.
  """
  # Evaluation only needs the lower resize bound; handle it first.
  if not is_training:
    evaluated = preprocess_for_eval(image, out_height, out_width,
                                    resize_side_min)
    return evaluated, label

  augmented = preprocess_for_train(image, out_height, out_width,
                                   resize_side_min, resize_side_max)
  return augmented, label


def preprocess_for_train(image,
                         out_height,
                         out_width,
                         resize_side_min=_RESIZE_SIDE_MIN,
                         resize_side_max=_RESIZE_SIDE_MAX):
  """Preprocesses the given image for training.

  Note that the actual resizing 
图像的增强/////////////////////////////////直方图对话框构造函数; ZFT::ZFT(CWnd* pParent /*=NULL*/) : CDialog(ZFT::IDD, pParent)//ZFT为定义的用来显示直方图的对话框类; {  Width=Height=0;//对话框初始化阶段设置图像的宽和高为"0"; } ////////////////////////对话框重画函数; void ZFT::OnPaint() {  CRect rect;//矩形区域对象;  CWnd *pWnd;//得到图片框的窗口指针;  pWnd=GetDlgItem(IDC_Graphic);//得到ZFT对话框内的"Frame"控件的指针;  file://(IDC_Graphic为放置在对话框上的一个"Picture"控件,并讲类型设置为"Frame")。  pWnd->GetClientRect(&rect);//得到"Frame"控件窗口的"视"区域;  int i;  CPaintDC dc(pWnd);//得到"Frame"控件的设备上下文;  file://画直方图的x、y轴;  dc.MoveTo(0,rect.Height());  dc.LineTo(rect.Width(),rect.Height());  dc.MoveTo(0,rect.Height());  dc.LineTo(0,0);  file://画直方图,num[]是"ZFT"的内部数组变量,存放的是图像各个灰度级出现的概率;该数组的各个分量在  显示具体图像的直方图时设置;  for(i=0;iGetWindowRect(&rect);//获取pWnd窗口对象窗口区域位置;  file://屏幕坐标转换为客户区坐标;  ScreenToClient(&rect);  file://判断当前鼠标是否指在直方图内;  if(rect.PtInRect (point))  {   int x=point1.x-rect.left;   file://当前鼠标位置减去区域的起始位置恰好为当前鼠标所指位置所表示的灰度级;   string.Format("%d",x);   file://显示当前位置对应的图像的灰度级;   pWndText->SetWindowText((LPCTSTR)string);  }  CDialog::OnMouseMove(nFlags, point); } //////////////////////////////////////// void CDibView::OnImagehorgm() file://在程序的"视"类对象内处理显示图像直方图的函数; {  CDibDoc *pDoc=GetDocument();  HDIB hdib;  hdib=pDoc->GetHDIB();  BITMAPINFOHEADER *lpDIBHdr;//位图信息头结构指针;  BYTE *lpDIBBits;//指向位图像素灰度值的指针;  lpDIBHdr=( BITMAPINFOHEADER *)GlobalLock(hdib);//得到图像的位图头信息  lpDIBBits=(BYTE*)lpDIBHdr+sizeof(BITMAPINFOHEADER)+256*sizeof(RGBQUAD);  file://获取图像像素值  ZFT dialog;//直方图对话框模板对象;  int i,j;  int wImgWidth=lpDIBHdr->biWidth;  int wImgHeight=lpDIBHdr->biHeight;  file://a[]数组用来存放各个灰度级出现的概率;  float a[256];  for(i=0;i<256;i++)//初始化数组;  {     a[i]=0;  }  file://统计各个灰度级出现的次数;  for(i=0;i<wimgheight;i++)  {   for(j=0;j<wimgwidth;j++)   {    a[*(lpDIBBits+WIDTHBYTES(wImgWidth*8)*i+j)]++;   }   file://统计各个灰度级出现的概率;   for(i=0;i<256;i++)   {    a[i]=a[i]/(wImgHeight*wImgWidth);//得到每个灰度级的出现概率;    memcpy(dialog.num,a,256*sizeof(float));   }  }   dialog.DoModal();//显示直方图对话框;  }  return; } 
</wimgwidth;j++) </wimgheight;i++)
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值