一文了解 Python 图像处理

最新推荐文章于 2023-05-06 11:27:25 发布

原创最新推荐文章于 2023-05-06 11:27:25 发布 · 549 阅读

1 ·

CC 4.0 BY-SA版权

文章标签：

#python #opencv

Python 同时被 3 个专栏收录

85 篇文章

订阅专栏

项目相关

37 篇文章

订阅专栏

图像、视频处理

16 篇文章

订阅专栏

本文介绍了Python中处理图像的基本操作，包括使用二进制和数组方式读取图像，使用OpenCV进行图像缩放，以及设置图像显示的画布大小和位置。此外，还展示了如何在图像处理后进行Base64编码，以便在网络传输中使用。文章还涵盖了图像显示的相关技巧，如调整画布分辨率和位置。

Part1

1. 图像读取

网上有各种图像读取方法，我这里只根据目的划分为二进制读取和数组读取两种方式——前者用于网络传输，后者用于广义上的图像处理。

1.1 二进制读取

使用 open() 以及 read() 即可，其中使用 'rb' 指定以二进制的方式读取；用 read() 而不是 readline() 或者 readlines()，是为了一次性读取全部的数据。

# Open in binary mode ('rb') and pull the entire file into memory with a single read().
with open(data_path, 'rb') as f:
    img = f.read()

# The handle is already closed here; only the in-memory bytes are inspected.
print(f"type(img): {type(img)}")  # <class 'bytes'>
print()
# print(f"img: {img}")  # uncomment to dump the raw bytes

1.2 数组读取

在这里就有很多库可以实现了，比如PIL、matplotlib、opencv、keras 等。
其中，opencv 使用最为广泛。

import cv2 

img = cv2.imread(data_path)
# cv2.imread does not raise when the file is missing or cannot be decoded;
# it returns None, so fail loudly here instead of printing a NoneType.
if img is None:
    raise FileNotFoundError(f"cv2.imread could not read image: {data_path!r}")
print(f"type(img): {type(img)}")  # <class 'numpy.ndarray'>
print()
print(f"img:\n{img}")

需要注意的是，若 opencv 读取的是彩色图片，则其格式为 BGR 格式，而不是 matplotlib 的 RGB。

2. 图像处理

2.1 图像缩放

可以使用 opencv 的 resize()函数来完成。

注意 cv2.resize() 的一个坑：其第二个参数（目标尺寸）的顺序为 (width, height)，而不是 cv2.imread() 读取后 img.shape 的顺序：(height, width)。

介绍两种思路：等比缩放和固定比例缩放

# Image scaling
height, width = img.shape[0], img.shape[1]  # shape is indexed as (height, width, ...)

# Option 1: scale height and width by the same factor
ratio = 0.8  # each pass shrinks the image to 80%
height, width = int(height * ratio), int(width * ratio)

# Option 2 (disabled alternative): derive the other side from the longer side at 16:9
# ratio = 9/16
# if height > width:
#     width = int(ratio * height)
# else:
#     height = int(ratio * width)

# The target size is passed to cv2.resize as (width, height)
img = cv2.resize(img, (width, height), interpolation=cv2.INTER_AREA)  # interpolation: selects the resampling method

2.2 图像裁剪

暂略

2.3 像素级操作

暂略

完整代码

_MAX_SIZE_KB = 300                  # files at or above this size are downscaled before encoding
_SHRINK_RATIO = 0.8                 # factor applied to both sides on every shrink pass
_RESIZED_DIR = "data/save/fixed/"   # resized copies are written here so their size can be measured


def _parse_face_name(face_name):
    '''
    Split an image file name into the user name and the dotted extension.

    :param face_name: file name including its extension
    :return: (user_name, end_with); user_name is the text before the first "_",
             or "Unknown" when the name contains no underscore
    '''
    end_with = os.path.splitext(face_name)[1]   # e.g. ".jpg"; "" when there is no extension
    if "_" in face_name:
        user_name = face_name.split('_')[0]
    else:
        print("格式错误：文件命名没有发现下划线")
        user_name = "Unknown"
    return user_name, end_with


def _pic_type_for(end_with):
    '''Map a dotted extension to the picture-type code: .bmp -> 3, .png -> 2, anything else -> 1.'''
    return {'.bmp': 3, '.png': 2}.get(end_with.lower(), 1)


def _shrink_until_small(img, img_name, img_size):
    '''
    Repeatedly downscale img until the saved copy is smaller than _MAX_SIZE_KB.

    :param img: image array as returned by cv2.imread
    :param img_name: file name used for the copy written under _RESIZED_DIR
    :param img_size: current file size in KB
    :return: (resized image, size in KB of the last saved copy)
    :raises OSError: if cv2.imwrite reports that the copy was not written
    '''
    # cv2.imwrite does not create missing directories, so make sure the target exists.
    os.makedirs(_RESIZED_DIR, exist_ok=True)
    save_path = _RESIZED_DIR + img_name

    while img_size >= _MAX_SIZE_KB:
        print("Start image compressing...")
        height = int(img.shape[0] * _SHRINK_RATIO)
        width = int(img.shape[1] * _SHRINK_RATIO)
        if height < 1 or width < 1:
            # A zero-sized target would make cv2.resize fail; keep the last valid image.
            break
        # cv2.resize expects the target size as (width, height)
        img = cv2.resize(img, (width, height), interpolation=cv2.INTER_AREA)
        # Save the resized copy so its on-disk size can be measured for the next pass.
        if not cv2.imwrite(save_path, img):
            raise OSError(f"cv2.imwrite could not save {save_path!r}")
        img_size = os.path.getsize(save_path) / 1024

    return img, img_size


def img_processing(face_path):
    '''
    Prepare a face image for transfer (used during registration).

    Files smaller than 300 KB are read as raw bytes. Larger files are loaded with
    OpenCV, shrunk by a factor of 0.8 per pass until the saved copy is below
    300 KB, and re-encoded in the original format. The result is Base64-encoded.

    :param face_path: full path of the image; the file name is expected to look like
                      "<user>_<anything>.<ext>" (the text before the first "_" is the user name)
    :return: dict {"user_name": user_name, "pic_type": pic_type, "pic": pic} where
             pic_type is 3 for .bmp, 2 for .png and 1 otherwise (jpg/jpeg), and
             pic is the Base64 text of the image data
    :raises ValueError: if OpenCV cannot decode or re-encode the image
    :raises OSError: if a resized copy cannot be written
    '''
    ENCODING = 'utf-8'

    face_name = os.path.split(face_path)[1]   # file name with extension
    user_name, end_with = _parse_face_name(face_name)
    pic_type = _pic_type_for(end_with)

    img_size = os.path.getsize(face_path) / 1024
    print(f"img_size: {round(img_size, 3)}k")

    if img_size < _MAX_SIZE_KB:
        # Small file: open -> b64encode -> decode
        with open(face_path, 'rb') as f:
            img = f.read()
    else:
        # Large file: cv2.imread -> shrink -> cv2.imencode -> b64encode -> decode
        img = cv2.imread(face_path)
        if img is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise ValueError(f"cv2.imread could not decode image: {face_path!r}")
        print(f"Original Dimensions: {img.shape}")

        img, img_size = _shrink_until_small(img, face_name, img_size)

        print(f"Resized Dimensions: {img.shape}")
        print()
        print((f"Resized szie: {round(img_size, 3)} k"))

        # Encode the array back into the original file format (selected by the extension).
        encoded_ok, img = cv2.imencode(end_with.lower(), img)
        if not encoded_ok:
            raise ValueError(f"cv2.imencode could not encode image as {end_with.lower()!r}")

    # Base64-encode the image data, then decode the Base64 bytes into a str
    pic = b64encode(img).decode(ENCODING)

    return {"user_name": user_name, "pic_type": pic_type, "pic": pic}

Part2

3. 图像显示

3.1 设置画布大小

plt.figure()中使用 figsize 参数即可

# With figsize=(a, b), the rendered size in pixels is px, py = a*dpi, b*dpi
plt.figure(
    figsize=(width * 0.1, height * 0.1),
    dpi=10,
)

3.2 设置画布位置

    # Pin the figure window to a fixed position on screen.
    # "+400+310" moves the window's top-left corner 400 along the positive X axis
    # and 310 along the positive Y axis.
    plt.get_current_fig_manager().window.wm_geometry("+400+310")

3.3 调整图片在画布中的位置

# Position the image inside the canvas; each value lies between 0 and 1.
plt.subplots_adjust(
    left=0.1,
    right=0.9,
    bottom=0.25,
    top=0.9,
    wspace=0,
    hspace=0,
)

3.4 设置图像分辨率

plt.figure()中使用 dpi 参数即可

# dpi sets the resolution: with figsize=(a, b), the pixel size is px, py = a*dpi, b*dpi
plt.figure(
    figsize=(width * 0.1, height * 0.1),
    dpi=10,
)

完整代码

# play image
def img_playing(face_path, playing_time, max_size=(2560, 1600)):
    '''
    Show one face image in a matplotlib window for a limited time (used during authentication).

    The window height is 60% of the screen height; the width follows a 9:16 ratio
    for portrait images and 16:9 for landscape images. The image itself is not
    resized, only the canvas is.

    :param face_path: full path of the image; the text before the first "_" in the
                      file name is shown as the title ("Unknown" if there is no "_")
    :param playing_time: value passed to plt.pause() before the window is closed
    :param max_size: (width, height) of the screen in pixels; the default is the
                     previously hard-coded Mac resolution
    :return: None
    :raises ValueError: if cv2.imread cannot read the image
    '''
    face_name = os.path.split(face_path)[1]   # file name with extension

    # Guard against file names that do not follow the "<user>_..." convention
    if "_" in face_name:
        user_name = face_name.split('_')[0]
    else:
        print("格式错误：文件命名没有发现下划线")
        user_name = "Unknown"

    img = cv2.imread(face_path)
    if img is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise ValueError(f"cv2.imread could not read image: {face_path!r}")
    print(f"Original Dimensions: {img.shape}")

    # Canvas size: fixed height, width chosen by the image orientation
    height = max_size[1] * 0.6   # maximum display height
    ratio = 9/16
    if img.shape[0] > img.shape[1]:
        width = int(ratio * height)        # portrait
    else:
        width = int(1/ratio * height)      # landscape
    # Report the display size that is actually used (the image array is unchanged).
    print(f"Resized Dimensions: {(int(height), width)}")

    plt.figure(figsize=(width * 0.1, height * 0.1), dpi=10)  # pixel size: px, py = a*dpi, b*dpi
    try:
        plt.margins(0, 0)

        # Pin the window to a fixed screen position: top-left corner at x=400, y=310.
        # wm_geometry is a Tk window method, so skip positioning when the current
        # backend's window does not provide it instead of failing.
        window = getattr(plt.get_current_fig_manager(), "window", None)
        if hasattr(window, "wm_geometry"):
            window.wm_geometry("+400+310")

        # OpenCV loads colour images as BGR; convert to RGB before showing with plt
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        fontsize = int(width / 5)
        plt.title(user_name, fontsize=fontsize, color='r')  # a missing title may be a font-size problem
        plt.imshow(img)
        print("-----------继续采集照片中----------")
        plt.pause(playing_time)
    finally:
        plt.clf()         # clear the current figure
        plt.close('all')  # close every figure so that windows do not pile up