用FCN训练自己的数据集（TensorFlow）

最新推荐文章于 2020-09-02 01:00:32 发布

原创最新推荐文章于 2020-09-02 01:00:32 发布 · 2.5k 阅读

6 ·

CC 4.0 BY-SA版权

机器学习与深度学习专栏收录该内容

10 篇文章

订阅专栏

本文解决了一个在使用TensorFlow进行图像识别时遇到的数据读取问题，原脚本中缺少maybe_download_and_extract函数，通过修改read_MITSceneParsingData.py脚本，实现了对MITSceneParsing数据集的正确读取和预处理。

参考地址：https://blog.csdn.net/qq_40994943/article/details/85041493#commentsedit

针对原文不能正常运行的问题，进行说明。

本人的label图片和原图片都是jpg格式

主要问题：原文写的TensorFlowUtils里面就没有maybe_download_and_extract函数

修改方式是更改

read_MITSceneParsingData.py

这个py脚本的内容，如下：



# coding=utf-8
import numpy as np
import os
import random
from six.moves import cPickle as pickle
from tensorflow.python.platform import gfile
import glob

import TensorflowUtils as utils

# Download location of the ADEChallengeData2016.zip archive.
# NOTE(review): no active code in this module reads this constant; it is only
# mentioned by the disabled download step. Confirm before removing it.
# Previous location, kept for reference:
# DATA_URL = 'http://sceneparsing.csail.mit.edu/data/ADEChallengeData2016.zip'
DATA_URL = 'http://data.csail.mit.edu/places/ADEchallenge/ADEChallengeData2016.zip'


def read_dataset(data_dir, data_name):
    """Index the dataset under ``data_dir/data_name`` and cache the index as a pickle.

    The record lists are rebuilt from the files on disk on every call and
    written to ``<data_dir>/MITSceneParsing.pickle``. The automatic
    download/extract step of the upstream script is intentionally absent:
    it relied on ``utils.maybe_download_and_extract``, which the accompanying
    TensorflowUtils module does not provide, so the data must already be on disk.

    Args:
        data_dir: Directory that contains the dataset folder and receives the
            pickle file.
        data_name: Name of the dataset folder inside ``data_dir``.

    Returns:
        A ``(training_records, validation_records)`` tuple; each element is the
        list of record dicts produced by ``create_image_lists``.

    Raises:
        IOError: If the dataset folder does not exist (``create_image_lists``
            returned ``None``).
    """
    pickle_filepath = os.path.join(data_dir, "MITSceneParsing.pickle")
    dataset_dir = os.path.join(data_dir, data_name)

    result = create_image_lists(dataset_dir)
    if result is None:
        # Fail loudly here instead of pickling None and crashing later with an
        # opaque "'NoneType' object is not subscriptable".
        raise IOError("Dataset directory '%s' not found." % dataset_dir)

    print("Pickling ...")
    with open(pickle_filepath, 'wb') as f:
        pickle.dump(result, f, pickle.HIGHEST_PROTOCOL)

    # The in-memory index is exactly what was just written, so there is no
    # need to read the pickle back from disk.
    return result['training'], result['validation']


'''
  返回一个字典:
  image_list{ 
           "training":[{'image': image_full_name, 'annotation': annotation_file, 'filename': filename},......],
           "validation":[{'image': image_full_name, 'annotation': annotation_file, 'filename': filename},......]
           }
'''


def create_image_lists(image_dir, image_ext='png', annotation_ext='png'):
    """Build shuffled image/annotation record lists for the two dataset splits.

    Images are looked up as ``<image_dir>/images/<split>/*.<image_ext>`` and
    the label for each image is expected at
    ``<image_dir>/annotations/<split>/<name>.<annotation_ext>``, where
    ``<split>`` is ``training`` or ``validation``. Images without a matching
    annotation file are reported and skipped.

    Args:
        image_dir: Root directory of the dataset.
        image_ext: File extension of the input images, with or without a
            leading dot. Defaults to ``'png'``.
        annotation_ext: File extension of the annotation images, with or
            without a leading dot. Defaults to ``'png'``.

    Returns:
        ``None`` if ``image_dir`` does not exist; otherwise a dict with the
        keys ``'training'`` and ``'validation'``, each mapping to a randomly
        shuffled list of dicts of the form
        ``{'image': image_path, 'annotation': annotation_path, 'filename': stem}``.
    """
    # glob and os.path below only work on the local filesystem, so the
    # existence check uses os.path as well.
    if not os.path.exists(image_dir):
        print("Image directory '" + image_dir + "' not found.")
        return None

    image_ext = image_ext.lstrip('.')
    annotation_ext = annotation_ext.lstrip('.')
    image_list = {}

    for directory in ('training', 'validation'):  # build each split separately
        records = []

        # Full paths of every image in this split, e.g.
        # <image_dir>/images/training/hi.png
        file_glob = os.path.join(image_dir, "images", directory, '*.' + image_ext)
        file_list = glob.glob(file_glob)

        if not file_list:
            print('No files found')

        for f in file_list:
            # os.path.basename handles the platform's path separator; splitting
            # on a literal backslash only worked on Windows and made every
            # annotation lookup fail elsewhere.
            filename = os.path.splitext(os.path.basename(f))[0]
            annotation_file = os.path.join(
                image_dir, "annotations", directory, filename + '.' + annotation_ext)
            if os.path.exists(annotation_file):
                # image: full image path, annotation: full label path,
                # filename: image name without extension
                records.append({'image': f, 'annotation': annotation_file, 'filename': filename})
            else:
                print("Annotation file not found for %s - Skipping" % filename)

        random.shuffle(records)  # randomize the record order within the split
        image_list[directory] = records
        print('No. of %s files: %d' % (directory, len(records)))

    return image_list

即可。