cuda-convnet 中数据组织格式

本文给出一段 Python 脚本，用于把 JPEG 图像整理成 cuda-convnet 所需的数据批次（batch）文件：逐个读取图像、调整轴顺序并展平为向量、按文件名前缀附上类别标签后序列化保存；随后统计各批次数据的均值，生成对应的元数据文件 batches.meta，便于后续的模型训练。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

import os
import cPickle
import numpy as np
import Image
import random

def ClassName(item):
    """Return the class label for a file name, wrapped in a one-element list.

    The label is the position (0-9) of the first known prefix that ``item``
    starts with.  The result is a list so that callers can extend a label
    list with ``+=``.

    Returns ``None`` when ``item`` starts with none of the known prefixes.
    """
    # Position in this tuple is the numeric class label.
    prefixes = ("acc", "bas", "cam", "dal", "ele",
                "fer", "gra", "ket", "lot", "pyr")
    for label, prefix in enumerate(prefixes):
        if item.startswith(prefix):
            return [label]
    return None
        
def MakeBatch(load_path_list, save_path, class_list):
    """Pack the ``.jpg`` images of several directories into one pickled batch.

    Every ``.jpg`` file is loaded, flattened into one vector and stored as a
    column of the ``data`` matrix.  The pickled dictionary has the keys
    ``batch_label``, ``data``, ``labels`` and ``filenames``.

    Args:
        load_path_list: directories to scan (not recursive).
        save_path: path of the batch file to write.
        class_list: list that receives one integer label per packed image.
            It is extended in place and is also stored under ``labels``.

    Raises:
        ValueError: if an image name has no known class prefix, or if no
            ``.jpg`` image was found at all.  Nothing is written in
            either case.
    """
    data = []
    filenames = []
    for load_path in load_path_list:
        for item in os.listdir(load_path):
            if not item.endswith(".jpg"):
                # Non-image entries must not contribute a label, otherwise
                # labels and data columns get out of step.
                continue
            label = ClassName(item)
            if label is None:
                # Fail before touching data so the three lists stay aligned.
                raise ValueError(
                    "cannot derive a class label from file name %r" % (item,))
            image = Image.open(os.path.join(load_path, item))
            arr = np.array(image, order='C')
            # rot90(k=3) followed by fliplr swaps the first two axes; the
            # final .T reverses all axes, so an H x W x C array is flattened
            # in (C, H, W) order.
            im = np.fliplr(np.rot90(arr, k=3))
            data.append(im.T.flatten('C'))
            filenames.append(item)
            class_list += label

    if not data:
        # Checked before opening save_path so an existing batch file is not
        # truncated when there is nothing to write.
        raise ValueError("no .jpg images found in %r" % (load_path_list,))

    data = np.array(data)
    # flipud followed by rot90(k=3) is a plain transpose: one image per
    # column, in the same order as ``labels`` and ``filenames``.
    rotDat = np.rot90(np.flipud(data), k=3)
    dic = {'batch_label':'training batch 1 of 1', 'data':rotDat, 'labels':class_list, 'filenames':filenames}
    # Pickle protocol 1 is a binary format, so the file must be opened in
    # binary mode; ``with`` closes it even if dumping fails.
    with open(save_path, 'wb') as out_file:
        cPickle.dump(dic, out_file, 1)
    
def MakeMate(data_batch_list, label_name_list, num_vis, save_path):
    """Compute the per-row data mean over all batches and write the meta file.

    Each batch file is a pickled dictionary whose ``data`` entry is a 2-D
    matrix with one case per column.  The mean of every row is taken over
    all columns of all batches and stored as a float32 column vector.

    The pickled meta dictionary has the keys ``num_cases_per_batch``
    (column count of the first batch), ``label_names``, ``num_vis`` and
    ``data_mean``.

    Args:
        data_batch_list: paths of the batch files to average over.
        label_name_list: class names, stored unchanged.
        num_vis: stored unchanged under ``num_vis``.
        save_path: path of the meta file to write.

    Raises:
        ValueError: if ``data_batch_list`` is empty.
    """
    if not data_batch_list:
        raise ValueError("data_batch_list must contain at least one batch file")

    num_cases_per_batch = None
    row_sums = None
    total_cases = 0
    for batch_path in data_batch_list:
        # NOTE(security): unpickling can execute arbitrary code; only load
        # batch files produced by a trusted source.
        with open(batch_path, 'rb') as batch_file:
            batch = cPickle.load(batch_file)
        batch_data = np.asarray(batch['data'])
        if row_sums is None:
            num_cases_per_batch = int(batch_data.shape[1])
            row_sums = np.zeros(batch_data.shape[0], dtype=np.float64)
        # Accumulate in float64 so small integer dtypes cannot overflow.
        row_sums += batch_data.sum(axis=1, dtype=np.float64)
        # Count the cases actually seen, so the mean stays correct even when
        # batches differ in size.
        total_cases += int(batch_data.shape[1])

    data_mean = np.round(row_sums / total_cases, 8)
    data_mean = data_mean.astype('f').reshape(-1, 1)

    print(data_mean)

    dic = {'num_cases_per_batch':num_cases_per_batch, 'label_names':label_name_list, 'num_vis':num_vis, 'data_mean':data_mean}
    # Pickle protocol 1 is a binary format, so the file must be opened in
    # binary mode.
    with open(save_path, 'wb') as out_file:
        cPickle.dump(dic, out_file, 1)
    
if __name__ == '__main__':
    # Step 1: pack one directory of images into a batch file.
    load_path_list = ['/home/tj/Pictures/MakeBatch/test/data_batch_5']
    batch_save_path = '/home/tj/Pictures/MakeBatch/OUTPUT/data_batch_5'
    # MakeBatch appends the numeric labels to this list.  Labels must start
    # at 0 and form a consecutive series.
    class_list = []
    MakeBatch(load_path_list, batch_save_path, class_list)

    # Step 2: build the meta file from previously written batch files.
    num_vis = 3072
    meta_save_path = '/home/tj/Pictures/MakeBatch/OUTPUT/batches.meta'
    label_name_list = [
        'class1', 'class2', 'class3', 'class4', 'class5',
        'class6', 'class7', 'class8', 'class9', 'class10',
    ]
    data_batch_list = [
        '/home/tj/Pictures/MakeBatch/OUTPUT/data_batch_1',
        '/home/tj/Pictures/MakeBatch/OUTPUT/data_batch_2',
        '/home/tj/Pictures/MakeBatch/OUTPUT/data_batch_3',
        '/home/tj/Pictures/MakeBatch/OUTPUT/data_batch_4',
    ]
    MakeMate(data_batch_list, label_name_list, num_vis, meta_save_path)

    print('end')
评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值