批量处理脚本

最新推荐文章于 2025-05-15 16:41:58 发布

kingskynine

最新推荐文章于 2025-05-15 16:41:58 发布

阅读量474

点赞数

CC 4.0 BY-SA版权

分类专栏： python 文章标签：批处理

本文链接：https://blog.youkuaiyun.com/kingskynine/article/details/99827103

python 专栏收录该内容

5 篇文章

订阅专栏

本文介绍了一种使用Python进行多线程文件处理的方法，包括文件搜索、解压缩及图像转换。通过设置线程数量限制，确保了系统资源的有效利用。文章详细描述了如何递归遍历目录查找特定文件，以及如何使用线程处理找到的文件，包括解压.tar.gz文件和转换图像格式。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

import os,sys
import shutil
import copy
import time
import sys
import re
import platform
import threading

# Upper bound on how many worker jobs may run at the same time.
max_threadnum = 4
# Shared gate acquired by tarmove() and googlecut() on entry; it is bounded so
# that releasing more often than acquiring raises instead of silently raising
# the limit.
semaphore = threading.BoundedSemaphore(max_threadnum)

def makedir(dst_filepath):
    """Create the directory *dst_filepath*, including missing parents.

    The call is a no-op when the path already exists. It is safe to invoke
    from several worker threads at once: creation is done in a single
    ``os.makedirs(..., exist_ok=True)`` call instead of a separate
    "exists?" check followed by a create, which could raise
    ``FileExistsError`` when two threads raced on the same directory.

    Args:
        dst_filepath: Directory path to create.
    """
    try:
        os.makedirs(dst_filepath, exist_ok=True)
    except FileExistsError:
        # Raised only when the path exists but is not a directory. Callers
        # have always seen "path already exists" as a silent no-op, so that
        # contract is kept here.
        pass


def iter_files(input, re_keyword):
    """Recursively collect files under *input* whose name matches a regex.

    Entries are visited in ``os.listdir`` order; a sub-directory's matches
    are inserted at the position where that directory was encountered.

    Args:
        input: Directory to scan.
        re_keyword: Regular expression applied with ``search`` to each file's
            base name (not to its full path).

    Returns:
        List of matching file paths, each joined onto *input*.
    """
    entries = os.listdir(input)
    pattern = re.compile(re_keyword)
    matches = []
    for entry in entries:
        full_path = os.path.join(input, entry)
        if not os.path.isdir(full_path):
            # Plain file: keep it only when its base name matches.
            if pattern.search(entry):
                matches.append(full_path)
            continue
        # Directory: descend and splice its matches in at this position.
        matches.extend(iter_files(full_path, re_keyword))
    return matches

def tarmove(filepath,input_dir,output_dir,id):
    """Extract one tar archive into a mirrored location under *output_dir*.

    The destination is the archive's own directory with *input_dir* replaced
    by *output_dir*, plus a sub-folder named after the archive (with any
    ".tar.gz" removed from the name). tar's output is appended to "log.txt";
    skipped and failed archives are appended to "report.txt".

    Args:
        filepath: Path of the archive to extract.
        input_dir: Root that was scanned; substituted out of the path.
        output_dir: Root under which the extracted tree is written.
        id: Index of this job, used only in the timing message.

    Returns:
        1 when the archive is skipped because its path contains "--",
        otherwise 0 (extraction failures are reported in "report.txt" and do
        not change the return value).
    """
    # Function-scope import so this block does not depend on the file header.
    import subprocess

    report_txt = "report.txt"
    # The context manager hands the slot back on every exit path, including
    # the early "skip" return and any exception. A manual acquire/release
    # pair leaked the slot on those paths and eventually stalled all workers.
    with semaphore:
        time_start = time.time()

        if "--" in filepath:
            with open(report_txt, 'a+') as f:
                f.write("continue: %s\n" %(filepath))
            return 1

        fpath, fname = os.path.split(filepath)  # split directory and file name
        fname = fname.replace(".tar.gz","")
        dst_fpath = fpath.replace(input_dir,output_dir)
        dst_fpath = os.path.join(dst_fpath,fname)
        makedir(dst_fpath)

        # An argument list (no shell) keeps paths containing spaces or shell
        # metacharacters intact.
        try:
            with open("log.txt", "a") as log:
                return_code = subprocess.call(
                    ["tar", "-xvf", filepath, "-C", dst_fpath],
                    stdout=log,
                    stderr=subprocess.STDOUT,
                )
        except OSError:
            # e.g. tar is not installed; treat like any other failed run.
            return_code = -1

        if return_code != 0:
            with open(report_txt, 'a+') as f:
                f.write("fail: %s\n" %(filepath))

        time_end = time.time()
        print("file[%d] totally cost %f\n" %(id,(time_end-time_start)))
    return 0

def googlecut(filepath,input_dir,output_dir,id):
    """Run the external ``./GoogleCut`` tool on one image.

    The output file lives in the input file's directory with *input_dir*
    replaced by *output_dir*, and with "L17" in the file name replaced by
    "1m". Existing outputs are not regenerated. The tool's output is appended
    to "log.txt"; skipped, already-present and failed files are appended to
    "report.txt".

    Args:
        filepath: Path of the source image.
        input_dir: Root that was scanned; substituted out of the path.
        output_dir: Root under which the converted image is written.
        id: Index of this job, used only in the timing message.

    Returns:
        1 when the file is skipped because its path contains "--",
        otherwise 0 (tool failures are reported in "report.txt" and do not
        change the return value).
    """
    # Function-scope import so this block does not depend on the file header.
    import subprocess

    report_txt = "report.txt"
    exefile = "./GoogleCut"
    # The context manager hands the slot back on every exit path, including
    # the early "skip" return and any exception. A manual acquire/release
    # pair leaked the slot on those paths and eventually stalled all workers.
    with semaphore:
        time_start = time.time()

        if "--" in filepath:
            with open(report_txt, 'a+') as f:
                f.write("continue: %s\n" %(filepath))
            return 1

        fpath, fname = os.path.split(filepath)  # split directory and file name
        fname = fname.replace("L17","1m")
        dst_fpath = fpath.replace(input_dir,output_dir)
        makedir(dst_fpath)
        dsf_file = os.path.join(dst_fpath,fname)

        if os.path.isfile(dsf_file):
            # Output already present: record it and skip the conversion.
            with open(report_txt, 'a+') as f:
                f.write("pass: %s\n" %(filepath))
        else:
            # An argument list (no shell) keeps paths containing spaces or
            # shell metacharacters intact.
            try:
                with open("log.txt", "a") as log:
                    return_code = subprocess.call(
                        [exefile, filepath, dsf_file],
                        stdout=log,
                        stderr=subprocess.STDOUT,
                    )
            except OSError:
                # e.g. the executable is missing; treat as a failed run.
                return_code = -1
            if return_code != 0:
                with open(report_txt, 'a+') as f:
                    f.write("fail: %s\n" %(filepath))

        time_end = time.time()
        print("img[%d] totally cost %f\n" %(id,(time_end-time_start)))
    return 0

def work_flow(input_dir,output_dir):
    """Run googlecut() sequentially on every "*_L17.tif" file under *input_dir*.

    Args:
        input_dir: Directory tree to scan.
        output_dir: Root passed through to googlecut() for its outputs.
    """
    # The dot is escaped so that only a literal ".tif" suffix matches; the
    # variable is not called ``re`` to avoid shadowing the regex module.
    name_pattern = r"_L17\.tif$"
    file_list = iter_files(input_dir, name_pattern)
    print("totalnum: %d" %(len(file_list)))

    for index, path in enumerate(file_list):
        googlecut(path, input_dir, output_dir, index)
def work_flow_30demvrt(input_dir,output_vrt):
    """Build a GDAL virtual raster from every "*_dem.tif" under *input_dir*.

    The matching paths are written one per line to "filelist.txt" next to
    *output_vrt*, and ``gdalbuildvrt`` is then run with that list as input.
    A failure is reported with a message on stdout.

    Args:
        input_dir: Directory tree to scan for DEM tiles.
        output_vrt: Path of the .vrt file to produce.
    """
    # Function-scope import so this block does not depend on the file header.
    import subprocess

    exe = "gdalbuildvrt"
    name_pattern = r"_dem\.tif$"
    file_list = iter_files(input_dir, name_pattern)
    print("totalnum: %d" %(len(file_list)))

    fpath, _ = os.path.split(output_vrt)  # split directory and file name
    input_file_list = os.path.join(fpath,"filelist.txt")
    # Truncate on each run so entries from earlier runs do not accumulate,
    # and write each tile path (not the list file's own path).
    with open(input_file_list, 'w') as f:
        f.writelines("%s\n" %(filepath) for filepath in file_list)

    # gdalbuildvrt only reads a list file when it is introduced with
    # -input_file_list; otherwise the first positional argument is taken as
    # the output dataset.
    try:
        return_code = subprocess.call(
            [exe, "-input_file_list", input_file_list, output_vrt]
        )
    except OSError:
        # e.g. gdalbuildvrt is not installed; report it as a failed build.
        return_code = -1
    if return_code != 0:
        print("build vrt fail\n")
def work_flow_thread(input_dir,output_dir):
    """Start one googlecut() thread per "*_L17.tif" file under *input_dir*.

    Threads are started without being joined, so this function returns as
    soon as the last thread has been started. How many jobs actually run at
    once is governed by googlecut() itself.

    Args:
        input_dir: Directory tree to scan.
        output_dir: Root passed through to googlecut() for its outputs.
    """
    # The dot is escaped so that only a literal ".tif" suffix matches; the
    # variable is not called ``re`` to avoid shadowing the regex module.
    name_pattern = r"_L17\.tif$"
    file_list = iter_files(input_dir, name_pattern)
    print("totalnum: %d" %(len(file_list)))

    for index, path in enumerate(file_list):
        worker = threading.Thread(
            target=googlecut, args=(path, input_dir, output_dir, index)
        )
        worker.start()

def work_flow_thread2(input_dir,output_dir):
    """Start one tarmove() thread per ".tar.gz" / ".tar" file under *input_dir*.

    ".tar.gz" archives are listed first, followed by ".tar" archives; the
    position in that combined list is the job index given to tarmove().
    Threads are started without being joined, so this function returns as
    soon as the last thread has been started.

    Args:
        input_dir: Directory tree to scan.
        output_dir: Root passed through to tarmove() for its outputs.
    """
    # Dots are escaped so only real ".tar.gz" / ".tar" suffixes match (an
    # unescaped "." would also accept names such as "xtar_gz").
    file_list = iter_files(input_dir, r"\.tar\.gz$")
    file_list.extend(iter_files(input_dir, r"\.tar$"))
    print("totalnum: %d" %(len(file_list)))

    for index, path in enumerate(file_list):
        worker = threading.Thread(
            target=tarmove, args=(path, input_dir, output_dir, index)
        )
        worker.start()

if __name__ == '__main__':
    # Expects exactly two arguments: the input directory and the output VRT.
    if len(sys.argv) !=3:
        print("input: inputdir,outputvrt")
        # sys.exit is always available; the bare ``exit`` helper is injected
        # by the ``site`` module and may be absent (e.g. under ``python -S``).
        sys.exit(1)
    input_dir = sys.argv[1]
    output_dir = sys.argv[2]
    # NOTE(review): this used to call work_flow_getdem(), which is not defined
    # anywhere in this file and raised NameError. The usage text above
    # ("inputdir,outputvrt") matches work_flow_30demvrt(), so that is assumed
    # to be the intended entry point - confirm.
    work_flow_30demvrt(input_dir,output_dir)