""" 多线程分块(按字节进行)读取多个文件 1、统计文件的大小 2、根据线程数对文件大小进行分块 3、多线程根据分块行进行读取文件 """ import os from concurrent.futures.thread import ThreadPoolExecutor import time def size_file(file): """ 统计文件大小(字节) :return: """ count = os.path.getsize(file) return count def part_size_file(file, thread_num): """ 根据文件字节数对文件按字节大小进行分块 :return: """ size = size_file(file) num = size // thread_num # 计算每块的行数 i = 0 tt = [] # 存放分块的行数 t = 0 while i < thread_num - 1: tt.append((t + i, (i + 1) * num)) t = (i + 1) * num i += 1 tt.append((t + 1, size)) return tt def Read_file(args): """ 分块读取文件 :param tt: :param file: :return: """ start_pos = args[0][0] end_pos = args[0][1] print(args[0][0], args[0][1]) with open(args[1], "r", encoding="gbk", errors="ignore") as f: if start_pos != 0: f.seek(args[0][0] - 1) if f.read(1) != '\n': line = f.readline() start_pos = f.tell() f.seek(args[0][0]) # print(start_pos) while (start_pos <= end_pos): line = f.readline() # print(line) ''' do somthing ''' start_pos = f.tell() f.close() def thread_pool_readfile(file): """ 多线程分块读取文件 :return: """ thread_num = 4 tt = part_size_file(file, thread_num=thread_num) p = ThreadPoolExecutor(thread_num) for i in range(len(tt)): p.submit(Read_file, args=(tt[i], file)) # time.sleep(2) p.submit(True) if __name__ == '__main__': file = r"E:\数据处理\资源树换新数据源\v_uplink.txt" start = time.clock() count = size_file(file) thread_pool_readfile(file) end = time.clock() print("Cost time %s seconds" % (end - start))
多线程分块(按字节进行)读取多个文件
最新推荐文章于 2023-03-07 17:38:29 发布