文本抽取与还原

功能:

1、递归读取文件夹下的所有 .txt 文本,合并到一个文本(out.txt)中,并在 config.txt 中记录每个文件的行数和路径

2、支持根据 config.txt 中记录的行数和路径,将合并后的文本(out.txt)拆分还原成原目录结构

readtxt.py

import os

# Folder handed to walk_dir() by the script entry point, and the folder
# the result files are written into.
check_path = "."
out_path = ".."

# Names of the files created inside out_path.
out_file = "out.txt"                          # merged text of every collected file
config_file = "config.txt"                    # one "<line count>#<path>" entry per file
report_log = "report_debug_new_check.log"     # receives report_list

# Only paths containing this extension are merged by check_file().
check_ext = ".txt"

# NOTE(review): debug_str is not referenced anywhere in the visible code and
# nothing visible appends to report_list -- both look like leftovers; confirm
# before removing.
debug_str = "#define new DEBUG_NEW"

# Accumulators filled while walking the tree, flushed by the entry point.
report_list = list()
out_list = list()
config_list = list()

def check_file(fp):
    """Collect the lines of one text file into the merge buffers.

    Files whose name does not end with ``check_ext`` are ignored.  For a
    matching file every line is appended to ``out_list`` and a
    ``"<line count>#<path>\\n"`` record is appended to ``config_list``.

    Args:
        fp: path of the candidate file.
    """
    # Compare the real suffix: a substring test would also pick up names
    # such as "notes.txt.bak" or any file below a "foo.txt/" directory.
    if not fp.endswith(check_ext):
        return

    with open(fp, 'r') as f:
        lines = f.readlines()

    # A last line without a newline would fuse with the first line of the
    # next file in the merged output, so the recorded line counts would no
    # longer match the merged file.  Terminate it explicitly.
    if lines and not lines[-1].endswith('\n'):
        lines[-1] += '\n'

    out_list.extend(lines)
    config_list.append(str(len(lines)) + '#' + str(fp) + '\n')
    

def walk_dir(dirname):
    """Recursively visit ``dirname`` and pass every non-directory to check_file().

    A directory that cannot be listed is reported on stdout and skipped;
    the walk then continues with the remaining entries of its parent.

    Args:
        dirname: directory to scan.
    """
    try:
        sub_items = os.listdir(dirname)
    except OSError:
        # Only listing failures are expected here; a bare ``except`` would
        # also swallow KeyboardInterrupt and programming errors.
        # The single-argument call form prints the same text on Python 2 and 3.
        print('Access denied: %s' % dirname)
        return

    for item in sub_items:
        full_path = os.path.join(dirname, item)
        if os.path.isdir(full_path):
            walk_dir(full_path)
        else:
            check_file(full_path)

 
if __name__ == "__main__":
    # Entry point of readtxt.py: walk check_path, then write the report,
    # the merged text and the per-file line-count index into out_path.
    if os.path.isdir(check_path):
        check_path += '/'
    walk_dir(check_path)

    # (file name, collected lines) pairs.  The name constants are kept as
    # strings instead of being rebound to file objects.
    outputs = (
        (report_log, report_list),
        (out_file, out_list),
        (config_file, config_list),
    )
    for file_name, content in outputs:
        # ``with`` closes every output file, even when a write fails;
        # previously only the log file was ever closed.
        with open(os.path.join(out_path, file_name), 'w') as handle:
            handle.writelines(content)

write.py

# -*- coding:utf-8 -*-   
import os
import linecache
# Shared state of the restore script.
dirs = list()        # '/'-separated components of the path being restored
txt_list = list()    # lines gathered for the file currently being restored
index = 1            # 1-based number of the next line to fetch from ./out.txt
def readnumlines(num, fp):
    """Append the next ``num`` lines of ./out.txt to ``txt_list``.

    The position is kept in the module-level ``index`` (a 1-based line
    number), which is advanced by ``num`` on every call.  Lines are fetched
    through ``linecache``; a line number past the end of the file yields an
    empty string.

    Args:
        num: number of lines that belong to the file being restored.
        fp: unused; kept so existing callers keep working.
    """
    global index
    # Progress output.  The single-argument call form prints the same text
    # on Python 2 and Python 3.
    print(index)
    print(linecache.getline("./out.txt", 1))
    txt_list.extend(
        linecache.getline("./out.txt", line_no)
        for line_no in range(index, index + num)
    )
    index += num
    print(index)
		
def mkFolder(paths):
    """Create the directory ``paths`` together with any missing parents.

    Trailing ``/`` or ``\\`` separators are ignored.  Nothing happens when
    the path is empty or already exists (as a directory or as a file).

    Args:
        paths: directory path, with or without a trailing separator.

    Raises:
        OSError: if the directory cannot be created.
    """
    target = paths.rstrip('/\\')
    # An empty path used to raise IndexError on the last-character lookup.
    if not target or os.path.exists(target):
        return
    try:
        # makedirs builds the whole chain; the earlier hand-written recursion
        # only ran for paths ending in a separator, so it created at most two
        # missing levels.
        os.makedirs(target)
    except OSError:
        # Tolerate the directory having appeared in the meantime.
        if not os.path.isdir(target):
            raise
            
            	
if __name__ == "__main__":
    # Entry point of write.py: rebuild every file listed in ./config.txt
    # from the merged ./out.txt.  Each config entry is "<line count>#<path>".
    # ``with`` closes both inputs; config.txt used to be left open.
    with open("./out.txt", 'r') as fp, open("./config.txt", 'r') as config:
        for line in config:
            if '#' not in line:
                # Blank or malformed entry: nothing to restore.
                continue

            # Split once only, so a '#' inside the path is preserved.
            num, name = line.split('#', 1)
            print(name)

            # Drop just the line terminator; slicing off the last character
            # unconditionally ate a real character when the final entry had
            # no trailing newline.
            name = name.rstrip('\r\n')

            dirs = name.split('/')
            # dirname understands the platform's separators.  The trailing
            # '/' keeps the argument in the form mkFolder was always given.
            folder = os.path.dirname(name)
            if folder:
                mkFolder(folder + '/')
            print(dirs)

            readnumlines(int(num), fp)
            with open(name, 'w') as restored:
                restored.writelines(txt_list)
            # Fresh buffer for the next file; readnumlines appends to it.
            txt_list = []
	


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值