今天做了一个脚本,其目的大致是读取一系列压缩包文件(.gz),用 winrar 解压后,对每个文件进行一定的内容替换,然后另存成我们需要的文件。
#!/usr/bin/env python
# -*- coding: GBK -*-
#
# 自解压日志文件,逐行替换后生成新文件
# Neil Chen, 2006-4-27
#
import sys
import urllib
import win32api
import os
import subprocess

# Directory holding the compressed archives.
src_dir = r"E:/py/logreplace/src"
# Directory the archives are extracted into.
extracted_dir = r"E:/py/logreplace/extracted"
# Directory that receives the processed files.
target_dir = r"E:/py/logreplace/target"
# Text to look for in every line.
find_what = 'GET http://www.bora.com.cn'
# Text that replaces each occurrence.
replace_as_what = 'GET '
# WinRAR executable. It is started directly (not through the shell's
# "App Paths" lookup), so it must be on PATH or given as a full path.
winrar_exe = 'winrar.exe'
# WinRAR command and switch: "e" extracts without archived paths,
# "-y" answers "yes" to every prompt (per the WinRAR command-line manual).
winrar_args = ['e', '-y']

# Make sure the output directories exist before anything is written to them.
for out_dir in (extracted_dir, target_dir):
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

# Extract every archive, one at a time.
for f in os.listdir(src_dir):
    fpath = os.path.join(src_dir, f)
    if not os.path.isfile(fpath):
        continue
    # subprocess.call blocks until WinRAR exits.  The previous
    # win32api.ShellExecute call returned as soon as WinRAR was launched,
    # so the extracted directory could be listed before extraction finished.
    # Passing a list also gets paths containing spaces quoted correctly.
    # The trailing "//" on the destination is kept from the original call.
    exit_code = subprocess.call(
        [winrar_exe] + winrar_args + [fpath, extracted_dir + "//"])
    if exit_code != 0:
        sys.stderr.write(
            "winrar exited with code %d for %s\n" % (exit_code, fpath))

# Replace the text line by line and write the result as a new file.
for f2 in os.listdir(extracted_dir):
    fpath2 = os.path.join(extracted_dir, f2)
    if not os.path.isfile(fpath2):
        continue
    fpath3 = os.path.join(target_dir, f2)
    # Nested "with" blocks close both files even if an error occurs;
    # streaming the lines avoids loading a whole log file into memory.
    with open(fpath2, 'r') as raw_file:
        with open(fpath3, 'w') as target_file:
            for line in raw_file:
                target_file.write(line.replace(find_what, replace_as_what))
# -*- coding: GBK -*-
#
# 自解压日志文件,逐行替换后生成新文件
# Neil Chen, 2006-4-27
#
import sys
import urllib
import win32api
import os
import subprocess

# Directory holding the compressed archives.
src_dir = r"E:/py/logreplace/src"
# Directory the archives are extracted into.
extracted_dir = r"E:/py/logreplace/extracted"
# Directory that receives the processed files.
target_dir = r"E:/py/logreplace/target"
# Text to look for in every line.
find_what = 'GET http://www.bora.com.cn'
# Text that replaces each occurrence.
replace_as_what = 'GET '
# WinRAR executable. It is started directly (not through the shell's
# "App Paths" lookup), so it must be on PATH or given as a full path.
winrar_exe = 'winrar.exe'


def _ensure_dir(path):
    """Create directory *path* (and missing parents) if it does not exist."""
    if not os.path.isdir(path):
        os.makedirs(path)


def _extract_archives(archive_dir, dest_dir):
    """Extract every regular file in *archive_dir* into *dest_dir* with WinRAR.

    Each WinRAR process is waited for, so all extraction has finished when
    this function returns.  A non-zero exit code is reported on stderr and
    the remaining archives are still processed.
    """
    for entry in os.listdir(archive_dir):
        archive_path = os.path.join(archive_dir, entry)
        if not os.path.isfile(archive_path):
            continue
        # "e" extracts without archived paths, "-y" answers "yes" to every
        # prompt (per the WinRAR manual).  subprocess.call blocks until
        # WinRAR exits, unlike win32api.ShellExecute which only launches it;
        # the list form also quotes paths that contain spaces.
        # The trailing "//" on the destination is kept from the original call.
        exit_code = subprocess.call(
            [winrar_exe, 'e', '-y', archive_path, dest_dir + "//"])
        if exit_code != 0:
            sys.stderr.write(
                "winrar exited with code %d for %s\n"
                % (exit_code, archive_path))


def _rewrite_file(in_path, out_path, old, new):
    """Copy *in_path* to *out_path*, replacing *old* with *new* in each line."""
    # Nested "with" blocks close both files even if an error occurs;
    # the file is streamed instead of being read into memory at once.
    with open(in_path, 'r') as reader:
        with open(out_path, 'w') as writer:
            for line in reader:
                writer.write(line.replace(old, new))


_ensure_dir(extracted_dir)
_ensure_dir(target_dir)

# Extract every archive, one at a time.
_extract_archives(src_dir, extracted_dir)

# Replace the text and write each result as a new file.
for f2 in os.listdir(extracted_dir):
    fpath2 = os.path.join(extracted_dir, f2)
    if os.path.isfile(fpath2):
        _rewrite_file(fpath2, os.path.join(target_dir, f2),
                      find_what, replace_as_what)
本文介绍了一种使用Python批量处理多个压缩的日志文件的方法。该方法首先通过WinRAR解压.gz格式的压缩文件,接着读取并修改解压后的文件内容,最后将更新后的内容保存为新的文件。

被折叠的 条评论
为什么被折叠?



