#-*-
coding: utf-8 -*-try: from
chardet.universaldetector import
UniversalDetector IsAuto
=
Trueexcept
ImportError: IsAuto
=
Falseimport
osimport
os.pathimport
globdef
Convert_Auto( filename,out_enc="utf-8"
): '''
Re-encode text file with auto detec current encode. Need chardet Lib. Input
Parameter: filename:
full path and file name, e.g. c:\dir1\file.txt out_enc:
new encode. Default as 'utf-8'Output
Parameter None''' try:
f=open(filename,'rb') b=
b'
' b+=f.read(1024) u=UniversalDetector() u.reset() u.feed(b) u.close() f.seek(0) b=f.read() f.close() in_enc=u.result['encoding'] new_content=b.decode(in_enc,
'ignore') f=open(filename,
'w',
encoding=out_enc) f.write(new_content) f.close() print
("Success:
"+filename+"
converted from "+
in_enc+"
to "+out_enc
+"
!") except
IOError: print
("Error:
"+filename+"
FAIL to converted from "+
in_enc+"
to "+out_enc+"
!"
)def
Convert_Manu( filename,in_enc='gbk',
out_enc="utf-8"
): '''
Re-encode text file with manual decide input text encode.Input
Parameter: filename:
full path and file name, e.g. c:\dir1\file.txt in_enc:
current encode. Default as 'gbk' out_enc:
new encode. Default as 'utf-8'Output
Parameter None''' try:
print
("convert
"
+
filename) f=open(filename,'rb') b=f.read() f.close() new_content=b.decode(in_enc,
'ignore') f=open(filename,
'w',
encoding=out_enc) f.write(new_content) f.close() print
("Success:
"+filename+"
converted from "+
in_enc+"
to "+out_enc
+"
!") except
IOError: print
("Error:
"+filename+"
FAIL to converted from "+
in_enc+"
to "+out_enc+"
!"
)def
explore(dir,
IsLoopSubDIR=True): '''Convert
files encoding. Input:
dir
: Current folder IsLoopSubDIR:
True -- Include files in sub folder False--
Only include files in current folder Output: NONE ''' if
IsLoopSubDIR: flist=getSubFileList(dir,
'.txt') else: flist=getCurrFileList(dir,
'.txt') for
fname in
flist: if
IsAuto: Convert_Auto(fname,
'utf-8') else: Convert_Manu(fname,
'gbk',
'utf-8') def
getSubFileList(dir,
suffix=''): '''Get
all file list with specified suffix under current folder(Include sub folder) Input:
dir
: Current folder suffix
: default to blank, means select all files. Output: File
list ''' flist=[] for
root, dirs, files in
os.walk(os.getcwd()): for
name in
files: if
name.endswith(suffix): flist.append(os.path.join(root,
name)) return
flistdef
getCurrFileList(dir,
suffix=''): '''Get
all file list with specified suffix under current level folder Input:
dir
: Current folder suffix
: default to blank, means select all files. Output: File
list ''' if
suffix=='':
files=glob.glob('*') else: files=glob.glob('*'+suffix) flist=[]
for
f in
files: flist.append(os.path.join(os.getcwd(),
f)) return
flist def
main(): explore(os.getcwd(),
True) if
__name__ ==
"__main__":
main()
本博客介绍了一个Python脚本,用于批量检测并自动转换文件编码,同时提供了手动选择编码的功能。脚本能够识别文件编码,进行转换,并处理转换失败的情况。适用于大量文件的编码统一与错误修复。
431

被折叠的 条评论
为什么被折叠?



