Python3批量转换文本文件编码

本博客介绍了一个 Python 脚本，用于批量检测并自动转换文件编码，同时提供了手动选择编码的功能。脚本能够识别文件编码、进行转换，并处理转换失败的情况，适用于大量文件的编码统一与错误修复。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >



#-*- coding: utf-8 -*-
# chardet is treated as an optional dependency: IsAuto records whether its
# UniversalDetector could be imported. explore() reads this flag to choose
# between Convert_Auto (detector available) and Convert_Manu (not available).
try:
    from chardet.universaldetector import UniversalDetector
    IsAuto = True
except  ImportError:
    IsAuto = False
import os
import os.path
import glob
 
def Convert_Auto(filename, out_enc="utf-8"):
    r'''Re-encode a text file in place, auto-detecting its current encoding.

    Needs the chardet library (UniversalDetector must be importable at
    module level). The outcome is printed to stdout; failures are reported
    but never raised, so a batch run can carry on with the next file.

    Input Parameter:
        filename: full path and file name, e.g. c:\dir1\file.txt
        out_enc: new encoding. Default is 'utf-8'
    Output Parameter:
        None
    '''
    # Bound before the try block so the failure report can always be built,
    # even when the file cannot be opened at all.
    in_enc = None
    try:
        with open(filename, 'rb') as src:
            raw = src.read()

        detector = UniversalDetector()
        detector.reset()
        # One leading space (kept from the original implementation) makes
        # sure the detector is always fed at least one byte.
        sample = b' ' + raw
        # Feed the whole content chunk by chunk instead of only the first
        # 1 KiB: a file whose beginning is plain ASCII would otherwise be
        # detected as ASCII and lose every later non-ASCII character.
        for start in range(0, len(sample), 1024):
            detector.feed(sample[start:start + 1024])
            if detector.done:
                break
        detector.close()

        in_enc = detector.result['encoding']
        if in_enc is None:
            # Detection gave no answer; leave the file untouched.
            raise LookupError("encoding could not be detected")

        # NOTE: 'ignore' silently drops bytes that are invalid in in_enc;
        # kept for backward compatibility with the original behavior.
        text = raw.decode(in_enc, 'ignore')
        # Encode BEFORE reopening the file: opening for writing truncates it,
        # so a failing encode must not be able to destroy the content.
        encoded = text.encode(out_enc)
        # Binary mode avoids newline translation ('\r\n' -> '\r\r\n' on
        # Windows) of content that was read as raw bytes.
        with open(filename, 'wb') as dst:
            dst.write(encoded)
    except (OSError, LookupError, UnicodeError):
        print("Error: " + filename + " FAIL to converted from "
              + (in_enc or "unknown") + " to " + out_enc + " !")
    else:
        print("Success: " + filename + " converted from "
              + in_enc + " to " + out_enc + " !")
 
def Convert_Manu(filename, in_enc='gbk', out_enc="utf-8"):
    r'''Re-encode a text file in place from a manually chosen encoding.

    The outcome is printed to stdout; failures are reported but never
    raised, so a batch run can carry on with the next file.

    Input Parameter:
        filename: full path and file name, e.g. c:\dir1\file.txt
        in_enc:  current encoding. Default is 'gbk'
        out_enc: new encoding. Default is 'utf-8'
    Output Parameter:
        None
    '''
    try:
        print("convert " + filename)
        with open(filename, 'rb') as src:
            raw = src.read()

        # NOTE: 'ignore' silently drops bytes that are invalid in in_enc;
        # kept for backward compatibility with the original behavior.
        text = raw.decode(in_enc, 'ignore')
        # Encode BEFORE reopening the file: opening for writing truncates it,
        # so a failing encode must not be able to destroy the content.
        encoded = text.encode(out_enc)
        # Binary mode avoids newline translation ('\r\n' -> '\r\r\n' on
        # Windows) of content that was read as raw bytes.
        with open(filename, 'wb') as dst:
            dst.write(encoded)
    except (OSError, LookupError, UnicodeError):
        # LookupError: unknown codec name; UnicodeError: text cannot be
        # represented in out_enc. In both cases the file is left as it was.
        print("Error: " + filename + " FAIL to converted from "
              + in_enc + " to " + out_enc + " !")
    else:
        print("Success: " + filename + " converted from "
              + in_enc + " to " + out_enc + " !")
 
 
def explore(dir, IsLoopSubDIR=True):
    '''Convert the encoding of every '.txt' file that is found.

    Input:
        dir         : Folder handed to the file-listing helper
        IsLoopSubDIR:   True -- list files with getSubFileList
                                (sub folders included)
                        False-- list files with getCurrFileList
                                (current level only)
    Output:
        NONE
    '''
    # Pick the listing helper once, then convert each file it returns.
    list_files = getSubFileList if IsLoopSubDIR else getCurrFileList
    for path in list_files(dir, '.txt'):
        if not IsAuto:
            # No detector available: assume GBK input.
            Convert_Manu(path, 'gbk', 'utf-8')
            continue
        Convert_Auto(path, 'utf-8')
 
     
def getSubFileList(dir, suffix=''):
    '''Get all files with the specified suffix under a folder (sub folders included).

    Input:
        dir     :   Folder to search. An empty value or None falls back to
                    the current working directory.
        suffix  :   default to blank, means select all files.
    Output:
        List of paths, each built by joining the walked folder and the
        file name.
    '''
    # The original always walked os.getcwd() and ignored `dir`; honour the
    # argument and keep the working directory only as the fallback.
    top = dir or os.getcwd()
    return [
        os.path.join(root, name)
        for root, _subdirs, files in os.walk(top)
        for name in files
        if name.endswith(suffix)
    ]
 
def getCurrFileList(dir, suffix=''):
    '''Get all files with the specified suffix directly inside a folder.

    Input:
        dir     :   Folder to search (sub folders are not entered). An
                    empty value or None falls back to the current working
                    directory.
        suffix  :   default to blank, means select all files.
    Output:
        List of paths, each prefixed with the searched folder. Only regular
        files are returned; directories that match the pattern are skipped.
    '''
    # The original globbed relative to the working directory and ignored
    # `dir`; honour the argument and keep cwd only as the fallback.
    base = dir or os.getcwd()
    # Escape the folder part so glob metacharacters in its name are taken
    # literally. '*' + '' is just '*', so no special case for a blank suffix.
    pattern = os.path.join(glob.escape(base), '*' + suffix)
    return [path for path in glob.glob(pattern) if os.path.isfile(path)]
         
         
def main():
    '''Run explore() on the current working directory, sub folders included.'''
    start_dir = os.getcwd()
    explore(start_dir, True)
     
# Run the batch conversion only when the file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值