# -*- coding: utf-8 -*-
"""Batch-convert the encoding of every matching file under a directory.

Article introduction (translated): use Python to batch-convert the encoding
of all files of the chosen types under one directory. Before running, set
the directory path, the target encoding and the file types. Installing both
PyCharm and Anaconda is recommended: PyCharm for editing and debugging
(its debugger is very capable), Anaconda for installing Python and its
runtime libraries. The source code follows.

Conversion between utf-8, utf-8-bom, gb2312 and gbk is supported; other
encodings have not been tested.

Usage: edit the settings in main():
    src_dir = "d:\\test"            --> directory tree to convert
    tag_type = "utf-8-sig"          --> target encoding
    type_filter = [".cpp", ".h"]    --> file extensions to convert

Note: "utf-8-bom" is not a Python codec name; this script accepts it as an
alias of "utf-8-sig".
"""
import sys
import os
import codecs


def _canonical_name(code_type):
    """Return a comparable, canonical spelling of an encoding name.

    "utf-8-bom" is mapped to "utf-8-sig"; any name Python knows is replaced
    by the codec's registered name (so "UTF8" and "utf_8" both give
    "utf-8"). Names Python does not know are returned lower-cased.
    """
    name = code_type.lower()
    if name == "utf-8-bom":
        return "utf-8-sig"
    try:
        return codecs.lookup(name).name
    except LookupError:
        return name


def utf8_conversion(f_context, code_type):
    """Re-encode UTF-8 bytes (without BOM) into ``code_type``.

    :param f_context: file content as UTF-8 encoded bytes.
    :param code_type: target encoding name ("utf-8-bom" is accepted).
    :return: the content encoded as ``code_type``.
    :raises UnicodeError: if the content cannot be represented in the target.
    :raises LookupError: if ``code_type`` is not a known codec.
    """
    canonical = _canonical_name(code_type)
    if canonical == "utf-8":
        return f_context
    if canonical == "utf-8-sig":
        # Never stack a second BOM on content that already carries one.
        if f_context.startswith(codecs.BOM_UTF8):
            return f_context
        return codecs.BOM_UTF8 + f_context
    return f_context.decode("utf-8").encode(code_type)


def conversion_to_utf8(f_context, code_type):
    """Convert bytes encoded as ``code_type`` into UTF-8 bytes without BOM.

    :param f_context: raw file content.
    :param code_type: encoding the content is currently in.
    :return: the content as UTF-8 encoded bytes.
    :raises UnicodeError: if the content is not valid ``code_type``.
    :raises LookupError: if ``code_type`` is not a known codec.
    """
    canonical = _canonical_name(code_type)
    if canonical == "utf-8":
        return f_context
    if canonical == "utf-8-sig":
        # Strip the BOM only when it is really there; slicing three bytes
        # unconditionally would eat real content.
        if f_context.startswith(codecs.BOM_UTF8):
            return f_context[len(codecs.BOM_UTF8):]
        return f_context
    return f_context.decode(code_type).encode("utf-8")


def file_code_conversion(file_name, tag_code_type):
    """Detect the encoding of ``file_name`` and rewrite it as ``tag_code_type``.

    The file is rewritten in place only after the conversion has succeeded
    in memory. Empty files, files whose encoding cannot be detected, files
    already in the target encoding and files that fail to convert are
    reported on stdout and left untouched.

    :param file_name: path of the file to convert.
    :param tag_code_type: target encoding name.
    """
    with open(file_name, "rb") as src_file:
        file_context = src_file.read()

    # Content is bytes, so test for emptiness rather than comparing to "".
    if not file_context:
        print(file_name + " -- empty file")
        return

    # Imported here so the pure conversion helpers stay importable on
    # machines that do not have the third-party chardet package.
    import chardet

    code_type = chardet.detect(file_context)["encoding"]
    if code_type is None:
        # chardet reports None when it cannot make a guess.
        print(file_name + " -- unknown encoding, skipped")
        return

    if _canonical_name(code_type) == _canonical_name(tag_code_type):
        print(file_name + " -- escape file")
        return

    try:
        tag_utf8_context = conversion_to_utf8(file_context, code_type)
        tag_context = utf8_conversion(tag_utf8_context, tag_code_type)
    except (UnicodeError, LookupError) as err:
        # A wrong guess or an unsupported codec must not abort the batch.
        print(file_name + (" -- failed to convert from %s into %s: %s"
                           % (code_type, tag_code_type, err)))
        return

    print(file_name + (" -- converted from %s into %s"
                       % (code_type, tag_code_type)))
    with open(file_name, "wb") as tag_file:
        tag_file.write(tag_context)


def main():
    """Walk ``src_dir`` and convert every file whose extension is listed."""
    src_dir = "D:\\project\\myproject"
    tag_type = "utf-8-bom"
    type_filter = [".cpp", ".h"]

    for dir_path, _dirs, files in os.walk(src_dir):
        for name in files:
            if os.path.splitext(name)[1] in type_filter:
                file_code_conversion(os.path.join(dir_path, name), tag_type)


if __name__ == "__main__":
    main()
python 批量转换文件的编码方式
最新推荐文章于 2022-12-09 21:28:16 发布