安装
chardet 用于检测文件编码
pip install chardet
库文件的作用
- os:用于遍历目录和处理文件路径。
- chardet:用于检测文件编码。
- pathlib:一个处理文件路径的库,可以使代码更简洁、易读。
用法
1、start_directory:设置要转换编码的文件夹
2、ignore_directories:设置转换时要忽略的文件夹
3、ignore_extensions:设置转换时要忽略的文件后缀
代码
import os
import chardet
from pathlib import Path
def detect_encoding(file_path):
    """Guess the encoding of a file from its raw bytes.

    The entire file is read in binary mode and passed to ``chardet.detect``.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        The value stored under the ``'encoding'`` key of the chardet result.
    """
    with open(file_path, 'rb') as raw_file:
        detection = chardet.detect(raw_file.read())
    return detection['encoding']
def convert_to_utf8(file_path, original_encoding):
    """Re-encode a text file in place as UTF-8, leaving line endings intact.

    The content is fully decoded before the file is reopened for writing, so
    a decoding failure happens before anything is overwritten.

    Args:
        file_path: Path of the file to rewrite.
        original_encoding: Codec name used to decode the current content.
            If ``None``, ``open`` falls back to its default encoding.

    Raises:
        UnicodeDecodeError: If the content is not valid in original_encoding.
        LookupError: If original_encoding is not a known codec name.
    """
    # newline='' disables newline translation on both read and write, so
    # CRLF / LF / CR sequences are preserved and only the encoding changes.
    with open(file_path, 'r', encoding=original_encoding, newline='') as f:
        content = f.read()
    with open(file_path, 'w', encoding='utf-8', newline='') as f:
        f.write(content)
def process_directory(start_directory, ignore_directories=None, ignore_extensions=None):
    """Walk a directory tree and convert non-UTF-8 text files to UTF-8 in place.

    Args:
        start_directory: Root folder to walk recursively.
        ignore_directories: Folder names that must not be descended into.
            Defaults to ``{".git"}``.
        ignore_extensions: File suffixes (with leading dot) to leave alone;
            matched case-insensitively. Defaults to ``{".png"}``.
    """
    if ignore_directories is None:
        ignore_directories = {".git"}
    if ignore_extensions is None:
        ignore_extensions = {".png"}
    ignored_dirs = set(ignore_directories)
    ignored_exts = {ext.lower() for ext in ignore_extensions}
    # ASCII bytes are already valid UTF-8, so such files need no rewrite.
    utf8_compatible = {'utf-8', 'ascii'}

    for root, dirs, files in os.walk(start_directory):
        # Skip the ignored folders: pruning ``dirs`` in place stops os.walk
        # from descending into them.
        dirs[:] = [d for d in dirs if d not in ignored_dirs]
        for file in files:
            file_path = Path(root) / file
            # Skip the ignored file types.
            if file_path.suffix.lower() in ignored_exts:
                continue
            original_encoding = detect_encoding(file_path)
            if original_encoding is None:
                # No encoding was reported (e.g. empty or binary content);
                # rewriting such a file would be guesswork.
                print(f"Skipping {file_path}: encoding could not be detected")
                continue
            if original_encoding.lower() in utf8_compatible:
                print(f"{file_path} is already in utf-8 format")
                continue
            print(f"Converting {file_path} from {original_encoding} to utf-8")
            try:
                convert_to_utf8(file_path, original_encoding)
            except (UnicodeDecodeError, LookupError) as exc:
                # A wrong guess or unknown codec should not abort the walk.
                print(f"Failed to convert {file_path}: {exc}")
if __name__ == '__main__':
    # Raw string: the backslashes are Windows path separators, not escape
    # sequences ('\P' in a normal literal is an invalid escape).
    start_directory = r'D:\PersonGit\PersonalLearningSpace'
    process_directory(start_directory)