#!/usr/bin/env python
# -*- coding:utf-8 -*-
import os
from chardet import detect
def decod(path):
    """Re-encode CSV files found under *path* as UTF-8 with BOM, in place.

    Walks *path* with ``os.walk``, collects every file whose extension is
    exactly ``.csv``, guesses each file's current encoding with
    ``chardet.detect`` and rewrites the file as ``UTF-8-SIG``. Bytes that
    cannot be decoded or encoded are dropped (``'ignore'``), so the
    conversion is best-effort and may lose characters.

    Files that are empty, or whose encoding cannot be detected, are left
    untouched but are still included in the returned list.

    :param path: root directory to scan.
    :return: list of paths of the CSV files that were found.
    """
    fns = []
    for root, dirs, files in os.walk(path):
        # NOTE(review): the original indentation was lost; this keeps the
        # narrower reading in which only directories without subdirectories
        # are scanned. Confirm whether CSVs in non-leaf directories should
        # be converted as well.
        if dirs == []:
            print(root, dirs, files)
            for name in files:
                if os.path.splitext(name)[1] == '.csv':
                    fns.append(os.path.join(root, name))

    for fn in fns:
        with open(fn, 'rb+') as fp:
            content = fp.read()
            # An empty file has nothing to detect; treat it as "unknown".
            encoding = detect(content)['encoding'] if content else None
            print("before coding is %s" % encoding)
            if encoding is None:
                # No usable encoding guess: decoding would raise, so leave
                # the file exactly as it is.
                continue
            content = content.decode(encoding, 'ignore').encode('UTF-8-SIG', 'ignore')
            fp.seek(0)
            fp.write(content)
            # The re-encoded data may be shorter than the original bytes;
            # cut off whatever is left of the old content.
            fp.truncate()
    return fns
if __name__ == "__main__":
    import sys

    # Directory to convert: the first command-line argument when one is
    # given, otherwise the original hard-coded default.
    path = sys.argv[1] if len(sys.argv) > 1 else r'D:\DTG_CSV_Data\20190628\SZSEL2'
    fns = decod(path)  # CSV files processed by decod
    for fn in fns:
        # Verification pass: the files are only read here, so open them
        # read-only instead of 'rb+'.
        with open(fn, 'rb') as fp:
            content = fp.read()
        encoding = detect(content)['encoding']
        print("after coding is %s" % encoding)
该脚本用于遍历指定目录下的.csv文件,检测其原始编码并转换为带BOM的UTF-8(UTF-8-SIG)。首先,使用os库遍历目录、收集文件路径,并用chardet库检测每个文件的编码;然后读取文件内容,按检测到的编码解码(忽略无法解码的字节),再编码为UTF-8-SIG;最后将转换后的内容写回原文件。
1397

被折叠的 条评论
为什么被折叠?



