将数据库导出的 CSV 文件从 GB2312 编码转换为 UTF-8-SIG 编码

最新推荐文章于 2025-03-13 21:35:50 发布

原创最新推荐文章于 2025-03-13 21:35:50 发布 · 681 阅读

0 ·

CC 4.0 BY-SA版权

文章标签：

#python

python 专栏收录该内容

11 篇文章

订阅专栏

该脚本用于遍历指定目录下所有 .csv 文件，检测其原始编码并转换为 UTF-8-SIG（带 BOM 的 UTF-8）。首先使用 os 库收集文件路径，并用 chardet 库检测文件编码；然后读取文件内容，按检测到的编码解码（忽略无法解码的字节），再编码为 UTF-8-SIG；最后将转换后的内容写回原文件。

#!/usr/bin/env python
# -*- coding:utf-8 -*-
import os
from chardet import detect

def decod(path):
    """Convert every ``.csv`` file under *path* to UTF-8 with a BOM, in place.

    The tree rooted at *path* is walked with ``os.walk``. Each file whose
    extension is exactly ``.csv`` is read as bytes, its encoding is guessed
    with ``chardet.detect``, and the file is overwritten with the same text
    encoded as ``UTF-8-SIG``. Bytes that cannot be decoded with the chosen
    codec are dropped (``'ignore'``), so the conversion is best-effort.

    Files are skipped (left untouched) when they are empty or when no
    encoding could be detected; they are still included in the result.

    Args:
        path: Root directory to search for ``.csv`` files.

    Returns:
        A list with the path of every ``.csv`` file found, in walk order.
    """
    fns = []
    for root, dirnames, filenames in os.walk(path):
        # Every directory is scanned, not only leaf directories, so that a
        # .csv file sitting next to sub-directories is not silently missed.
        print(root, dirnames, filenames)
        for name in filenames:
            if os.path.splitext(name)[1] == '.csv':
                fns.append(os.path.join(root, name))

    for fn in fns:
        with open(fn, 'rb+') as fp:
            content = fp.read()
            if not content:
                # An empty file has nothing to convert.
                continue

            encoding = detect(content)['encoding']
            print("before coding is %s" % encoding)
            if encoding is None:
                # Decoding with an unknown codec would raise; leave the
                # file as it is rather than aborting the whole run.
                print("skip %s: encoding could not be detected" % fn)
                continue

            # GB18030 is a superset of GB2312 and GBK. Decoding with it keeps
            # characters that the narrower codec would drop under 'ignore'.
            if encoding.upper() in ('GB2312', 'GBK'):
                codec = 'gb18030'
            else:
                codec = encoding

            content = content.decode(codec, 'ignore').encode('UTF-8-SIG', 'ignore')
            fp.seek(0)
            fp.write(content)
            # Drop leftover bytes when the new content is shorter than the
            # old content (e.g. a UTF-16 source).
            fp.truncate()
    return fns

if __name__ == "__main__":
    # Directory that holds the exported CSV files to convert.
    path = r'D:\DTG_CSV_Data\20190628\SZSEL2'
    fns = decod(path)  # paths of the files that were processed

    # Re-detect the encoding of each file to confirm the conversion.
    for fn in fns:
        # This pass only reads, so open read-only instead of 'rb+'.
        with open(fn, 'rb') as fp:
            content = fp.read()
        encoding = detect(content)['encoding']
        print("after coding is %s" % encoding)