import cchardet
def get_file_encoding(filepath):
    """Detect the text encoding of a file and return the codec name to use.

    The whole file is read as bytes and handed to ``cchardet.detect``; the
    value stored under the ``'encoding'`` key of its result is then adjusted:

    * ``"ISO-8859-1"`` and ``"ASCII"`` are reported as ``"GB2312"``.
    * ``"EUC-TW"`` is reported as ``"GBK"``.

    NOTE(review): the remapping presumably exists because the files handled
    here are expected to be Chinese text that the detector mislabels --
    confirm against the callers.

    Args:
        filepath: Path of the file to inspect; passed straight to ``open``.

    Returns:
        The remapped encoding name, or the detector's own ``'encoding'``
        value unchanged when no remapping applies.
    """
    # Binary mode: the detector works on raw bytes, not decoded text.
    with open(filepath, 'rb') as f:
        encoding = cchardet.detect(f.read())['encoding']

    if encoding in ("ISO-8859-1", "ASCII"):
        return "GB2312"
    if encoding == "EUC-TW":
        return "GBK"
    return encoding
用 C 语言编写的 cchardet,检测效率比纯 Python 编写的 chardet 高 60 倍。
编码关系:GB2312 ⊂ GBK ⊂ GB18030(后一种编码向下兼容前一种)。

当以 GBK 编码写入文本时,如果内容中含有 GBK 无法表示的字符(例如不间断空格 \xa0),会提示该字符无法编码。此时可以用 unicodedata 对内容做 NFKC 规范化,把这类特殊空白符转换成普通空格后再写入:
import unicodedata  # Python standard-library module
# NFKC normalization replaces compatibility characters such as the no-break
# space (\xa0) with their plain equivalents (an ordinary space).
content = unicodedata.normalize('NFKC', content)
with open(target_file, "w", encoding="gbk") as o: