# Each section heading comment below names the target file format.
import csv
import chardet
import pandas as pd
# Input and output file names used by the conversion functions in this file.
input_filename = 'input.csv'
output_filename = 'output.csv'
import codecs
# UTF-8格式
def convert_to_utf8(input_file, output_file, encoding='utf-8'):
    """Re-encode a text file as UTF-8.

    Args:
        input_file: Path of the file to read.
        output_file: Path of the file to write; overwritten if it exists.
        encoding: Name of the codec ``input_file`` is stored in.
            Defaults to ``'utf-8'``.

    Raises:
        LookupError: If ``encoding`` is not a known codec name.
        UnicodeDecodeError: If ``input_file`` cannot be decoded with
            ``encoding``.
    """
    # newline='' turns off newline translation in both directions, so the
    # file's line endings are copied through unchanged.
    with open(input_file, 'r', encoding=encoding, newline='') as source:
        content = source.read()
    with open(output_file, 'w', encoding='utf-8', newline='') as target:
        target.write(content)
# 使用方法
# convert_to_utf8('input_file_path', 'output_file_path', encoding='original_encoding')
# with-BOM-UTF-8
def with_bom(input_file=None, output_file=None):
    """Copy a UTF-8 CSV file to a new file that starts with one UTF-8 BOM.

    Args:
        input_file: Path of the CSV to read; it may or may not already
            start with a BOM. Defaults to the module-level
            ``input_filename``.
        output_file: Path of the CSV to write; overwritten if it exists.
            Defaults to the module-level ``output_filename``.
    """
    source_path = input_filename if input_file is None else input_file
    target_path = output_filename if output_file is None else output_file
    # 'utf-8-sig' drops a leading BOM on read when one is present, so it is
    # never copied into the first cell. newline='' is what the csv module
    # expects for both reading and writing.
    with open(source_path, 'r', encoding='utf-8-sig', newline='') as csvfile, \
            open(target_path, 'w', encoding='utf-8', newline='') as out_csvfile:
        # Write the BOM by hand through a plain UTF-8 stream: it is emitted
        # exactly once, and also when the input contains no rows.
        out_csvfile.write('\ufeff')
        csv.writer(out_csvfile).writerows(csv.reader(csvfile))
# Runs the BOM conversion as soon as this module is executed or imported.
with_bom()
# GBK格式
def with_gbk(input_file=None, output_file=None):
    """Convert a UTF-8 CSV file to a GBK-encoded CSV file.

    The result is written to a separate output file so the UTF-8 source is
    left intact.

    Args:
        input_file: Path of the UTF-8 CSV to read. Defaults to the
            module-level ``input_filename``.
        output_file: Path of the GBK CSV to write; overwritten if it
            exists. Defaults to the module-level ``output_filename``.

    Raises:
        UnicodeEncodeError: If the data contains a character that GBK
            cannot represent.
    """
    source_path = input_filename if input_file is None else input_file
    target_path = output_filename if output_file is None else output_file
    # dtype=str and keep_default_na=False keep every cell as the text that
    # was in the file; without them pandas would turn "001" into 1 and
    # "NA" into an empty cell, which is more than an encoding change.
    df = pd.read_csv(
        source_path, encoding='utf-8', dtype=str, keep_default_na=False
    )
    df.to_csv(target_path, index=False, encoding='gbk')
# with_gb2312
def with_gb2312(input_file=None, output_file=None):
    """Copy a UTF-8 CSV file to a new GB2312-encoded CSV file.

    Args:
        input_file: Path of the UTF-8 CSV to read, with or without a BOM.
            Defaults to the module-level ``input_filename``.
        output_file: Path of the GB2312 CSV to write; overwritten if it
            exists. Defaults to the module-level ``output_filename``.

    Raises:
        UnicodeEncodeError: If the data contains a character that GB2312
            cannot represent.
    """
    source_path = input_filename if input_file is None else input_file
    target_path = output_filename if output_file is None else output_file
    # 'utf-8-sig' strips a leading BOM if present; U+FEFF has no GB2312
    # encoding, so letting it through would abort the conversion.
    # newline='' is what the csv module expects, and it keeps newlines
    # inside quoted fields intact.
    with open(source_path, 'r', encoding='utf-8-sig', newline='') as reader_file, \
            open(target_path, 'w', encoding='gb2312', newline='') as writer_file:
        csv.writer(writer_file).writerows(csv.reader(reader_file))
# with_gb2312()
#查看文件格式
def detect_csv_encoding(file_path):
    """Return the encoding name that chardet reports for a file.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        The value stored under the ``'encoding'`` key of the result of
        ``chardet.detect`` for the file's full contents.
    """
    # Binary mode: the detector needs the raw, undecoded bytes.
    with open(file_path, 'rb') as raw_file:
        guess = chardet.detect(raw_file.read())
    return guess['encoding']
# Example usage: detect and print the encoding of a CSV file.
file_path = 'example.csv' # replace with the path to your CSV file
encoding = detect_csv_encoding(file_path)
print(f"The encoding of the CSV file is: {encoding}")
2122

被折叠的 条评论
为什么被折叠?



