方便速查
读写 CSV(不使用 pandas):逐行读取 JSON Lines 文件,再用标准库 csv 模块写出 CSV
import os
import json
import csv
# Convert a JSON-Lines file (one JSON object per line) into a two-column CSV
# using only the standard library.
output_folder = ...  # placeholder: directory that receives the generated CSV
file_path = ...  # placeholder: path of the JSON-Lines input file
file_name = ...  # placeholder: base name (without extension) of the CSV

# exist_ok=True replaces the exists()-then-makedirs() pair, which can fail if
# the directory appears between the check and the creation.
os.makedirs(output_folder, exist_ok=True)
output_path = os.path.join(output_folder, '{}.csv'.format(file_name))

# newline='' is required by the csv module so that it controls line endings
# itself; without it, extra blank rows appear on platforms that translate
# '\n' to '\r\n'. UTF-8 is set explicitly so the result does not depend on the
# locale's default encoding.
with open(file_path, 'r', encoding='utf-8') as f, \
        open(output_path, 'w', encoding='utf-8', newline='') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=['col1', 'col2'])
    writer.writeheader()
    # Iterate the file lazily instead of loading every line with readlines().
    for line in f:
        # Skip blank lines (e.g. a trailing empty line), which json.loads
        # would reject.
        if not line.strip():
            continue
        dic = json.loads(line)
        # Missing keys are written as empty cells.
        writer.writerow({
            'col1': dic.get('col1', ''),
            'col2': dic.get('col2', ''),
        })
使用 pandas 分块读取 CSV,但用标准库 csv 模块写出 CSV(pandas 写 CSV 遇到长文本时可能会出错)
import pandas as pd
import os
import csv
def concatenate_columns(row):
    """Render fields a, b, c and d of one row as quoted ``name: "value"`` lines.

    Args:
        row: Any object indexable by the keys 'a', 'b', 'c' and 'd'
            (for example a row handed over by ``DataFrame.apply(axis=1)``
            or a plain dict).

    Returns:
        A string of four newline-terminated lines, one per field, in the
        order a, b, c, d. Values are inserted with ``str.format`` and are
        not escaped.

    Raises:
        KeyError: If one of the four keys is missing from ``row``.
    """
    template = 'a: "{}"\nb: "{}"\nc: "{}"\nd: "{}"\n'
    return template.format(row['a'], row['b'], row['c'], row['d'])
if __name__ == '__main__':
    # Read the input CSV in small chunks with pandas, build one text block per
    # row, and write an (e, score) pair per row with the csv module.
    output_folder = ...  # placeholder: folder holding the input and the output
    input_file_name = ...  # placeholder: file name of the input CSV
    input_file_path = os.path.join(output_folder, input_file_name)
    output_path = os.path.join(
        output_folder, '{}.scores.csv'.format(input_file_name))

    total_lines = 0
    # newline='' lets the csv module manage line endings itself (otherwise
    # blank rows can appear on some platforms); UTF-8 matches the default
    # encoding pandas.read_csv uses for the input.
    with open(output_path, 'w', encoding='utf-8', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=['e', 'f'])
        writer.writeheader()
        # chunksize=4 makes read_csv yield DataFrames of at most four rows.
        for chunk in pd.read_csv(input_file_path, chunksize=4):
            chunk['concatenated'] = chunk.apply(concatenate_columns, axis=1)
            warmup_sentences = chunk['concatenated'].tolist()
            e_lst = chunk['e'].tolist()
            # Placeholder: presumably one score per entry of warmup_sentences
            # -- TODO fill in the real scoring call.
            results = ...
            for e, score in zip(e_lst, results):
                writer.writerow({
                    'e': e,
                    'f': score,
                })
                # Count every row actually written; without this the summary
                # below would always report 0.
                total_lines += 1
    print('total_lines={}'.format(total_lines))
读写json文件
文件格式如下:
[
{"name": "Alice", "age": 30, "city": "New York"},
{"name": "Bob", "age": 25, "city": "Los Angeles"},
{"name": "Charlie", "age": 35, "city": "Chicago"}
]
import json
# Load a JSON file whose top-level value is a list of objects, print each
# object, then write the list back out as indented JSON.
# input_path and output_path are expected to be defined by the caller.
# UTF-8 is set explicitly on both files: ensure_ascii=False below emits raw
# non-ASCII characters, which a non-UTF-8 locale encoding may fail to encode.
with open(input_path, 'r', encoding='utf-8') as input_file:
    dic_lst = json.load(input_file)

for dic in dic_lst:
    print('dic={}'.format(dic))

with open(output_path, mode='w', encoding='utf-8') as output_file:
    # indent=1 gives one-space indentation (a boolean True is treated as 1).
    output_file.write(
        json.dumps(dic_lst, indent=1, ensure_ascii=False) + '\n')
列出文件夹下全部文件
目录如下:
(base) ➜ Downloads tree tmp
tmp
├── tmp1
└── tmp2
    ├── a1.txt
    └── a2.txt
递归遍历目录的代码如下(使用 os.walk,无需自己写递归):
import os
def list_all_files(directory):
    """Collect the paths of all files under ``directory``, including subfolders.

    The tree is traversed with ``os.walk``. For every directory visited, a
    trace line with its path, sub-directory names and file names is printed.

    Args:
        directory: Path of the top-level directory to scan.

    Returns:
        A list of file paths, each built by joining the visited directory's
        path with the file name. The order follows ``os.walk`` and is not
        sorted. The list is empty when no files are found.
    """
    all_files = []
    for root, dirs, files in os.walk(directory):
        print('root={} dirs={} files={}'.format(root, dirs, files))
        all_files.extend(os.path.join(root, file) for file in files)
    return all_files
# Example usage: scan a sample directory and print every file path found.
directory = '/Users/bytedance/Downloads/tmp/'
all_files = list_all_files(directory)
print(all_files)
输出如下:
root=/Users/bytedance/Downloads/tmp/ dirs=['tmp2', 'tmp1'] files=['.DS_Store']
root=/Users/bytedance/Downloads/tmp/tmp2 dirs=[] files=['a1.txt', 'a2.txt']
root=/Users/bytedance/Downloads/tmp/tmp1 dirs=[] files=[]
['/Users/bytedance/Downloads/tmp/.DS_Store', '/Users/bytedance/Downloads/tmp/tmp2/a1.txt', '/Users/bytedance/Downloads/tmp/tmp2/a2.txt']