Python 文件操作汇总

方便速查

读取按行存储的 JSON 文件(每行一个 JSON 对象)并写出 csv,不使用 pandas

import os
import json
import csv

# Placeholders from the original snippet: fill in real paths before running.
output_folder = ...
file_path = ...
file_name = ...

# exist_ok=True replaces the separate exists() check and avoids the
# check-then-create race when another process creates the folder first.
os.makedirs(output_folder, exist_ok=True)

output_path = os.path.join(output_folder, '{}.csv'.format(file_name))

# The input is parsed as one JSON object per line; each object becomes one CSV row.
# newline='' is what the csv module asks for on files handed to a writer;
# without it, platforms that translate '\n' emit an extra '\r' per row.
with open(file_path, 'r') as f, open(output_path, 'w', newline='') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=['col1', 'col2'])
    writer.writeheader()
    # Iterate the file object lazily instead of materialising every line
    # with readlines().
    for line in f:
        if not line.strip():
            # A blank line (e.g. a trailing newline) is not valid JSON; skip it
            # rather than aborting the whole conversion.
            continue
        dic = json.loads(line)
        # Missing keys fall back to an empty string so every row has both columns.
        writer.writerow({
            'col1': dic.get('col1', ''),
            'col2': dic.get('col2', ''),
        })

使用 pandas 读取 csv,使用标准库 csv 模块写入 csv(pandas 写 csv 遇到长文本时可能会出错)

import pandas as pd
import os
import csv

def concatenate_columns(row):
    """Render the 'a', 'b', 'c' and 'd' fields of ``row`` as one multi-line string.

    Each value is wrapped in double quotes and prefixed with its field name.
    The lines after the first keep the four-space indentation that is part of
    the template literal, and the result ends with a newline plus four spaces.

    Args:
        row: Any mapping-like object supporting ``row['a']`` ... ``row['d']``.

    Returns:
        The formatted string.
    """
    layout = '''a: "{}"
    b: "{}"
    c: "{}"
    d: "{}"
    '''
    values = [row[key] for key in ('a', 'b', 'c', 'd')]
    return layout.format(*values)

if __name__ == '__main__':
    # Reads the input CSV in small chunks with pandas and writes one
    # ('e', 'f') row per scored input row using the standard csv module.

    # Placeholders from the original snippet: fill in real values before running.
    output_folder = ...
    input_file_name = ...
    input_file_path = os.path.join(output_folder, input_file_name)
    output_path = os.path.join(output_folder, '{}.scores.csv'.format(input_file_name))

    # Running count of rows written so far; reported after every chunk.
    total_lines = 0
    # newline='' is what the csv module asks for on files handed to a writer;
    # without it, platforms that translate '\n' emit an extra '\r' per row.
    with open(output_path, 'w', newline='') as csvfile:

        writer = csv.DictWriter(csvfile, fieldnames=['e', 'f'])
        writer.writeheader()

        # chunksize=4 makes read_csv yield DataFrames of at most four rows.
        for chunk in pd.read_csv(input_file_path, chunksize=4):
            chunk['concatenated'] = chunk.apply(concatenate_columns, axis=1)
            warmup_sentences = chunk['concatenated'].tolist()
            e_lst = chunk['e'].tolist()
            # Placeholder left by the author; presumably one score per entry
            # of warmup_sentences -- TODO confirm and fill in.
            results = ...
            for e, score in zip(e_lst, results):
                writer.writerow({
                    'e': e,
                    'f': score,
                })
                # Count rows actually written; the counter was previously
                # never updated, so the progress line always printed 0.
                total_lines += 1
            print('total_lines={}'.format(total_lines))

读写json文件

文件格式如下:

[
    {"name": "Alice", "age": 30, "city": "New York"},
    {"name": "Bob", "age": 25, "city": "Los Angeles"},
    {"name": "Charlie", "age": 35, "city": "Chicago"}
]
import json

# Parse the whole JSON document in one call; the sample file is a top-level
# array of objects, so dic_lst is expected to be a list of dicts.
with open(input_path, 'r') as src:
    dic_lst = json.load(src)

# Echo each record for inspection.
for dic in dic_lst:
    print('dic={}'.format(dic))

# Write the data back out as JSON text terminated by a newline.
# indent=True is treated as the integer 1 (one-space indentation), and
# ensure_ascii=False keeps non-ASCII characters unescaped.
with open(output_path, mode='w') as dst:
    serialized = json.dumps(dic_lst, indent=True, ensure_ascii=False)
    dst.write(serialized)
    dst.write('\n')

列出文件夹下全部文件

目录如下:

(base) ➜  Downloads tree tmp
tmp
├── tmp1
└── tmp2
    ├── a1.txt
    └── a2.txt

递归遍历目录(使用 os.walk)的代码如下:


import os

def list_all_files(directory):
    """Return the paths of all files found under ``directory``, at any depth.

    Walks the tree with ``os.walk`` and, for every directory visited, prints
    its root, sub-directory names and file names before collecting the files.

    Args:
        directory: Path of the directory to start walking from.

    Returns:
        A list of file paths, each built by joining the visited directory
        with the file name. Directories themselves are not included.
    """
    collected = []
    for root, dirs, files in os.walk(directory):
        print('root={} dirs={} files={}'.format(root, dirs, files))
        collected.extend(os.path.join(root, name) for name in files)
    return collected

# Example usage: collect every file under a sample directory and print the list.
directory = '/Users/bytedance/Downloads/tmp/'
all_files = list_all_files(directory)
print(all_files)

输出如下:

root=/Users/bytedance/Downloads/tmp/ dirs=['tmp2', 'tmp1'] files=['.DS_Store']
root=/Users/bytedance/Downloads/tmp/tmp2 dirs=[] files=['a1.txt', 'a2.txt']
root=/Users/bytedance/Downloads/tmp/tmp1 dirs=[] files=[]
['/Users/bytedance/Downloads/tmp/.DS_Store', '/Users/bytedance/Downloads/tmp/tmp2/a1.txt', '/Users/bytedance/Downloads/tmp/tmp2/a2.txt']
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值