10个常用的Python自动化TXT脚本,小白成功上岸

这里插播一条粉丝福利:如果你正在学习Python或者有计划学习Python,想要突破自我,却对未来感到迷茫,可以点击这里获取最新的Python学习资料和学习路线规划(免费分享,记得关注)。

以下是10个实用的Python文本处理自动化脚本,帮助你高效处理txt文件,包含常见需求和工作场景:

1.批量合并多个TXT文件

import os

# Directory that holds the input files, and the path of the merged result.
folder_path = "./txt_files"
output_file = "merged.txt"

# Resolve the output path once so it can be recognised (and skipped) if it
# happens to live inside folder_path; otherwise the script would read the
# very file it is writing.
output_abs_path = os.path.abspath(output_file)

# Merge the content of every .txt file in folder_path into output_file.
with open(output_file, "w", encoding="utf-8") as outfile:
    # os.listdir() returns names in arbitrary order; sorting makes the merged
    # result reproducible from run to run.
    for file_name in sorted(os.listdir(folder_path)):
        if not file_name.endswith(".txt"):
            continue
        file_path = os.path.join(folder_path, file_name)
        # Skip directories that merely end in ".txt" and the output file itself.
        if not os.path.isfile(file_path):
            continue
        if os.path.abspath(file_path) == output_abs_path:
            continue
        with open(file_path, "r", encoding="utf-8") as infile:
            # The extra newline keeps consecutive files from running together.
            outfile.write(infile.read() + "\n")

# Report only after the output file has been flushed and closed.
print(f"所有TXT文件已合并到 {output_file}")

2.按关键词过滤行内容

# Settings: the text to look for, the file to scan, and where matches go.
keyword = "Python"
input_file = "example.txt"
output_file = "filtered.txt"

# Copy every line that contains the keyword into the output file, unchanged.
with open(input_file, "r", encoding="utf-8") as infile:
    with open(output_file, "w", encoding="utf-8") as outfile:
        outfile.writelines(row for row in infile if keyword in row)
        print(f"包含关键词 '{keyword}' 的行已保存到 {output_file}")

3.统计文件行数、单词数、字符数

input_file = "example.txt"

# Load all lines once, then derive the three totals in a single pass.
with open(input_file, "r", encoding="utf-8") as file:
    lines = list(file)

line_count = len(lines)
word_count = 0
char_count = 0
for text in lines:
    word_count += len(text.split())  # words are whitespace-separated tokens
    char_count += len(text)  # includes the line terminator, if any

print(f"文件行数: {line_count}, 单词数: {word_count}, 字符数: {char_count}")

4.清除TXT文件中的空行

input_file = "example.txt"
output_file = "no_empty_lines.txt"

# Copy the input to the output, dropping lines that are empty or
# contain only whitespace.
with open(input_file, "r", encoding="utf-8") as infile:
    with open(output_file, "w", encoding="utf-8") as outfile:
        for row in infile:
            if not row.strip():
                continue  # nothing but whitespace on this line
            outfile.write(row)
        print(f"空行已清除,结果保存在 {output_file}")

5.查找并替换文件中的内容

input_file = "example.txt"
output_file = "replaced.txt"
old_word = "旧词"
new_word = "新词"

# Stream the input line by line, writing each line with every occurrence
# of old_word replaced by new_word.
with open(input_file, "r", encoding="utf-8") as infile:
    with open(output_file, "w", encoding="utf-8") as outfile:
        outfile.writelines(row.replace(old_word, new_word) for row in infile)
        print(f"所有 '{old_word}' 已替换为 '{new_word}',结果保存在 {output_file}")

6.提取TXT文件中的电子邮件地址

import re

input_file = "example.txt"
output_file = "emails.txt"

# Address pattern: local part, "@", domain, then a dot and 2+ letters.
email_pattern = re.compile(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}")

with open(input_file, "r", encoding="utf-8") as infile:
    with open(output_file, "w", encoding="utf-8") as outfile:
        emails = email_pattern.findall(infile.read())
        # One address per line; no newline after the last one.
        outfile.write("\n".join(emails))
        print(f"提取的邮箱已保存到 {output_file}")

7.按行分割大文件

# Imported here so this snippet runs on its own.
import os
from itertools import islice

input_file = "large_file.txt"
lines_per_file = 1000  # number of lines written to each part
output_folder = "./split_files"

os.makedirs(output_folder, exist_ok=True)

# Stream the input in chunks of lines_per_file lines instead of loading the
# whole file, so memory use stays bounded however large the input is.
part_count = 0
with open(input_file, "r", encoding="utf-8") as infile:
    while True:
        chunk = list(islice(infile, lines_per_file))
        if not chunk:
            break  # input exhausted
        part_count += 1
        split_file = os.path.join(output_folder, f"part_{part_count}.txt")
        with open(split_file, "w", encoding="utf-8") as outfile:
            outfile.writelines(chunk)

# Report the number of parts actually written. Deriving it as
# total_lines // lines_per_file + 1 over-counts by one whenever the line
# count is an exact multiple of lines_per_file (including an empty file).
print(f"文件已分割为 {part_count} 个部分,保存在 {output_folder}")

8.统计每个单词的出现频率

from collections import Counter

input_file = "example.txt"
output_file = "word_frequency.txt"

# Tokenise on any whitespace and tally how often each token occurs.
with open(input_file, "r", encoding="utf-8") as file:
    word_counts = Counter(file.read().split())

# Write "word: count" lines, most frequent first.
with open(output_file, "w", encoding="utf-8") as outfile:
    outfile.writelines(
        f"{token}: {total}\n" for token, total in word_counts.most_common()
    )
    print(f"单词频率统计已保存到 {output_file}")

9.比较两个TXT文件的差异

file1 = "file1.txt"
file2 = "file2.txt"

# Collect the distinct lines of each file. The line terminator is stripped
# first: otherwise a final line with no trailing newline would never equal
# the same text followed by a newline in the other file, and would be
# reported as a spurious difference.
with open(file1, "r", encoding="utf-8") as f1, open(file2, "r", encoding="utf-8") as f2:
    lines1 = {line.rstrip("\n") for line in f1}
    lines2 = {line.rstrip("\n") for line in f2}

# Set differences: lines present in one file but absent from the other.
# Duplicates and line order are not considered.
only_in_file1 = lines1 - lines2
only_in_file2 = lines2 - lines1

print("仅在文件1中的行:", only_in_file1)
print("仅在文件2中的行:", only_in_file2)

10.生成模拟数据并写入TXT文件

import random

output_file = "random_data.txt"
num_lines = 100

# Write num_lines lines, each holding one random integer in [1, 1000].
with open(output_file, "w", encoding="utf-8") as file:
    file.writelines(f"{random.randint(1, 1000)}\n" for _ in range(num_lines))
    print(f"{num_lines} 行随机数据已生成到 {output_file}")

 最后,我精心筹备了一份全面的Python学习大礼包,完全免费分享给每一位渴望成长、希望突破自我现状却略感迷茫的朋友。无论您是编程新手还是希望深化技能的开发者,都欢迎加入我们的学习之旅,共同交流进步!

🌟 学习大礼包包含内容:

Python全领域学习路线图:一目了然,指引您从基础到进阶,再到专业领域的每一步学习路径,明确各方向的核心知识点。

超百节Python精品视频课程:涵盖Python编程的必备基础知识、高效爬虫技术、以及深入的数据分析技能,让您技能全面升级。

实战案例集锦:精选超过100个实战项目案例,从理论到实践,让您在解决实际问题的过程中,深化理解,提升编程能力。

华为独家Python漫画教程:创新学习方式,以轻松幽默的漫画形式,让您随时随地,利用碎片时间也能高效学习Python。

互联网企业Python面试真题集:精选历年知名互联网企业面试真题,助您提前备战,面试准备更充分,职场晋升更顺利。

👉 立即领取方式:只需【点击这里】,即刻解锁您的Python学习新篇章!让我们携手并进,在编程的海洋里探索无限可能!

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值