10个常用的Python自动化TXT脚本，小白成功上岸

原创于 2024-12-09 15:43:42 发布 · 669 阅读

5 ·

CC 4.0 BY-SA版权

文章标签：

#python #开发语言 #Python学习 #python基础 #python教程 #python入门 #python教学

Python入门同时被 3 个专栏收录

275 篇文章

订阅专栏

Python学习

247 篇文章

订阅专栏

Python零基础

220 篇文章

订阅专栏

这里插播一条粉丝福利：如果你正在学习Python或有计划学习Python，想要突破自我却对未来感到迷茫，可以点击这里获取最新的Python学习资料和学习路线规划（免费分享，记得关注）。

以下是10个实用的Python文本处理自动化脚本，帮助你高效处理txt文件，包含常见需求和工作场景：

1.批量合并多个TXT文件

import os

# Directory to scan and the file that receives the merged text.
folder_path = "./txt_files"
output_file = "merged.txt"

# Merge every .txt file found directly inside folder_path into output_file.
# os.listdir() returns names in arbitrary order, so the names are sorted to
# make the merged result reproducible from run to run.
output_abspath = os.path.abspath(output_file)
with open(output_file, "w", encoding="utf-8") as outfile:
    for file_name in sorted(os.listdir(folder_path)):
        if not file_name.endswith(".txt"):
            continue
        file_path = os.path.join(folder_path, file_name)
        # Never read the output file back into itself; this can only happen
        # when output_file is configured to live inside folder_path.
        if os.path.abspath(file_path) == output_abspath:
            continue
        with open(file_path, "r", encoding="utf-8") as infile:
            # A newline is appended after each file's content as a separator.
            outfile.write(infile.read() + "\n")
# Report success only after the output file has been closed.
print(f"所有TXT文件已合并到 {output_file}")

2.按关键词过滤行内容

keyword = "Python"
input_file = "example.txt"
output_file = "filtered.txt"

# Copy to output_file only those lines of input_file that contain the
# keyword (plain, case-sensitive substring test).
with open(input_file, "r", encoding="utf-8") as src, \
        open(output_file, "w", encoding="utf-8") as dst:
    dst.writelines(row for row in src if keyword in row)
    print(f"包含关键词 '{keyword}' 的行已保存到 {output_file}")

3.统计文件行数、单词数、字符数

input_file = "example.txt"

# Load all lines (line terminators are kept, so they count as characters).
with open(input_file, "r", encoding="utf-8") as file:
    lines = list(file)

line_count = len(lines)
# A "word" is any whitespace-separated token, as produced by str.split().
word_count = sum(map(len, map(str.split, lines)))
char_count = sum(map(len, lines))

print(f"文件行数: {line_count}, 单词数: {word_count}, 字符数: {char_count}")

4.清除TXT文件中的空行

input_file = "example.txt"
output_file = "no_empty_lines.txt"

# Copy input_file to output_file, dropping lines that are empty or contain
# only whitespace.
with open(input_file, "r", encoding="utf-8") as src, \
        open(output_file, "w", encoding="utf-8") as dst:
    for row in src:
        if not row.strip():
            continue  # blank line: skip it
        dst.write(row)
    print(f"空行已清除，结果保存在 {output_file}")

5.查找并替换文件中的内容

input_file = "example.txt"
output_file = "replaced.txt"
old_word = "旧词"
new_word = "新词"

# Rewrite the file line by line, substituting every occurrence of old_word
# with new_word. Matching is per line, so text spanning a line break is not
# replaced.
with open(input_file, "r", encoding="utf-8") as src, \
        open(output_file, "w", encoding="utf-8") as dst:
    dst.writelines(row.replace(old_word, new_word) for row in src)
    print(f"所有 '{old_word}' 已替换为 '{new_word}'，结果保存在 {output_file}")

6.提取TXT文件中的电子邮件地址

import re

input_file = "example.txt"
output_file = "emails.txt"

# Pattern for e-mail-like tokens: local part, "@", domain, and a TLD of at
# least two letters.
email_pattern = re.compile(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}")

with open(input_file, "r", encoding="utf-8") as src, \
        open(output_file, "w", encoding="utf-8") as dst:
    # Matches are kept in order of appearance; duplicates are not removed.
    emails = email_pattern.findall(src.read())
    # One address per line, with no trailing newline after the last one.
    dst.write("\n".join(emails))
    print(f"提取的邮箱已保存到 {output_file}")

7.按行分割大文件

import os
from itertools import islice

input_file = "large_file.txt"
lines_per_file = 1000  # number of lines written to each part file
output_folder = "./split_files"

os.makedirs(output_folder, exist_ok=True)

# Split input_file into consecutive part_<n>.txt files of at most
# lines_per_file lines each. The input is streamed chunk by chunk so that
# only one chunk is held in memory at a time.
part_count = 0
with open(input_file, "r", encoding="utf-8") as infile:
    while True:
        chunk = list(islice(infile, lines_per_file))
        if not chunk:
            break  # input exhausted
        part_count += 1
        split_file = os.path.join(output_folder, f"part_{part_count}.txt")
        with open(split_file, "w", encoding="utf-8") as outfile:
            outfile.writelines(chunk)
# Report the number of parts actually written. Deriving it from
# len(lines)//lines_per_file + 1 over-counts by one for exact multiples and
# for an empty file.
print(f"文件已分割为 {part_count} 个部分，保存在 {output_folder}")

8.统计每个单词的出现频率

from collections import Counter

input_file = "example.txt"
output_file = "word_frequency.txt"

# Tokenise on any run of whitespace and tally each distinct token.
with open(input_file, "r", encoding="utf-8") as file:
    word_counts = Counter(file.read().split())

# Write "word: count" lines, most frequent first.
with open(output_file, "w", encoding="utf-8") as outfile:
    outfile.writelines(
        f"{word}: {count}\n" for word, count in word_counts.most_common()
    )
    print(f"单词频率统计已保存到 {output_file}")

9.比较两个TXT文件的差异

file1 = "file1.txt"
file2 = "file2.txt"

# Collect the distinct lines of each file. The trailing newline is stripped
# so that a final line lacking a line terminator still compares equal to the
# same text in the other file.
with open(file1, "r", encoding="utf-8") as f1, open(file2, "r", encoding="utf-8") as f2:
    lines1 = {line.rstrip("\n") for line in f1}
    lines2 = {line.rstrip("\n") for line in f2}

# Set differences: line order and duplicate counts are ignored.
only_in_file1 = lines1 - lines2
only_in_file2 = lines2 - lines1

# Sort before printing so the report is identical from run to run; raw set
# iteration order for strings varies between interpreter runs.
print("仅在文件1中的行：", sorted(only_in_file1))
print("仅在文件2中的行：", sorted(only_in_file2))

10.生成模拟数据并写入TXT文件

import random

output_file = "random_data.txt"
num_lines = 100

# Write num_lines rows, each holding one random integer in [1, 1000].
with open(output_file, "w", encoding="utf-8") as file:
    file.writelines(
        f"{random.randint(1, 1000)}\n" for _ in range(num_lines)
    )
    print(f"{num_lines} 行随机数据已生成到 {output_file}")

最后，我精心筹备了一份全面的Python学习大礼包，完全免费分享给每一位渴望成长、希望突破自我现状却略感迷茫的朋友。无论您是编程新手还是希望深化技能的开发者，都欢迎加入我们的学习之旅，共同交流进步！

🌟 学习大礼包包含内容：

Python全领域学习路线图：一目了然，指引您从基础到进阶，再到专业领域的每一步学习路径，明确各方向的核心知识点。

超百节Python精品视频课程：涵盖Python编程的必备基础知识、高效爬虫技术、以及深入的数据分析技能，让您技能全面升级。

实战案例集锦：精选超过100个实战项目案例，从理论到实践，让您在解决实际问题的过程中，深化理解，提升编程能力。

华为独家Python漫画教程：创新学习方式，以轻松幽默的漫画形式，让您随时随地，利用碎片时间也能高效学习Python。

互联网企业Python面试真题集：精选历年知名互联网企业面试真题，助您提前备战，面试准备更充分，职场晋升更顺利。