这里插播一条粉丝福利:如果你正在学习Python或者有计划学习Python,想要突破自我,却对未来感到迷茫,可以点击这里获取最新的Python学习资料和学习路线规划(免费分享,记得关注)。
以下是10个实用的Python文本处理自动化脚本,帮助你高效处理TXT文件,涵盖常见需求和工作场景:
1.批量合并多个TXT文件
import os


def merge_txt_files(folder_path, output_file):
    """Concatenate every ``.txt`` file in *folder_path* into *output_file*.

    Files are merged in sorted name order so the result is reproducible;
    ``os.listdir`` alone returns entries in arbitrary order. The content of
    each source file is followed by a single newline.

    Entries that are not regular files are skipped, and so is the output
    file itself when it lives inside *folder_path*, so the merge never
    feeds on its own output.

    Args:
        folder_path: Directory that holds the source ``.txt`` files.
        output_file: Path of the merged file to create (overwritten).

    Returns:
        The list of merged file names, in the order they were written.
    """
    output_abs = os.path.abspath(output_file)
    merged = []
    with open(output_file, "w", encoding="utf-8") as outfile:
        for file_name in sorted(os.listdir(folder_path)):
            file_path = os.path.join(folder_path, file_name)
            if not file_name.endswith(".txt") or not os.path.isfile(file_path):
                continue
            if os.path.abspath(file_path) == output_abs:
                continue
            with open(file_path, "r", encoding="utf-8") as infile:
                # Trailing newline keeps consecutive files from running together.
                outfile.write(infile.read() + "\n")
            merged.append(file_name)
    return merged


if __name__ == "__main__":
    # Source directory and destination file.
    folder_path = "./txt_files"
    output_file = "merged.txt"
    merge_txt_files(folder_path, output_file)
    print(f"所有TXT文件已合并到 {output_file}")
2.按关键词过滤行内容
# Copy only the lines of input_file that contain the keyword to output_file.
keyword = "Python"
input_file = "example.txt"
output_file = "filtered.txt"

with open(input_file, "r", encoding="utf-8") as src, open(output_file, "w", encoding="utf-8") as dst:
    # Plain substring match; matching lines keep their original line endings.
    dst.writelines(row for row in src if keyword in row)

print(f"包含关键词 '{keyword}' 的行已保存到 {output_file}")
3.统计文件行数、单词数、字符数
# Report the number of lines, whitespace-separated words and characters
# found in input_file.
input_file = "example.txt"

with open(input_file, "r", encoding="utf-8") as file:
    lines = list(file)

line_count = word_count = char_count = 0
for text in lines:
    line_count += 1
    word_count += len(text.split())  # words are whitespace-delimited tokens
    char_count += len(text)  # length includes the line terminator

print(f"文件行数: {line_count}, 单词数: {word_count}, 字符数: {char_count}")
4.清除TXT文件中的空行
# Copy input_file to output_file, dropping lines that are empty or
# contain only whitespace.
input_file = "example.txt"
output_file = "no_empty_lines.txt"

with open(input_file, "r", encoding="utf-8") as src, open(output_file, "w", encoding="utf-8") as dst:
    # str.strip returns "" (falsy) for blank lines, so filter keeps the rest.
    dst.writelines(filter(str.strip, src))

print(f"空行已清除,结果保存在 {output_file}")
5.查找并替换文件中的内容
# Write a copy of input_file to output_file with every occurrence of
# old_word replaced by new_word.
input_file = "example.txt"
output_file = "replaced.txt"
old_word = "旧词"
new_word = "新词"

with open(input_file, "r", encoding="utf-8") as src, open(output_file, "w", encoding="utf-8") as dst:
    # Replacement is applied line by line.
    dst.writelines(row.replace(old_word, new_word) for row in src)

print(f"所有 '{old_word}' 已替换为 '{new_word}',结果保存在 {output_file}")
6.提取TXT文件中的电子邮件地址
import re

# Collect every e-mail address found in input_file and write them to
# output_file, one per line, in order of appearance.
input_file = "example.txt"
output_file = "emails.txt"

# local-part @ domain . TLD of at least two letters
EMAIL_PATTERN = re.compile(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}")

with open(input_file, "r", encoding="utf-8") as src, open(output_file, "w", encoding="utf-8") as dst:
    emails = EMAIL_PATTERN.findall(src.read())
    # Addresses are newline-separated; no newline follows the last one.
    dst.write("\n".join(emails))

print(f"提取的邮箱已保存到 {output_file}")
7.按行分割大文件
import os
from itertools import islice


def split_file_by_lines(input_file, lines_per_file, output_folder):
    """Split *input_file* into numbered parts of at most *lines_per_file* lines.

    Parts are written to *output_folder* (created if missing) as
    ``part_1.txt``, ``part_2.txt``, ... The input is streamed chunk by
    chunk, so only one part is held in memory at a time.

    Args:
        input_file: Path of the text file to split.
        lines_per_file: Maximum number of lines per part; must be >= 1.
        output_folder: Directory that receives the part files.

    Returns:
        The number of part files actually written (0 for an empty input).

    Raises:
        ValueError: If *lines_per_file* is smaller than 1.
    """
    if lines_per_file < 1:
        raise ValueError("lines_per_file must be a positive integer")
    os.makedirs(output_folder, exist_ok=True)
    part_count = 0
    with open(input_file, "r", encoding="utf-8") as infile:
        while True:
            # islice pulls the next batch of lines without reading the rest.
            chunk = list(islice(infile, lines_per_file))
            if not chunk:
                break
            part_count += 1
            split_file = os.path.join(output_folder, f"part_{part_count}.txt")
            with open(split_file, "w", encoding="utf-8") as outfile:
                outfile.writelines(chunk)
    return part_count


if __name__ == "__main__":
    input_file = "large_file.txt"
    lines_per_file = 1000  # number of lines in each part file
    output_folder = "./split_files"
    # Report the real number of parts written, not an estimate.
    part_total = split_file_by_lines(input_file, lines_per_file, output_folder)
    print(f"文件已分割为 {part_total} 个部分,保存在 {output_folder}")
8.统计每个单词的出现频率
from collections import Counter

# Count how often each whitespace-separated token occurs in input_file and
# write "token: count" lines to output_file, most frequent first.
input_file = "example.txt"
output_file = "word_frequency.txt"

with open(input_file, "r", encoding="utf-8") as file:
    # Tokens are split on whitespace only; case and punctuation are kept.
    word_counts = Counter(file.read().split())

with open(output_file, "w", encoding="utf-8") as outfile:
    outfile.writelines(
        f"{word}: {count}\n" for word, count in word_counts.most_common()
    )

print(f"单词频率统计已保存到 {output_file}")
9.比较两个TXT文件的差异
def diff_lines(file1, file2):
    """Return the lines that appear in only one of two text files.

    Lines are compared by their text without the line terminator, so a
    final line that lacks a trailing newline still matches the same text
    elsewhere in the other file. Duplicates and ordering are ignored
    because the comparison is set-based.

    Args:
        file1: Path of the first text file.
        file2: Path of the second text file.

    Returns:
        A tuple ``(only_in_file1, only_in_file2)`` of sets of line text.
    """
    with open(file1, "r", encoding="utf-8") as f1, open(file2, "r", encoding="utf-8") as f2:
        lines1 = {line.rstrip("\n") for line in f1}
        lines2 = {line.rstrip("\n") for line in f2}
    return lines1 - lines2, lines2 - lines1


if __name__ == "__main__":
    file1 = "file1.txt"
    file2 = "file2.txt"
    # Compute the differences in both directions.
    only_in_file1, only_in_file2 = diff_lines(file1, file2)
    print("仅在文件1中的行:", only_in_file1)
    print("仅在文件2中的行:", only_in_file2)
10.生成模拟数据并写入TXT文件
import random

# Fill output_file with num_lines lines of mock data, one random integer
# per line.
output_file = "random_data.txt"
num_lines = 100

with open(output_file, "w", encoding="utf-8") as file:
    # Each value is drawn from 1 to 1000, both ends included.
    file.writelines(f"{random.randint(1, 1000)}\n" for _ in range(num_lines))

print(f"{num_lines} 行随机数据已生成到 {output_file}")
最后,我精心筹备了一份全面的Python学习大礼包,完全免费分享给每一位渴望成长、希望突破自我现状却略感迷茫的朋友。无论您是编程新手还是希望深化技能的开发者,都欢迎加入我们的学习之旅,共同交流进步!
🌟 学习大礼包包含内容:
Python全领域学习路线图:一目了然,指引您从基础到进阶,再到专业领域的每一步学习路径,明确各方向的核心知识点。
超百节Python精品视频课程:涵盖Python编程的必备基础知识、高效爬虫技术、以及深入的数据分析技能,让您技能全面升级。
实战案例集锦:精选超过100个实战项目案例,从理论到实践,让您在解决实际问题的过程中,深化理解,提升编程能力。
华为独家Python漫画教程:创新学习方式,以轻松幽默的漫画形式,让您随时随地,利用碎片时间也能高效学习Python。
互联网企业Python面试真题集:精选历年知名互联网企业面试真题,助您提前备战,面试准备更充分,职场晋升更顺利。
👉 立即领取方式:只需【点击这里】,即刻解锁您的Python学习新篇章!让我们携手并进,在编程的海洋里探索无限可能。