# Read the file line by line and print each line.
with open('test.txt', 'r', encoding='utf-8') as file:
    for line in file:
        # strip() drops the trailing newline together with any other
        # leading/trailing whitespace on the line.
        line = line.strip()
        print(line)
逐行读取适用于大文件,可避免一次性读取过多内容导致内存溢出
3.读取文件中指定范围的行(行号从0开始计数,包含起止行)
def read_specific_lines(file_path, start, end):
    """Print the lines of a UTF-8 text file whose index lies in [start, end].

    Line indices are 0-based and both bounds are inclusive, so
    ``read_specific_lines(path, 0, 0)`` prints only the first line.

    Args:
        file_path: Path of the file to read.
        start: Index of the first line to print.
        end: Index of the last line to print.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        for i, line in enumerate(file):
            if i > end:
                # Past the requested range: stop instead of scanning the
                # remainder of the file.
                break
            if i >= start:
                print(line.strip())
# Demo: print the lines at 0-based indices 10 through 20 of test.txt.
read_specific_lines('test.txt', start=10, end=20)
import os
def batch_rename(directory, old_str, new_str):
    """Rename the entries of *directory* whose names contain *old_str*.

    Every occurrence of *old_str* in an entry name is replaced by
    *new_str*. Entries come from ``os.listdir``, so subdirectories are
    renamed as well as regular files. Entries whose name would not change
    are skipped.

    NOTE(review): if the new name already exists, ``os.rename`` may replace
    it or raise depending on the platform -- confirm the desired behavior.

    Args:
        directory: Directory whose entries are renamed.
        old_str: Substring to look for in each entry name.
        new_str: Replacement for each occurrence of *old_str*.
    """
    for filename in os.listdir(directory):
        new_filename = filename.replace(old_str, new_str)
        if new_filename == filename:
            # Nothing to replace in this name; avoid a pointless rename call.
            continue
        os.rename(
            os.path.join(directory, filename),
            os.path.join(directory, new_filename),
        )
# Demo: replace 'old' with 'new' in the names of the current directory's entries.
batch_rename('.', old_str='old', new_str='new')
该函数可批量重命名指定目录下名称中包含指定字符串的文件(以及子目录)。
六、高级文本处理操作
18.使用"BeautifulSoup"解析HTML文件
from bs4 import BeautifulSoup
# Parse test.html with the built-in 'html.parser' backend and print the
# text of its <title> element.
with open('test.html', 'r', encoding='utf-8') as file:
    soup = BeautifulSoup(file, 'html.parser')
    title = soup.title.text
    print(title)
常用于从网页中提取数据。
19.使用"NLTK"进行自然语言处理(如分词)
import nltk
# Download the 'punkt' resource before tokenizing.
# NOTE(review): recent NLTK releases may require the 'punkt_tab' resource
# instead -- confirm against the installed version.
nltk.download('punkt')
from nltk.tokenize import word_tokenize

text = "This is a sample sentence."
words = word_tokenize(text)
print(words)
"NLTK"是自然语言处理领域的常用库。
20.使用"jieba"进行中文分词
import jieba
# Segment a Chinese sentence into words and print the result.
text = "我爱自然语言处理"
words = jieba.lcut(text)
print(words)
"jieba"专门针对中文进行分词。
七、文本匹配与搜索操作
21.在文件中搜索特定关键词
def search_keyword_in_file(file_path, keyword):
    """Print every line of a UTF-8 text file that contains *keyword*.

    The match is a plain, case-sensitive substring test. Matching lines are
    printed with surrounding whitespace stripped.

    Args:
        file_path: Path of the file to search.
        keyword: Substring to look for in each line.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            if keyword in line:
                print(line.strip())
# Demo: print every line of test.txt that mentions 'Python'.
search_keyword_in_file('test.txt', keyword='Python')
def caesar_cipher_encrypt(text, shift):
    """Encrypt *text* with a Caesar cipher.

    Each ASCII letter is rotated by *shift* positions within its own case
    (wrapping around the 26-letter alphabet). Every other character --
    digits, punctuation, whitespace and non-ASCII letters such as CJK or
    accented characters -- is copied through unchanged.

    Args:
        text: The string to encrypt.
        shift: Number of alphabet positions to rotate by; may be negative
            or larger than 26.

    Returns:
        The encrypted string, the same length as *text*.
    """
    shifted = []
    for char in text:
        # Explicit ASCII ranges are used instead of str.isalpha(), which is
        # also true for non-ASCII letters that the mod-26 arithmetic below
        # would map onto unrelated ASCII letters.
        if 'A' <= char <= 'Z':
            offset = 65  # ord('A')
        elif 'a' <= char <= 'z':
            offset = 97  # ord('a')
        else:
            shifted.append(char)
            continue
        shifted.append(chr((ord(char) - offset + shift) % 26 + offset))
    return ''.join(shifted)


encrypted_text = caesar_cipher_encrypt("Hello, World!", 3)
print(encrypted_text)
凯撒密码是一种简单的替换加密算法,仅用于示例学习。
24.使用"cryptography"库进行对称加密
from cryptography.fernet import Fernet

# Generate a new key and build the cipher object from it.
key = Fernet.generate_key()
cipher_suite = Fernet(key)
text = "This is a secret message."
# The string is encoded to bytes before it is passed to encrypt().
encrypted_text = cipher_suite.encrypt(text.encode())
print(encrypted_text)
用于对文本进行更安全的对称加密。
九、文本压缩与解压缩操作
25.使用"zipfile"模块压缩文本文件
import zipfile
# ZIP_DEFLATED is requested explicitly: ZipFile's default method is
# ZIP_STORED, which archives files without compressing them.
with zipfile.ZipFile('test.zip', 'w', compression=zipfile.ZIP_DEFLATED) as zip_file:
    zip_file.write('test.txt')
可将一个或多个文件压缩到ZIP文件中。
26.使用"zipfile"模块解压文本文件
# Extract every member of test.zip into the 'extracted' directory.
with zipfile.ZipFile('test.zip', 'r') as zip_file:
    zip_file.extractall('extracted')
# Count how many times each character occurs in the text.
text = "hello"
char_freq = {}
for char in text:
    # dict.get supplies 0 the first time a character is seen.
    char_freq[char] = char_freq.get(char, 0) + 1
print(char_freq)
了解文本中字符的分布情况。
29.生成随机文本(示例)
import random
import string
def generate_random_text(length):
    """Return a random string of *length* lowercase ASCII letters.

    Uses the ``random`` module, so the result is not suitable for
    passwords, tokens or anything else security-sensitive.

    Args:
        length: Number of characters to generate; values <= 0 yield ''.

    Returns:
        A string made up only of characters from ``string.ascii_lowercase``.
    """
    letters = string.ascii_lowercase
    return ''.join(random.choice(letters) for _ in range(length))


print(generate_random_text(10))
可生成指定长度的随机字符串。
30.文本可视化(使用"wordcloud"生成词云)
from wordcloud import WordCloud
# Build a word cloud from the sample sentence, render it to an image and
# display that image.
text = "Python is a great programming language. Many people love Python."
wordcloud = WordCloud().generate(text)
image = wordcloud.to_image()
image.show()