def getFileText(path="../Stu_pack/file/letter.txt", encoding=None):
    """Read a text file and return it lower-cased with punctuation blanked.

    Args:
        path: File to read. Defaults to the letter file analysed by this
            script, so existing ``getFileText()`` calls keep working.
        encoding: Text encoding handed to ``open``. ``None`` keeps the
            platform default.

    Returns:
        The file content in lower case, with every character of the
        punctuation set below replaced by a single space. The apostrophe
        is not part of that set, so contractions such as "don't" survive.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # The context manager guarantees the handle is closed, even if read()
    # raises; the bare open(...).read() left that to the garbage collector.
    with open(path, "r", encoding=encoding) as fil:
        filTxt = fil.read()
    filTxt = filTxt.lower()
    # A single translate() pass replaces the per-character replace() loop.
    punctuation = '!"#$%&()*+-/,.:;<=>?[]\\^_{}|~'
    return filTxt.translate(str.maketrans(punctuation, " " * len(punctuation)))
# Word-frequency report: count every word of the cleaned text, drop the
# excluded common words, then print the remaining frequent, longer words
# in descending order of occurrence.
letterTxt = getFileText()
words = letterTxt.split()

# Map each word to the number of times it occurs.
wdCountDict = {}
for word in words:
    wdCountDict[word] = wdCountDict.get(word, 0) + 1

# Common words that must never show up in the report.
excludes = {'the', 'of', 'you', 'your', 'that', 'will', "don't"}
for word in excludes:
    # pop() with a default tolerates excluded words that are absent from
    # the text; a plain `del` raised KeyError in that case.
    wdCountDict.pop(word, None)

# Highest count first.
items = list(wdCountDict.items())
items.sort(key=lambda x: x[1], reverse=True)

print("{0:<10}{1:>5}".format("word", "count"))
print("_" * 21)
for key, val in items:
    # Only report words longer than 3 characters seen more than twice.
    if len(key) > 3 and val > 2:
        print("{0:<10}{1:>5}".format(key, val))
文本词频统计
最新推荐文章于 2025-12-02 21:20:38 发布
该代码段展示了一个 Python 脚本：先通过函数读取文件内容、转换为小写并将特殊字符替换为空格，然后统计单词出现次数，排除预定义的常见词汇，最后按词频降序排列并打印出长度大于 3 且计数大于 2 的单词。
862

被折叠的 条评论
为什么被折叠?



