# Python 入门教程至此已学习完毕,下面是结业脚本:(一部分是书里的源码,一部分是自己加的练习题)
#写一个文本统计的脚本:计算并打印有关文本文件的统计数据,包括文件里包含多少个字符、行、单词数,以及前10个出现次数最多的单词按顺序排列
import time
from collections import Counter
# Whitelist of characters that normalize() lets through: the 26 lowercase
# ASCII letters plus space, hyphen and apostrophe.
keep = list('abcdefghijklmnopqrstuvwxyz') + [' ', '-', "'"]

# Very common English words. Not referenced by normalize() or make_dict();
# presumably used further down to filter the most-frequent-word ranking
# (TODO confirm against the rest of the file).
stop_words = [
    'the', 'and', 'i', 'to', 'of', 'a',
    'you', 'my', 'that', 'in', 'she', 'he',
    'her', 'his', 'it', 'be', 'was', 'had',
]
def normalize(s):
    """Return a lowercased copy of ``s`` stripped of unwanted characters.

    The text is lowercased first, then every character that is not listed
    in the module-level ``keep`` whitelist is dropped. Newlines and tabs are
    not in ``keep``, so they are removed rather than turned into spaces.

    Args:
        s: The text to clean up.

    Returns:
        A new string made only of characters found in ``keep``, in their
        original order. An empty input yields an empty string.
    """
    # A single join avoids rebuilding the result string on every character.
    return ''.join(c for c in s.lower() if c in keep)
def make_dict(s):
    """Count how many times each word occurs in ``s``.

    The text is passed through ``normalize`` and split on whitespace; each
    resulting token is treated as one word.

    Args:
        s: The text to analyse.

    Returns:
        A plain ``dict`` mapping each distinct word to its occurrence
        count, with keys in order of first appearance. Text that contains
        no words yields an empty dict.
    """
    words = normalize(s).split()
    # Counter does the tallying; convert back so callers still get a
    # plain dict exactly as before.
    return dict(Counter(words))
def file_status(f):
c=open(f).read()
''' 采用每次读取一行的方式
fopen=open(f)
c=''
for line in fopen:
c+=line
'''
print(f,'status:')
print('长度:',len(c))
print('行数:',c.count('\n'))
print('单词数:',len(normalize(c).split()))
d=make_dict(c)
print('单词数:',sum(d[w] for w in d))
print('不同单词数:',len([w for w in d]))
print('单词平均长度:',sum(l