统计值计算实例
#CalStatisticsV1.py
def getnum():
    """Read numbers from standard input until an empty line is entered.

    Each non-empty line is parsed as a Python numeric literal, so ``3`` stays
    an ``int`` and ``4.5`` becomes a ``float``.

    Returns:
        list: the numbers, in the order they were typed.

    Raises:
        ValueError: if a line is not a plain int or float literal.
    """
    # Function-scope import keeps the block self-contained.
    import ast

    prompt = '请输入数字(回车退出):'
    nums = []
    # iter(callable, sentinel) keeps prompting until the user just hits Enter.
    for line in iter(lambda: input(prompt), ''):
        # SECURITY: this used to be eval(line), which executes arbitrary code
        # typed by the user. literal_eval only accepts literals, so numeric
        # input keeps working while expressions and calls are rejected.
        try:
            value = ast.literal_eval(line.strip())
        except (ValueError, SyntaxError) as err:
            raise ValueError('not a number: {!r}'.format(line)) from err
        # literal_eval also accepts strings, lists, booleans, ...; only real
        # numbers make sense for the statistics computed from this list.
        if isinstance(value, bool) or not isinstance(value, (int, float)):
            raise ValueError('not a number: {!r}'.format(line))
        nums.append(value)
    return nums
def mean(numbers):
    """Return the arithmetic mean of *numbers* as a float.

    Args:
        numbers: a non-empty sequence of numbers.

    Returns:
        float: the sum of the values divided by how many there are.

    Raises:
        ValueError: if *numbers* is empty (previously this surfaced as a bare
            ZeroDivisionError).
    """
    count = len(numbers)
    if count == 0:
        raise ValueError('mean() requires at least one number')
    # Starting the sum at 0.0 keeps the result a float even for all-int input.
    return sum(numbers, 0.0) / count
def dev(numbers, mean):
    """Return the sample standard deviation of *numbers*.

    The result is the square root of the sum of squared deviations from
    *mean* divided by ``len(numbers) - 1`` -- i.e. a standard deviation, not
    a variance.

    Args:
        numbers: a sequence of at least two numbers.
        mean: the mean of *numbers*, supplied by the caller. Inside this
            function the parameter shadows the module-level ``mean`` function.

    Returns:
        float: the sample standard deviation.

    Raises:
        ValueError: if fewer than two values are given, because the
            ``n - 1`` divisor would be zero or negative.
    """
    count = len(numbers)
    if count < 2:
        raise ValueError('dev() requires at least two numbers')
    squared_error = sum(((num - mean) ** 2 for num in numbers), 0.0)
    return pow(squared_error / (count - 1), 0.5)
def median(numbers):
    """Return the median of *numbers*.

    Args:
        numbers: a non-empty sequence of numbers, in any order. The caller's
            sequence is not modified.

    Returns:
        The middle value of the sorted data for an odd count, or the average
        of the two middle values for an even count.

    Raises:
        ValueError: if *numbers* is empty.
    """
    # sorted() returns a new list; the original code discarded that result and
    # therefore picked the "middle" of the unsorted input.
    ordered = sorted(numbers)
    size = len(ordered)
    if size == 0:
        raise ValueError('median() requires at least one number')
    middle = size // 2
    if size % 2 == 0:
        return (ordered[middle - 1] + ordered[middle]) / 2
    return ordered[middle]
# Read the numbers interactively, then report the mean, the sample standard
# deviation and the median on one line.
n = getnum()
m = mean(n)
# dev() returns a standard deviation (square root of the variance), so the
# label says "标准差" (standard deviation) rather than "方差" (variance).
print('平均值:{},标准差:{},中位数:{}.'.format(m, dev(n, m), median(n)))
文本词频统计
Hamlet词频统计
#CalHamletV1.py
def gettext(path='D://TXT2019//Hamlet.txt'):
    """Return the contents of a text file, lower-cased and without punctuation.

    Args:
        path: file to read. Defaults to the Hamlet text used by this script.
            The default is written with forward slashes, so no backslash
            escapes are involved.

    Returns:
        str: the file contents in lower case with every character of the
        punctuation set below deleted.

    NOTE: punctuation is deleted, not replaced by a space, so two words joined
    only by punctuation (e.g. ``to-be``) merge into one token (``tobe``).
    """
    punctuation = '!"#$%()*+,-./:;<=>?@[\\]^_‘{|}~'
    # The context manager closes the file; the original left the handle open.
    with open(path, 'r') as source:
        txt = source.read().lower()
    # One translate() pass deletes every listed character, equivalent to the
    # chain of replace(ch, '') calls it supersedes.
    return txt.translate(str.maketrans('', '', punctuation))
# Count how often each whitespace-separated word occurs in the cleaned text
# and print the 15 most frequent ones.
HT = gettext()
words = HT.split()
counts = {}
for word in words:
    counts[word] = counts.get(word, 0) + 1
# counts.items() yields (word, count) pairs; order them by count, highest first.
items = sorted(counts.items(), key=lambda x: x[1], reverse=True)
# Slicing (rather than indexing range(15)) avoids an IndexError when the text
# contains fewer than 15 distinct words.
for word, count in items[:15]:
    print('{:<10}{:>5}'.format(word, count))
运行效果如下:
D:\Anaconda3\python.exe D:/Python_pycharm_projects/yuyanseji_examples(20190404)/CalHamletV1.py
the 1137
and 963
to 736
of 669
you 546
i 540
a 527
my 513
hamlet 459
in 435
it 415
that 389
is 340
not 312
lord 308
Process finished with exit code 0
三国词频统计
#CalThreeKingdomsV1.py
import jieba as t
# Count multi-character words in the Three Kingdoms text and print the top 10.
# The raw string yields the same path as before but no longer relies on the
# invalid escape sequence "\T"; the context manager closes the file.
with open(r'D:\TXT2019\Threekingdoms.txt', 'r', encoding='utf-8') as source:
    txt = source.read()
words = t.lcut(txt)  # list of segmented Chinese words (per the original note)
counts = {}  # word -> number of occurrences
for word in words:
    # Single-character tokens are skipped.
    if len(word) == 1:
        continue
    counts[word] = counts.get(word, 0) + 1
# Turn the dict into (word, count) pairs ordered by count, highest first.
items = sorted(counts.items(), key=lambda x: x[1], reverse=True)
# Slicing avoids an IndexError when fewer than 10 distinct words exist.
for word, count in items[:10]:
    print('{:<10}{:>5}'.format(word, count))
运行效果如下:
D:\Anaconda3\python.exe D:/Python_pycharm_projects/yuyanseji_examples(20190404)/CalThreeKingdomsV1.py
Building prefix dict from the default dictionary ...
Loading model from cache C:\Users\Public\Documents\iSkysoft\CreatorTemp\jieba.cache
Loading model cost 1.128 seconds.
Prefix dict has been built succesfully.
曹操 953
孔明 836
将军 772
却说 656
玄德 585
关公 510
丞相 491
二人 469
不可 440
荆州 425
Process finished with exit code 0
#CalThreekingdomsV2.py
import jieba
# Count character names in the Three Kingdoms text, merging aliases of the
# same person and dropping common non-name words, then print the top 15.
excludes = {'将军', '却说', '荆州', '二人', '不可', '不能', '如此'}
# Alias -> canonical name. The original if/elif chain wrote
# "rword == '曹操'" (a comparison, not an assignment) in its last branch, so
# '孟德' and '丞相' were counted under whatever rword held from the previous
# word. A lookup table makes every alias an explicit mapping.
aliases = {
    '诸葛亮': '孔明',
    '孔明曰': '孔明',
    '关公': '关羽',
    '云长': '关羽',
    '玄德': '刘备',
    '玄德曰': '刘备',
    '孟德': '曹操',
    '丞相': '曹操',
}
# The raw string yields the same path as before but no longer relies on the
# invalid escape sequence "\T"; the context manager closes the file.
with open(r'D:\TXT2019\Threekingdoms.txt', 'r', encoding='utf-8') as source:
    txt = source.read()
words = jieba.lcut(txt)
counts = {}
for word in words:
    # Single-character tokens are skipped.
    if len(word) == 1:
        continue
    rword = aliases.get(word, word)
    counts[rword] = counts.get(rword, 0) + 1
for word in excludes:
    # pop() with a default tolerates excluded words that never occurred;
    # "del counts[word]" would raise KeyError for them.
    counts.pop(word, None)
items = sorted(counts.items(), key=lambda x: x[1], reverse=True)
# Slicing avoids an IndexError when fewer than 15 distinct words remain.
for word, count in items[:15]:
    print('{:<10}{:>5}'.format(word, count))
运行效果如下:
D:\Anaconda3\python.exe D:/Python_pycharm_projects/yuyanseji_examples(20190404)/CalThreeKingdomsV2.py
Building prefix dict from the default dictionary ...
Dumping model to file cache C:\Users\Public\Documents\iSkysoft\CreatorTemp\jieba.cache
Loading model cost 1.944 seconds.
Prefix dict has been built succesfully.
孔明 1386
刘备 1254
曹操 953
关羽 786
张飞 358
商议 345
如何 339
主公 331
军士 317
吕布 300
左右 296
军马 293
赵云 279
引兵 276
次日 271
Process finished with exit code 0