import os
# Fixed locations of the input text and of the two report files.
path = 'C:\\a_9ybx.txt'
path1 = 'C:\\a_9ybx_out.txt'
path2 = 'C:\\a_9ybx_out1.txt'

# Characters that are never reported, even when they pass the code-point
# filter applied further down in the script.
liwai = [
    ",", "。", "(", ")", " ",
    "、", " ", ":", ";", "《",
    "》", "?", "【", "】", "!",
]

# Start-up banner: every entry is printed on its own line with one empty
# line between consecutive entries.
_rule = "***************************************"
_banner = (
    _rule,
    "名称:汉字词频统计工具软件 CPTJ v1.0",
    "功能描述:将输入文本文件所包含的汉字进行统计,并进行输出。",
    "方便汉字初学者对高频汉字进行学习。",
    "作者:李刚 ",
    "EMAIL:lglgang@126.com",
    "版权所有:天津市九赢百信科技有限公司",
    _rule,
)
print("\n\n".join(_banner))
# Keep prompting until the user confirms that the input file is in place.
# The answer is held in its own name (the built-in `str` is no longer
# rebound) and is compared after trimming whitespace and upper-casing, so
# "y" or " Y " no longer leaves the user stuck in the loop.
while True:
    print()
    print("***************************************")
    print()
    print("请准备好待统计的文本文件UTF-8格式,并命名为a_9ybx.txt保存到C盘的根目录下")
    answer = input("如果准备好了文件请输入(Y):")
    print("您输入的是:", answer)
    if answer.strip().upper() == 'Y':
        break

# Display switch: 1 = echo every counted character to the console,
# 0 = stay quiet.  It remains an int because the reporting code further
# down tests `xx==1`.
show_answer = input("是否显示输出(Y显示,N不显示):")
xx = 1 if show_answer.strip().upper() == 'Y' else 0
# Count how often every character occurs in the input file.
#
# Fixes compared with the previous version:
#   * the first character of the file was read and thrown away, so it was
#     never counted; every character is counted now;
#   * 'utf-8-sig' strips a leading byte-order mark when one is present
#     (and behaves like plain UTF-8 otherwise), so a BOM is not counted as
#     a character;
#   * the loop used the file size in BYTES as the number of one-character
#     reads, which ran far past end-of-file for multi-byte text; the file
#     is now simply iterated until it is exhausted;
#   * the input handle is closed even when reading fails.
with open(path, encoding='utf-8-sig', mode='r') as f:
    # The report files are created only after the input opened
    # successfully.  They stay open here and are closed by the reporting
    # code further down.
    f1 = open(path1, encoding='utf-8', mode='w+')
    f2 = open(path2, encoding='utf-8', mode='w+')
    print(f)
    print(os.path.getsize(path))
    # The '0' seed entry is kept so the table is never empty: the
    # reporting code reads the first sorted entry unconditionally.
    tel = {'0': 0}
    for line in f:
        for data in line:
            tel[data] = tel.get(data, 0) + 1

# Number of distinct keys in the table; the reporting loops use it as
# their iteration count.
n = len(tel)
print()
print("***************************************")
print()
print("***************************************")

# Rank all counted characters, most frequent first.  sorted() is stable,
# so characters with equal counts keep the order in which they were first
# seen.  (The result is no longer stored under the built-in name `dict`.)
ranked = sorted(tel.items(), key=lambda item: item[1], reverse=True)
if ranked:
    print(type(ranked[0][0]))

# Keep only single characters whose code point is above 10000 and that are
# not in the exclusion list.  The filter used to be repeated in three
# separate index loops; it is evaluated once here.
# NOTE(review): 10000 also admits non-CJK symbols (CJK Unified Ideographs
# start at U+4E00); threshold left unchanged to keep the reports identical.
hanzi = [
    (ch, count)
    for ch, count in ranked
    if len(ch) == 1 and ord(ch) > 10000 and ch not in liwai
]
nmax = sum(count for _, count in hanzi)   # total occurrences reported
danzishu = len(hanzi)                     # number of distinct characters
nuse = 0                                  # running total for the coverage %

try:
    # Report 1: the characters alone, in descending frequency order.
    # NOTE(review): the source's indentation was lost; the file is assumed
    # to be written whether or not console display is enabled - confirm.
    for ch, count in hanzi:
        nuse = nuse + count
        if xx == 1:
            print(ch, "---", count, "完成率:", round(nuse/nmax*100, 2), "%")
        f1.write(ch)
    print("总字数:", nmax)
    print("单字数:", danzishu)

    # Report 2: one "character,count" line per character.
    for ch, count in hanzi:
        f2.write('%s,%d\n' % (ch, count))
finally:
    # Close both report files even if printing or writing failed.
    f1.close()
    f2.close()