# 汉字字频统计工具（by九赢百信）-CSDN博客

import os
from collections import Counter

# Input file: the UTF-8 text whose Chinese characters are counted.
path = 'C:\\a_9ybx.txt'

# Output file 1: the counted characters only, most frequent first, no separators.
path1 = 'C:\\a_9ybx_out.txt'

# Output file 2: one "character,count" line per character, most frequent first.
path2 = 'C:\\a_9ybx_out1.txt'

# Characters that are never counted. The punctuation listed here already lies
# outside HANZI_RANGES; the list is kept so that further characters (including
# real ideographs) can be excluded simply by adding them.
liwai = ["，", "。", "（", "）", " ", "、", "　", "：", "；", "《", "》", "？", "【", "】", "！"]

# Inclusive code point ranges accepted as Chinese characters. A bare
# "ord(ch) > 10000" test would also accept kana, hangul, fullwidth letters and
# digits, and CJK brackets, none of which are Chinese characters.
HANZI_RANGES = (
    (0x3007, 0x3007),    # ideographic number zero
    (0x3400, 0x4DBF),    # CJK Unified Ideographs Extension A
    (0x4E00, 0x9FFF),    # CJK Unified Ideographs
    (0xF900, 0xFAFF),    # CJK Compatibility Ideographs
    (0x20000, 0x2A6DF),  # CJK Unified Ideographs Extension B
    (0x2A700, 0x2EBEF),  # CJK Unified Ideographs Extensions C through F
    (0x2F800, 0x2FA1F),  # CJK Compatibility Ideographs Supplement
    (0x30000, 0x323AF),  # CJK Unified Ideographs Extensions G and H
)

RULE = "***************************************"

BANNER_LINES = (
    "名称：汉字词频统计工具软件 CPTJ v1.0",
    "功能描述：将输入文本文件所包含的汉字进行统计，并进行输出。",
    "方便汉字初学者对高频汉字进行学习。",
    "作者：李刚 ",
    "EMAIL:lglgang@126.com",
)


def is_hanzi(ch):
    """Return True if ``ch`` is one Chinese character that should be counted.

    ``ch`` must be a string of length 1 whose code point falls inside
    ``HANZI_RANGES`` and which is not listed in ``liwai``. Any other value,
    including the empty string, yields False.
    """
    if len(ch) != 1 or ch in liwai:
        return False
    code = ord(ch)
    return any(low <= code <= high for low, high in HANZI_RANGES)


def count_hanzi(text):
    """Count the Chinese characters in ``text``.

    Returns a list of ``(character, count)`` tuples ordered by descending
    count. Characters with equal counts keep the order of their first
    appearance in ``text``. Every character of ``text`` is examined,
    including the first one.
    """
    return Counter(ch for ch in text if is_hanzi(ch)).most_common()


def write_reports(ranked, chars_path, table_path, show=False):
    """Write both report files for ``ranked`` and optionally echo the ranking.

    ``ranked`` is a list of ``(character, count)`` tuples as returned by
    ``count_hanzi``. ``chars_path`` receives the characters back to back;
    ``table_path`` receives one ``character,count`` line per character. When
    ``show`` is true each entry is also printed together with the cumulative
    share (in percent) of all counted characters reached so far.

    Returns ``(total, distinct)``: the total number of counted characters and
    the number of distinct characters.
    """
    total = sum(count for _, count in ranked)
    covered = 0
    with open(chars_path, encoding='utf-8', mode='w') as chars_out, \
            open(table_path, encoding='utf-8', mode='w') as table_out:
        for ch, count in ranked:
            covered += count
            if show:
                # total cannot be zero here: ranked is non-empty inside the loop.
                print(ch, "---", count, "完成率：", round(covered / total * 100, 2), "%")
            chars_out.write(ch)
            table_out.write('%s,%d\n' % (ch, count))
    return total, len(ranked)


def main():
    """Run the interactive tool: banner, prompts, counting and report output."""
    print(RULE)
    for line in BANNER_LINES:
        print()
        print(line)
    print()
    print(RULE)

    # Keep asking until the user confirms that the input file is in place.
    while True:
        print()
        print(RULE)
        print()
        print("请准备好待统计的文本文件UTF-8格式，并命名为a_9ybx.txt保存到C盘的根目录下")
        answer = input("如果准备好了文件请输入(Y)：")
        print("您输入的是：", answer)
        if answer.strip().upper() == 'Y':
            break

    show = input("是否显示输出(Y显示，N不显示)：").strip().upper() == 'Y'

    # Show the size of the input file in bytes.
    print(os.path.getsize(path))

    # 'utf-8-sig' drops a leading byte order mark if the editor wrote one and
    # otherwise behaves like plain UTF-8, so no real character is lost.
    # The input is read before any output file is opened, so a missing or
    # undecodable input does not truncate earlier results.
    with open(path, encoding='utf-8-sig', mode='r') as source:
        ranked = count_hanzi(source.read())

    print()
    print(RULE)
    print()
    print(RULE)

    total, distinct = write_reports(ranked, path1, path2, show=show)

    print("总字数：", total)
    print("单字数：", distinct)


if __name__ == "__main__":
    main()

# 转载于:https://my.oschina.net/9ybx/blog/2934039