import os
import codecs
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import matplotlib
# Directory scanned for training documents; only its *.txt entries are measured.
TRAIN_DIR = "../input/tianchiner/train/"

# Length, in decoded characters, of every .txt file found in TRAIN_DIR.
content_len = []
for file_name in os.listdir(TRAIN_DIR):
    if not file_name.endswith(".txt"):
        continue
    # newline="" turns off newline translation, so the count matches what
    # codecs.open (binary underneath, no "\r\n" -> "\n" folding) used to give.
    with open(os.path.join(TRAIN_DIR, file_name), "r",
              encoding="utf-8", newline="") as f:
        content_len.append(len(f.read()))

# Report the 95th-percentile length instead of silently discarding it.
# Skipped when no .txt file was found, since there is nothing to summarise.
if content_len:
    print("95th percentile of text length:", np.percentile(content_len, 95))

# Make matplotlib render Chinese text and the minus sign correctly.
# NOTE(review): this relies on the SimHei font being installed on the host.
matplotlib.rcParams["font.sans-serif"] = ["SimHei"]  # use SimHei for CJK glyphs
matplotlib.rcParams["axes.unicode_minus"] = False

# Histogram of text lengths: 20 equal-width bins over [0, 1500]; lengths
# outside that range are left out of the plot by plt.hist.
plt.figure(figsize=(10, 5))
plt.hist(
    content_len,
    bins=20,
    range=(0, 1500),
    facecolor="blue",
    edgecolor="black",
    alpha=0.7,
)
plt.xlabel("长度区间")
plt.ylabel("样本数")
plt.title("文本长度分布直方图")
# Explicit show so the figure also appears when run as a plain script.
plt.show()
matplotlib显示中文
最新推荐文章于 2025-04-08 21:54:50 发布