# Code:
# =========================== Prepare data ================================
from IPython.core.interactiveshell import InteractiveShell
import glob
# IPython display setting. Per the IPython documentation, 'all' makes every
# expression statement in a cell display its value, not only the last one.
# NOTE(review): this only matters when the code runs inside IPython/Jupyter.
InteractiveShell.ast_node_interactivity = 'all'
# "*" is a wildcard: collect every .txt file directly inside the data/ folder.
# NOTE(review): glob.glob() does not promise a sorted result, so the order of
# this list may differ between machines -- confirm nothing relies on it.
all_filenames = glob.glob('data/*.txt')
# print(all_filenames)
# =========================== Convert encoding ================================
import unicodedata
import string
# Alphabet of characters that may appear in a name after normalization:
# the 52 ASCII letters (string.ascii_letters = 26 lowercase + 26 uppercase)
# followed by space, period, comma, semicolon and apostrophe.
# The space and the apostrophe must be part of the alphabet so that names
# such as "De La Cruz" or "O'Neal" keep those characters when filtered;
# with them the alphabet has the 57 characters this file expects
# (see the commented-out print(n_letters) check below).
all_letters = string.ascii_letters + " .,;'"
# Number of distinct characters in the alphabet.
n_letters = len(all_letters)
# Convert a Unicode string to plain ASCII restricted to the known alphabet.
def unicode_to_ascii(s: str) -> str:
    """Return *s* reduced to the characters found in ``all_letters``.

    The input is decomposed with NFD normalization, which splits an accented
    letter into its base letter followed by combining marks.  Combining marks
    (Unicode category ``Mn``) are dropped, and every remaining character that
    is not in the module-level ``all_letters`` string is discarded as well.

    Args:
        s: The text to convert.

    Returns:
        A new string containing only characters from ``all_letters``, in
        their original order.
    """
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        # 'Mn' = nonspacing mark, i.e. the accent split off by NFD.
        if unicodedata.category(c) != 'Mn'
        and c in all_letters
    )
# print(n_letters) # 字符数为57个
# print(unicode_to