# load_files函数解析
def load_files(container_path, description=None, categories=None,
load_content=True, shuffle=True, encoding=None,
decode_error='strict', random_state=0):
# target存放的是所有文档的类标签数字编号,filenames存放的是文档的完整路径名,它和target一一对应,target_names存放的是类标签名
target = []
target_names = []
filenames = []
# folders:container_path下所有子文件夹的名称(只保留目录,按名称排序)
# listdir以list的形式返回container_path路径下的所有子路径名
folders = [f for f in sorted(listdir(container_path))
if isdir(join(container_path, f))]
# 如果categories不为none,则过滤掉非categories的路径
if categories is not None:
folders = [f for f in folders if f in categories]
# enumerate:枚举,它允许我们遍历数据并自动计数(从0开始计数:0,1,2...)
for label, folder in enumerate(folders):
target_names.append(folder)
folder_path = join(container_path, fo
scikit-learn中load_files函数源码解析
最新推荐文章于 2024-07-02 00:11:19 发布