Python提取Excel表格内超链接并将对应文件按类别添加到相应同时新建的文件夹中
示例代码
import os
import shutil
import pandas as pd
import openpyxl
import urllib.parse
wb = openpyxl.load_workbook('PPP项目政策梳理目录.xlsx')
ws = wb['案例指引']
file_paths = []
categories = []
for row in ws.rows:
hyperlink_cell = row[3]
category_cell = row[7]
if hyperlink_cell.hyperlink:
hyperlink_url = hyperlink_cell.hyperlink.target
try:
file_path = urllib.parse.unquote(hyperlink_url)
if not os.path.exists(file_path):
print(f'无法找到链接对应的文件: {file_path}')
continue
file_paths.append(file_path)
except Exception as e:
print(f'解析超链接出错: {e}')
continue
category = category_cell.value
categories.append(category)
df = pd.DataFrame({'文件链接': file_paths, '分类': categories})
target_dir = r'C:\Users\z_ml_\Desktop\新分类2'
for group, df_group in df.groupby('分类'):
group_folder = os.path.join(target_dir, group)
if not os.path.exists(group_folder):
os.makedirs(group_folder)
for index, row in df_group.iterrows():
try:
file_path = row['文件链接']
shutil.copy(file_path, group_folder)
except Exception as e:
print(f'无法复制文件: {file_path},错误信息: {e}')
continue
解释都在注释里