根据Excel文件中的图片地址,将图片批量下载到文件夹中

########提取需要的信息
import re
import pandas as pd

# Load the raw log export.
df_origin = pd.read_excel(r'D:\文件\2024年\origin-data.xlsx')

# One log entry per row of the 'content' column.
log_data = df_origin['content'].tolist()

# Compile once: both patterns are reused for every log entry.
uid_pattern = re.compile(r'uid:(.*?),')
err_pattern = re.compile(r'err_code":(\d+),"originImageUrl":"(.*?)"')

# Collected [uid, err_code, originImageUrl] rows.
result = []

# Walk every log entry and pull out the uid plus each (err_code, url) pair.
for log in log_data:
    # Empty cells are read back as NaN (a float), which `re` cannot search.
    if not isinstance(log, str):
        continue

    uid_match = uid_pattern.search(log)
    if uid_match is None:
        # Without a uid the row cannot be attributed; skip it instead of
        # crashing on `None.group(1)`.
        print(f"Warning: no uid found, skipping log: {log[:80]}")
        continue
    uid = uid_match.group(1)

    for code, url in err_pattern.findall(log):
        result.append([uid, code, url])

# Build the result table.
df_result = pd.DataFrame(result, columns=['uid', 'err_code', 'originImageUrl'])

# Persist the result for the download step.
df_result.to_excel(r'D:\文件\2024年\output.xlsx', index=False)

######保存到文件夹中
import os
import requests
import pandas as pd

def download_images(uid, err_code, originImageUrl, timeout=30):
    """Download one image and store it in a folder named after its error code.

    The file is saved as ``D:/文件/2024年/<err_code>/<uid>_<err_code>_<n>.jpg``
    where ``n`` is one more than the number of entries already in that folder.

    Args:
        uid: User identifier, used in the file name.
        err_code: Error code, used as the folder name and in the file name.
        originImageUrl: URL of the image to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        None. A failed download is reported on stdout and skipped so a batch
        run can continue with the next row.
    """
    try:
        # Without a timeout a stalled server would block the batch forever.
        response = requests.get(originImageUrl, timeout=timeout)
        # Do not save an HTTP error page under a .jpg name.
        response.raise_for_status()
    except requests.RequestException as exc:
        print(f"Error: Failed to download {originImageUrl}: {exc}")
        return

    folder_path = f"D:/文件/2024年/{err_code}/"
    os.makedirs(folder_path, exist_ok=True)

    # Sequence number comes from the current number of entries in the folder.
    file_name = f"{uid}_{err_code}_{len(os.listdir(folder_path)) + 1}.jpg"
    with open(os.path.join(folder_path, file_name), "wb") as file:
        file.write(response.content)

# Load the table of (uid, err_code, originImageUrl) rows written by the extraction step.
df = pd.read_excel(r'D:\文件\2024年\output.xlsx')

# Fetch the image referenced by every row.
for index, row in df.iterrows():
    uid, err_code, originImageUrl = (
        row[column] for column in ("uid", "err_code", "originImageUrl")
    )
    download_images(uid, err_code, originImageUrl)
import ast
import os
import requests
import pandas as pd

# Load the sheet; each row has a uid and a `content` cell holding a
# stringified list of image URLs.
df = pd.read_excel(r"C:\Users\UFOTO\Desktop\批量改格式_20240227092057\pic_list_转自TXT.xlsx")

# Process every row.
for index, row in df.iterrows():
    uid = row["uid"]
    content = row["content"]

    # Turn the content string into a list. A malformed or empty cell is
    # reported and skipped rather than aborting the whole run.
    try:
        urls = ast.literal_eval(content)
    except (ValueError, TypeError, SyntaxError) as exc:
        print(f"Error: Failed to parse content for uid {uid}: {exc}")
        continue

    # Process every URL of this row.
    for i, url in enumerate(urls):
        # All images of one uid share a folder.
        folder_path = f"D:/文件/2024年/pic_list/{uid}/"
        os.makedirs(folder_path, exist_ok=True)

        # Download the image. Only request failures are caught, so Ctrl-C
        # still stops the batch.
        try:
            response = requests.get(url, timeout=30)
            # Do not save an HTTP error page under a .jpg name.
            response.raise_for_status()
        except requests.RequestException as exc:
            print(f"Error: Failed to download {url}: {exc}")
            continue

        # Save the image; '?' is replaced because it is not allowed in
        # Windows file names.
        file_name = f"{uid}_{i+1}_{url.split('/')[-1].replace('?', '_')}.jpg"
        try:
            with open(os.path.join(folder_path, file_name), "wb") as file:
                file.write(response.content)
        except OSError as exc:
            print(f"Error: Failed to save {url}: {exc}")
            continue

格式标准的

import os
import pandas as pd
import requests
import random

# Read the CSV file
df = pd.read_csv(r"C:\Users\UFOTO\Downloads\image2video.csv")

# Specify the directory to save downloads
save_directory = r"D:\文件\2024年\selfieu\dance"

# Create the directory if it doesn't exist
os.makedirs(save_directory, exist_ok=True)


def save_image(url, filename, timeout=30):
    """Download *url* and write the response body to *filename*.

    Args:
        url: Address of the file to fetch.
        filename: Destination path; opened in binary write mode.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
        OSError: If the destination file cannot be written.
    """
    response = requests.get(url, timeout=timeout)
    # Do not save an HTTP error page as if it were the media file.
    response.raise_for_status()
    with open(filename, "wb") as f:
        f.write(response.content)


# Iterate over each row; `counter` is the 1-based row sequence number used
# in the output file names.
for counter, (index, row) in enumerate(df.iterrows(), start=1):
    user_id = row["user_id"]
    request_url = row["request_key"]
    result_url = row["result_video"]

    # Destination of the request image
    request_filename = f"{user_id}_request_{counter}.jpg"
    request_filepath = os.path.join(save_directory, request_filename)

    # Destination of the result video
    result_filename = f"{user_id}_result_{counter}.MP4"
    result_filepath = os.path.join(save_directory, result_filename)

    # Download both files independently so one failure does not prevent the
    # other download or abort the remaining rows.
    for url, filepath in (
        (request_url, request_filepath),
        (result_url, result_filepath),
    ):
        try:
            save_image(url, filepath)
        except (requests.RequestException, OSError) as exc:
            print(f"Error: Failed to save {url}: {exc}")
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值