# ######## Extract the required information
import re
import pandas as pd

# Compiled once: the uid is the text between "uid:" and the next comma; every
# error entry is an `err_code":<digits>,"originImageUrl":"<url>"` fragment.
_UID_PATTERN = re.compile(r'uid:(.*?),')
_ERR_PATTERN = re.compile(r'err_code":(\d+),"originImageUrl":"(.*?)"')


def extract_error_records(log):
    """Return the ``[uid, err_code, originImageUrl]`` rows found in one log entry.

    Args:
        log: One cell of the ``content`` column. Expected to be a string;
            anything else (e.g. the NaN pandas yields for an empty cell)
            produces no rows instead of raising.

    Returns:
        A list with one ``[uid, err_code, url]`` list per error fragment in
        the log, all values as strings. The list is empty when the log has
        no ``uid:...,`` marker or no error fragments.
    """
    if not isinstance(log, str):
        return []
    uid_match = _UID_PATTERN.search(log)
    if uid_match is None:
        # Without a uid the row cannot be attributed to a user; skip it.
        return []
    uid = uid_match.group(1)
    return [[uid, code, url] for code, url in _ERR_PATTERN.findall(log)]


if __name__ == "__main__":
    # Read the raw data file
    df_origin = pd.read_excel(r'D:\文件\2024年\origin-data.xlsx')
    # Pull out the column that holds the log text
    log_data = df_origin['content'].tolist()
    # Collect the extracted rows from every log entry
    result = []
    for log in log_data:
        result.extend(extract_error_records(log))
    # Build the result data frame
    df_result = pd.DataFrame(result, columns=['uid','err_code','originImageUrl'])
    # Save the result to a file
    df_result.to_excel(r'D:\文件\2024年\output.xlsx', index=False)

# ###### Save the images into folders
import os
import os
import requests
import pandas as pd


def download_images(uid, err_code, originImageUrl):
    """Download one image and store it in the folder of its error code.

    The file is written to ``D:/文件/2024年/<err_code>/`` and named
    ``<uid>_<err_code>_<n>.jpg``, where ``n`` is one more than the number of
    entries already present in that folder.

    Args:
        uid: User identifier, used as the file-name prefix.
        err_code: Error code; selects the target folder and is part of the
            file name.
        originImageUrl: URL the image is fetched from.

    Returns:
        None. A failed download is reported on stdout and nothing is written.
    """
    try:
        # A timeout keeps one dead host from hanging the whole batch, and the
        # status check keeps HTTP error pages from being saved as .jpg files.
        response = requests.get(originImageUrl, timeout=30)
        response.raise_for_status()
    except requests.RequestException:
        print(f"Error: Failed to download {originImageUrl}")
        return

    folder_path = f"D:/文件/2024年/{err_code}/"
    os.makedirs(folder_path, exist_ok=True)

    # Number the file after what is already in the folder so repeated
    # (uid, err_code) pairs do not overwrite each other.
    file_name = f"{uid}_{err_code}_{len(os.listdir(folder_path))+1}.jpg"
    with open(os.path.join(folder_path, file_name), "wb") as fh:
        fh.write(response.content)
if __name__ == "__main__":
    # Read the Excel file
    df = pd.read_excel(r'D:\文件\2024年\output.xlsx')
    # Iterate over each row in the DataFrame and download its image
    for index, row in df.iterrows():
        uid = row["uid"]
        err_code = row["err_code"]
        originImageUrl = row["originImageUrl"]
        download_images(uid, err_code, originImageUrl)
import ast
import os
import requests
import pandas as pd
def build_pic_file_name(uid, position, url):
    """Return the file name for the ``position``-th image of ``uid``.

    The name is ``<uid>_<position>_<last URL path segment>.jpg``; any ``?``
    in that segment is replaced by ``_`` because ``?`` is not allowed in
    Windows file names.
    """
    return f"{uid}_{position}_{url.split('/')[-1].replace('?','_')}.jpg"


if __name__ == "__main__":
    # Read the data
    df = pd.read_excel(r"C:\Users\UFOTO\Desktop\批量改格式_20240227092057\pic_list_转自TXT.xlsx")
    # Walk through every row
    for index, row in df.iterrows():
        uid = row["uid"]
        content = row["content"]
        # Convert the content string into a list of URLs
        try:
            urls = ast.literal_eval(content)
        except (ValueError, SyntaxError, TypeError):
            # Empty or malformed cell: report it and move on to the next row.
            print(f"Error: Failed to parse content for {uid}")
            continue
        # Walk through every URL of this row
        for i, url in enumerate(urls):
            # Create the per-user folder
            folder_path = f"D:/文件/2024年/pic_list/{uid}/"
            os.makedirs(folder_path, exist_ok=True)
            # Download the image
            try:
                response = requests.get(url, timeout=30)
                # Treat HTTP error responses as failed downloads instead of
                # saving the error page as an image.
                response.raise_for_status()
            except Exception:
                # Best-effort batch: report and continue. `Exception` rather
                # than a bare `except` so Ctrl-C can still stop the run.
                print(f"Error: Failed to download {url}")
                continue
            # Save the image
            file_name = build_pic_file_name(uid, i + 1, url)
            try:
                with open(os.path.join(folder_path, file_name), "wb") as fh:
                    fh.write(response.content)
            except Exception:
                print(f"Error: Failed to save {url}")
                continue
# Well-formatted version
import os
import pandas as pd
import requests
import random
def save_image(url, filename):
    """Download ``url`` and write the response body to ``filename``.

    Args:
        url: Address of the image or video to fetch.
        filename: Full path of the file to create or overwrite.

    Raises:
        requests.RequestException: The request failed, timed out, or the
            server answered with an HTTP error status.
        OSError: The file could not be written.
    """
    response = requests.get(url, timeout=60)
    # Do not store an HTTP error page under a .jpg/.MP4 name.
    response.raise_for_status()
    with open(filename, "wb") as f:
        f.write(response.content)


if __name__ == "__main__":
    # Read the CSV file
    df = pd.read_csv(r"C:\Users\UFOTO\Downloads\image2video.csv")
    # Specify the directory to save images
    save_directory = r"D:\文件\2024年\selfieu\dance"
    # Create the directory if it doesn't exist
    os.makedirs(save_directory, exist_ok=True)
    # Initialize a counter variable
    counter = 1
    # Iterate over each row in the DataFrame
    for index, row in df.iterrows():
        user_id = row["user_id"]
        request_url = row["request_key"]
        result_url = row["result_video"]
        # Extract and save request_url image
        request_filename = f"{user_id}_request_{counter}.jpg"
        request_filepath = os.path.join(save_directory, request_filename)
        # Extract and save result_url video
        result_filename = f"{user_id}_result_{counter}.MP4"
        result_filepath = os.path.join(save_directory, result_filename)
        for url, filepath in (
            (request_url, request_filepath),
            (result_url, result_filepath),
        ):
            try:
                save_image(url, filepath)
            except (requests.RequestException, OSError):
                # One bad URL must not abort the rest of the batch.
                print(f"Error: Failed to download {url}")
        # Increment the counter (once per row, so request and result share it)
        counter += 1