from PIL import Image
import os
import requests
from baiduspider import BaiduSpider
from requests.exceptions import Timeout
import time
# Index of the first query to process (0-based); lets a crashed run be resumed
# by skipping already-processed lines of search_terms.txt.
start_query_index = 0

# Load the queries, skipping the first `start_query_index` lines so that
# `enumerate(..., start=start_query_index)` below labels each query with its
# absolute line index.  (The original skipped `start_query_index - 1` lines,
# which is off by one relative to the enumerate start.)
with open('search_terms.txt', 'r', encoding='utf-8') as terms_file:
    for _ in range(start_query_index):
        next(terms_file)
    queries = terms_file.read().splitlines()

save_dir = "AIGC"
os.makedirs(save_dir, exist_ok=True)

for query_index, query in enumerate(queries, start=start_query_index):
    print(f"Processing query {query_index}: {query}")
    try:
        results = BaiduSpider().search_pic(query=query)
    except KeyError as e:
        # baiduspider raises KeyError when the result page is missing expected
        # fields; skip this query rather than abort the whole run.
        print(f"Error occurred during search for query {query_index}: {e}")
        continue

    # One sub-directory per query, named by its absolute index.
    query_save_dir = os.path.join(save_dir, str(query_index))
    os.makedirs(query_save_dir, exist_ok=True)

    file_counter = 1  # numbers only successfully saved images
    for image_index, result in enumerate(results):
        url = result.url
        print(f"Downloading image {image_index + 1} for query {query_index}: {url}")
        try:
            # NOTE(review): verify=False disables TLS certificate checking —
            # kept because many image hosts have broken certs, but this is
            # insecure; confirm it is intentional.
            with requests.get(url, stream=True, timeout=5, verify=False) as response:
                if response.status_code != 200:
                    print(f"Failed to download image {image_index + 1} for query {query_index}. Status code: {response.status_code}")
                    continue

                # Stream the payload to a temp file first so half-downloaded
                # data never lands under a final image name.
                temp_file_path = os.path.join(query_save_dir, 'temp.jpg')
                with open(temp_file_path, 'wb') as image_file:
                    for chunk in response.iter_content(1024):
                        image_file.write(chunk)
                print(f"Image downloaded temporarily.")

            try:
                with Image.open(temp_file_path) as img:
                    # Convert to RGB so RGBA/palette images (e.g. PNGs) can be
                    # saved as JPEG, and force the target wallpaper resolution.
                    img_resized = img.convert("RGB").resize(
                        (1920, 1080), Image.Resampling.LANCZOS
                    )
                    file_name = f'image_{file_counter}.jpg'
                    final_file_path = os.path.join(query_save_dir, file_name)
                    img_resized.save(final_file_path)
            finally:
                # Always remove the temp file, even if decoding/resizing
                # failed (the original leaked it on error).
                if os.path.exists(temp_file_path):
                    os.remove(temp_file_path)

            print(f"Image {file_counter} processed and saved successfully.")
            file_counter += 1
        except Timeout:
            print(f"Request timed out after 5 seconds for image {image_index + 1} of query {query_index}. Skipping...")
        except Exception as e:
            # Best-effort scraping: log, back off briefly, and continue with
            # the next image rather than aborting the run.
            print(f"An error occurred while downloading image {image_index + 1} for query {query_index}: {str(e)}")
            time.sleep(10)