Here are some commonly used Python scripts. Each one depends on third-party libraries that you need to install manually.
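All of the dependencies can be installed in one go with pip install Pillow requests beautifulsoup4 PyPDF2 (the Pillow package provides the PIL module, and beautifulsoup4 provides bs4).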
1. Compressing images
input_folder is the path of the source image folder, and output_folder is the path of the output folder.
When run, the script shrinks each image's file size while keeping the visual quality largely intact.
import os
from PIL import Image

# Set the input and output folder paths
input_folder = r"C:\Users\Administrator\Desktop\r\111"
output_folder = r"C:\Users\Administrator\Desktop\r\out"
quality = 30  # JPEG quality, 1-95; lower values mean stronger compression

def compress_images(input_folder, output_folder, quality):
    # Make sure the output folder exists
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    # Walk through every file under the input folder
    for root, dirs, files in os.walk(input_folder):
        for file in files:
            if file.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif', '.webp')):
                input_path = os.path.join(root, file)
                # Images are re-encoded as JPEG, so save them with a .jpg extension
                output_path = os.path.join(output_folder, os.path.splitext(file)[0] + ".jpg")
                try:
                    with Image.open(input_path) as img:
                        img = img.convert("RGB")  # JPEG has no alpha channel, so force RGB
                        img.save(output_path, "JPEG", quality=quality)
                    print(f"Compressed: {input_path} → {output_path}")
                except Exception as e:
                    print(f"Failed to compress {input_path}: {e}")

# Run the batch compression
compress_images(input_folder, output_folder, quality)
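Note that compress_images() writes everything into one flat folder, so files with the same name in different subfolders will overwrite each other. Below is a minimal sketch of a variant that mirrors the input directory tree instead (compress_images_keep_tree is a hypothetical name, not part of the original script):

import os
from PIL import Image

def compress_images_keep_tree(input_folder, output_folder, quality=30):
    # Recreate each subfolder of input_folder under output_folder
    for root, dirs, files in os.walk(input_folder):
        rel_dir = os.path.relpath(root, input_folder)
        target_dir = os.path.join(output_folder, rel_dir)
        os.makedirs(target_dir, exist_ok=True)
        for file in files:
            if file.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif', '.webp')):
                out_name = os.path.splitext(file)[0] + ".jpg"
                try:
                    with Image.open(os.path.join(root, file)) as img:
                        img.convert("RGB").save(os.path.join(target_dir, out_name), "JPEG", quality=quality)
                except Exception as e:
                    print(f"Failed to compress {file}: {e}")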
2. Cropping images in multiple folders to 1:1
input_folders is a list of source image folder paths, and base_output_folder is the path of the output folder.
When run, the script center-crops every image in the source folders to a 1:1 square and saves the results under the output folder, keeping the image and folder names unchanged.
import os
from PIL import Image

def crop_images_to_square(input_folders, base_output_folder):
    """
    Crop all images in multiple input folders to square shape and save them
    in corresponding output folders.

    Args:
        input_folders (list): List of paths to input folders containing images.
        base_output_folder (str): Base path for output folders.
    """
    for input_folder in input_folders:
        # Create corresponding output folder structure
        relative_folder = os.path.relpath(input_folder, start=os.path.commonpath(input_folders))
        output_folder = os.path.join(base_output_folder, relative_folder)
        os.makedirs(output_folder, exist_ok=True)
        # Process each file in the current input folder
        for filename in os.listdir(input_folder):
            if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp')):
                img_path = os.path.join(input_folder, filename)
                try:
                    with Image.open(img_path) as img:
                        # Get image dimensions
                        width, height = img.size
                        new_size = min(width, height)
                        # Calculate the centered cropping box (integer pixel coordinates)
                        left = (width - new_size) // 2
                        top = (height - new_size) // 2
                        right = left + new_size
                        bottom = top + new_size
                        # Crop and save the image
                        img_cropped = img.crop((left, top, right, bottom))
                        img_cropped.save(os.path.join(output_folder, filename))
                        print(f'Cropped and saved: {os.path.join(output_folder, filename)}')
                except Exception as e:
                    print(f"Error processing file {img_path}: {e}")

if __name__ == '__main__':
    # Define the list of input folders
    input_folders = [
        r'D:\work\1225img\input\ttin1USBCable-1',
        r'D:\work\1225img\input\ttin1USBCable-2',
        r'D:\work\1225img\input\ttin1USBCable-3',
        r'D:\work\1225img\input\ttin1USBCable-4',
        r'D:\work\1225img\input\ttin1USBCable-5',
        r'D:\work\1225img\input\ttin1USBCable-6',
        r'D:\work\1225img\input\ttin1USBCable-7',
    ]
    # Define the base output folder
    base_output_folder = r'D:\work\1225img\out'
    # Crop images in all input folders
    crop_images_to_square(input_folders, base_output_folder)
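Listing seven sibling folders by hand works but is brittle. A small sketch of building input_folders automatically from every subdirectory of the input root (the root path is just taken from the example above):

import os
import glob

input_root = r'D:\work\1225img\input'
# Collect every subdirectory of the input root as an input folder
input_folders = [p for p in glob.glob(os.path.join(input_root, '*')) if os.path.isdir(p)]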
3. Cropping images in a single folder to 1:1
input_folder is the source image folder path, and output_folder is the output folder path.
When run, the script center-crops every image in the source folder to a 1:1 square and saves the results to the output folder, keeping the file names unchanged.
import os
from PIL import Image

def crop_images_to_square(input_folder, output_folder):
    # Create output folder if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)
    # Loop through all files in the input folder
    for filename in os.listdir(input_folder):
        if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp')):
            # Open image
            img_path = os.path.join(input_folder, filename)
            with Image.open(img_path) as img:
                # Get dimensions
                width, height = img.size
                # Calculate the new size (the smaller dimension)
                new_size = min(width, height)
                # Calculate the centered cropping box (integer pixel coordinates)
                left = (width - new_size) // 2
                top = (height - new_size) // 2
                right = left + new_size
                bottom = top + new_size
                # Crop image
                img_cropped = img.crop((left, top, right, bottom))
                # Save the cropped image to the output folder
                img_cropped.save(os.path.join(output_folder, filename))
                print(f'Cropped and saved: {filename}')

if __name__ == '__main__':
    input_folder = r'D:\work\1225img\input\ChargeCables43\ChargeCables43'  # Replace with your input folder path
    output_folder = r'D:\work\1225img\input\ChargeCables43\out'  # Replace with your output folder path
    crop_images_to_square(input_folder, output_folder)
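As a quick sanity check of the crop math: for a 1920×1080 image, new_size is 1080, so the crop box is (420, 0, 1500, 1080), i.e. a centered 1080×1080 square.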
4. Scraping all img images from a target website
When run, the script asks for the target site's URL, then walks the site and its subpages, downloading every image referenced by an img tag into a local downloaded_images folder.
import os
import requests
from urllib.parse import urljoin
from bs4 import BeautifulSoup
import time

# Folder where downloaded images are stored
base_folder = 'downloaded_images'
if not os.path.exists(base_folder):
    os.makedirs(base_folder)

# Track pages that have already been visited
visited_urls = set()

# Fetch the HTML of a page
def get_page_content(url):
    try:
        response = requests.get(url)
        response.raise_for_status()  # Raise an error if the request failed
        return response.text
    except requests.RequestException as e:
        print(f"Request failed: {url}, error: {e}")
        return None

# Download an image and save it to disk
def download_image(img_url, folder):
    try:
        img_data = requests.get(img_url).content
        img_name = os.path.basename(img_url)
        img_path = os.path.join(folder, img_name)
        with open(img_path, 'wb') as f:
            f.write(img_data)
        print(f"Image saved: {img_path}")
    except Exception as e:
        print(f"Failed to download image: {img_url}, error: {e}")

# Download every image on a page
def download_images_from_page(url):
    html = get_page_content(url)
    if not html:
        return
    soup = BeautifulSoup(html, 'html.parser')
    img_tags = soup.find_all('img')
    # Iterate over all img tags
    for img_tag in img_tags:
        img_url = img_tag.get('src')
        if img_url:
            img_url = urljoin(url, img_url)  # Resolve relative URLs
            download_image(img_url, base_folder)

# Collect all links on a page
def get_links_from_page(url):
    html = get_page_content(url)
    if not html:
        return []
    soup = BeautifulSoup(html, 'html.parser')
    links = soup.find_all('a', href=True)
    # Extract every link on the page
    link_urls = []
    for link in links:
        link_url = link['href']
        full_url = urljoin(url, link_url)  # Convert to an absolute URL
        if full_url.startswith('http') and full_url not in visited_urls:
            link_urls.append(full_url)
            visited_urls.add(full_url)
    return link_urls

# Recursively crawl a page and its subpages
def crawl(url):
    visited_urls.add(url)  # Mark the current page as visited
    # Download all images on the current page
    download_images_from_page(url)
    # Collect and visit every linked page
    links = get_links_from_page(url)
    for link in links:
        time.sleep(1)  # Throttle the request rate
        crawl(link)

# Start the crawler
if __name__ == '__main__':
    # Read the target URL from user input
    start_url = input("Enter the target website URL: ").strip()
    if start_url.startswith('http'):
        visited_urls.add(start_url)  # Mark the start page as visited
        crawl(start_url)
        print("All images downloaded!")
    else:
        print("Invalid URL; it must start with 'http://' or 'https://'.")
    input("Press Enter to exit...")
5. Splitting a PDF file
The first argument of split_pdf is the path of the source PDF, and the second is the folder where the split pages are saved.
When run, the script splits the source PDF into single-page PDF files and saves them to that folder.
import os
from PyPDF2 import PdfReader, PdfWriter

def split_pdf(input_pdf, output_folder):
    reader = PdfReader(input_pdf)
    # Make sure the output folder exists before writing into it
    os.makedirs(output_folder, exist_ok=True)
    for page_number in range(len(reader.pages)):
        writer = PdfWriter()
        writer.add_page(reader.pages[page_number])
        output_path = os.path.join(output_folder, f"page_{page_number + 1}.pdf")
        with open(output_path, "wb") as output_file:
            writer.write(output_file)
        print(f"Page {page_number + 1} saved as {output_path}")

# Example usage
split_pdf("./merged.pdf", "./output_pages")
6. Merging PDF files
The first argument of merge_pdfs is a list of source PDF paths, and the second is the path of the merged output file.
When run, the script merges the source PDFs, in list order, into a single PDF saved at that path.
from PyPDF2 import PdfMerger

def merge_pdfs(pdf_list, output_path):
    merger = PdfMerger()
    # Append each source PDF in list order
    for pdf in pdf_list:
        merger.append(pdf)
    merger.write(output_path)
    merger.close()
    print(f"PDFs merged into {output_path}")

# Example usage
merge_pdfs(["1.pdf", "2.pdf"], "merged.pdf")