import io
import json
import logging
from datetime import datetime, timedelta, timezone

import boto3
import fitz
from botocore.exceptions import ClientError
from PIL import Image
def download_pdf_from_s3(s3_bucket, s3_key, s3_region):
    """Download a PDF object from S3 and return its raw content.

    Args:
        s3_bucket: Name of the bucket that holds the PDF.
        s3_key: Object key of the PDF inside the bucket.
        s3_region: AWS region used to create the S3 client.

    Returns:
        The object's content as returned by reading the response body on
        success. If S3 raises a ``ClientError``, an error dict with
        ``errorCode`` -900002 is returned instead, so callers must check
        the result type before treating it as PDF data.
    """
    s3_client = boto3.client("s3", region_name=s3_region)
    try:
        response = s3_client.get_object(Bucket=s3_bucket, Key=s3_key)
        return response["Body"].read()
    except ClientError:
        # Keep the underlying S3 error (code, message, traceback) in the
        # logs; the returned payload only carries a generic message.
        logging.getLogger(__name__).exception(
            "Failed to download s3://%s/%s", s3_bucket, s3_key
        )
        return {
            "errorCode": -900002,
            "message": "Failed to download file from S3.",
            "result": None,
        }
def convert_single_page(
    pdf_data,
    resolution,
    prefix,
    metadata,
    unique_id,
    day,
    s3_bucket_name,
    s3_region,
    file_name,
    page_num,
):
    """Render a single PDF page to a PNG image and upload it to S3.

    Args:
        pdf_data: Raw PDF content, opened as an in-memory stream.
        resolution: Target resolution; divided by 72 to obtain the zoom
            factor (presumably DPI, given the 72 divisor -- confirm with
            callers).
        prefix: Forwarded unchanged to ``save_page_to_s3``.
        metadata: Forwarded unchanged to ``save_page_to_s3``.
        unique_id: Forwarded unchanged to ``save_page_to_s3``.
        day: Forwarded unchanged to ``save_page_to_s3``.
        s3_bucket_name: Forwarded unchanged to ``save_page_to_s3``.
        s3_region: Forwarded unchanged to ``save_page_to_s3``.
        file_name: Forwarded unchanged to ``save_page_to_s3``.
        page_num: Index used to select the page from the opened document.

    Returns:
        Whatever ``save_page_to_s3`` returns, or an error dict with
        ``errorCode`` -900003 if any step raises.
    """
    try:
        # The context manager closes the document even when rendering
        # fails, so nothing leaks across warm Lambda invocations.
        with fitz.open(stream=pdf_data, filetype="pdf") as pdf_document:
            page = pdf_document[page_num]  # only this page is processed
            # The same zoom is applied on both axes, so the pixmap is the
            # page size scaled by resolution / 72.
            zoom = resolution / 72
            pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom))
            img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)

        # Encode the rendered page as PNG in memory.
        img_buffer = io.BytesIO()
        img.save(img_buffer, format="PNG")

        # Upload the PNG bytes to S3.
        return save_page_to_s3(
            prefix,
            metadata,
            img_buffer.getvalue(),
            unique_id,
            day,
            s3_bucket_name,
            s3_region,
            file_name,
        )
    except Exception:
        # Any failure is reported as one generic error code; log the
        # traceback so the real cause can still be diagnosed.
        logging.getLogger(__name__).exception(
            "Failed to convert page %s of the PDF", page_num
        )
        return {
            "errorCode": -900003,
            "message": "PDF single page conversion error.",
            "result": None,
        }
def save_page_to_s3(
    prefix,
    metadata,
    file_stream,
    unique_id,
    day,
    s3_bucket_name,
    s3_region,
    file_name,
):
    """Upload file content to S3 and return the key it was stored under.

    The object key is built as ``{prefix}/{unique_id}/{file_name}``.

    Args:
        prefix: First segment of the object key.
        metadata: Metadata attached to the object; a falsy value is
            replaced by an empty dict.
        file_stream: File content; it is wrapped in ``io.BytesIO`` for the
            upload.
        unique_id: Second segment of the object key.
        day: Number of days from now used for the ``Expires`` value.
        s3_bucket_name: Destination bucket.
        s3_region: AWS region used to create the S3 client.
        file_name: Last segment of the object key.

    Returns:
        ``{"errorCode": 0, "message": "success.", "result": {"key": ...}}``
        on success, or an error dict with ``errorCode`` -900004 if S3
        raises a ``ClientError``.
    """
    s3_client = boto3.client("s3", region_name=s3_region)
    try:
        s3_key = f"{prefix}/{unique_id}/{file_name}"

        # Use an aware UTC timestamp: a naive local "now" would be read as
        # UTC by botocore and shift the expiry on hosts not running in UTC.
        # NOTE(review): `Expires` is only passed as an upload argument; if
        # objects must actually be deleted after `day` days, confirm a
        # bucket lifecycle rule exists.
        expires_at = datetime.now(timezone.utc) + timedelta(days=day)
        extra_args = {"Metadata": metadata or {}, "Expires": expires_at}

        # Upload the content to S3.
        s3_client.upload_fileobj(
            io.BytesIO(file_stream),
            s3_bucket_name,
            s3_key,
            ExtraArgs=extra_args,
        )
        return {"errorCode": 0, "message": "success.", "result": {"key": s3_key}}
    except ClientError:
        # Keep the underlying S3 error in the logs; the returned payload
        # only carries a generic message.
        logging.getLogger(__name__).exception(
            "Failed to upload %s/%s/%s to bucket %s",
            prefix,
            unique_id,
            file_name,
            s3_bucket_name,
        )
        return {
            "errorCode": -900004,
            "message": "Failed to save file from S3.",
            "result": None,
        }
def lambda_handler(event, context):
    """Lambda entry point: download a PDF from S3 and convert one page.

    The following keys are read from ``event`` with ``event.get``:
    ``resolution``, ``prefix``, ``metadata`` (defaults to ``{}``), ``day``,
    ``s3Bucket``, ``s3Region``, ``uniqueId``, ``fileName``, ``pdfFileKey``
    and ``pageNum``.

    Args:
        event: Invocation payload; must provide a ``get`` method.
        context: Lambda context object (unused).

    Returns:
        The result dict of ``convert_single_page`` on the normal path, the
        error dict from ``download_pdf_from_s3`` if the download failed, or
        a -900001 error dict if an unexpected exception escapes.
    """
    try:
        resolution = event.get("resolution")
        prefix = event.get("prefix")
        metadata = event.get("metadata", {})
        day = event.get("day")
        s3_bucket = event.get("s3Bucket")
        s3_region = event.get("s3Region")
        unique_id = event.get("uniqueId")
        file_name = event.get("fileName")
        pdf_file_key = event.get("pdfFileKey")
        page_num = event.get("pageNum")

        # Download the PDF from S3.
        pdf_data = download_pdf_from_s3(s3_bucket, pdf_file_key, s3_region)
        # A failed download yields an error dict instead of PDF content.
        # Return it as-is; feeding it to the renderer would mask the
        # download failure behind a page-conversion error.
        if isinstance(pdf_data, dict):
            return pdf_data

        # Convert the requested page and upload the result.
        return convert_single_page(
            pdf_data,
            resolution,
            prefix,
            metadata,
            unique_id,
            day,
            s3_bucket,
            s3_region,
            file_name,
            page_num,
        )
    except Exception:
        logging.getLogger(__name__).exception("Unhandled error in lambda_handler")
        # "errorCode" matches every other response in this module;
        # "statusCode" is kept so existing callers that read it still work.
        return {
            "errorCode": -900001,
            "statusCode": -900001,
            "message": "Lambda unknown error.",
            "result": None,
        }
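# Open question: how is the pixel size of the rendered image calculated?
# The page is rendered with zoom = resolution / 72 on both axes, so the
# image should measure about (page width in points * zoom) by
# (page height in points * zoom) pixels.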