S3_bucketname

博客强调标签不能有大写字母,虽未提及具体标签相关背景,但此要求明确,在信息技术领域标签使用中需遵循。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

1. S3 存储桶名称(bucket name)不能包含大写字母
import io
import json
import logging
from datetime import datetime, timedelta, timezone

import boto3
import fitz
from boto3.exceptions import S3UploadFailedError
from botocore.exceptions import ClientError
from PIL import Image

logger = logging.getLogger(__name__)


def download_pdf_from_s3(s3_bucket, s3_key, s3_region):
    """Download a PDF object from S3.

    Args:
        s3_bucket: Name of the bucket holding the PDF.
        s3_key: Object key of the PDF.
        s3_region: AWS region used to build the S3 client.

    Returns:
        The object's raw bytes on success, or an error dict
        (``errorCode`` -900002) when S3 raises ``ClientError``.
    """
    s3_client = boto3.client("s3", region_name=s3_region)
    try:
        response = s3_client.get_object(Bucket=s3_bucket, Key=s3_key)
        return response["Body"].read()
    except ClientError:
        logger.exception("get_object failed for s3://%s/%s", s3_bucket, s3_key)
        return {
            "errorCode": -900002,
            "message": "Failed to download file from S3.",
            "result": None,
        }


def convert_single_page(pdf_data, resolution, prefix, metadata, unique_id, day,
                        s3_bucket_name, s3_region, file_name, page_num):
    """Render one PDF page to PNG and upload it to S3.

    Args:
        pdf_data: Raw PDF bytes.
        resolution: Target resolution in DPI.
        prefix, unique_id, file_name: Parts of the destination key.
        metadata: User metadata stored with the uploaded object.
        day: Number of days added to "now" for the ``Expires`` value.
        s3_bucket_name, s3_region: Upload destination.
        page_num: Zero-based index of the page to render.

    Returns:
        The dict returned by ``save_page_to_s3``, or an error dict
        (``errorCode`` -900003) if rendering or encoding fails.
    """
    try:
        pdf_document = fitz.open(stream=pdf_data, filetype="pdf")
        try:
            page = pdf_document[page_num]  # only the requested page is rendered
            # PDF user space is 72 units per inch, so scaling by
            # resolution / 72 renders at `resolution` DPI. The output size in
            # pixels is therefore page size in points * resolution / 72.
            zoom = resolution / 72
            pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom))
            img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
        finally:
            # Release the document even when rendering raises.
            pdf_document.close()

        # Encode the image as PNG in memory.
        img_buffer = io.BytesIO()
        img.save(img_buffer, format="PNG")

        return save_page_to_s3(prefix, metadata, img_buffer.getvalue(), unique_id,
                               day, s3_bucket_name, s3_region, file_name)
    except Exception:
        logger.exception("Failed to convert page %s", page_num)
        return {
            "errorCode": -900003,
            "message": "PDF single page conversion error.",
            "result": None,
        }


def save_page_to_s3(prefix, metadata, file_stream, unique_id, day,
                    s3_bucket_name, s3_region, file_name):
    """Upload a byte string to S3 and return its key.

    Args:
        prefix, unique_id, file_name: Joined as ``prefix/unique_id/file_name``
            to form the object key.
        metadata: User metadata for the object; ``None`` is treated as ``{}``.
        file_stream: The bytes to upload.
        day: Number of days added to the current time for ``Expires``.
        s3_bucket_name, s3_region: Upload destination.

    Returns:
        ``{"errorCode": 0, ..., "result": {"key": <key>}}`` on success, or an
        error dict (``errorCode`` -900004) when the upload fails.
    """
    s3_client = boto3.client("s3", region_name=s3_region)
    try:
        s3_key = f"{prefix}/{unique_id}/{file_name}"

        # Use an aware UTC timestamp so the serialized value does not depend
        # on the host's local timezone.
        # NOTE(review): S3's `Expires` is a caching header; it does not delete
        # the object. If deletion after `day` days is intended, a bucket
        # lifecycle rule is needed -- confirm the intent.
        expires_at = datetime.now(timezone.utc) + timedelta(days=day)
        extra_args = {"Metadata": metadata or {}, "Expires": expires_at}

        s3_client.upload_fileobj(
            io.BytesIO(file_stream),
            s3_bucket_name,
            s3_key,
            ExtraArgs=extra_args,
        )
        return {"errorCode": 0, "message": "success.", "result": {"key": s3_key}}
    # NOTE(review): boto3's managed transfers may surface a failed upload as
    # S3UploadFailedError rather than ClientError -- confirm for the pinned
    # boto3 version. Catching both keeps the -900004 mapping either way.
    except (ClientError, S3UploadFailedError):
        logger.exception("Upload failed for bucket %s", s3_bucket_name)
        return {
            "errorCode": -900004,
            "message": "Failed to save file from S3.",
            "result": None,
        }


def lambda_handler(event, context):
    """Lambda entry point: download a PDF, render one page, upload the PNG.

    Reads ``resolution``, ``prefix``, ``metadata``, ``day``, ``s3Bucket``,
    ``s3Region``, ``uniqueId``, ``fileName``, ``pdfFileKey`` and ``pageNum``
    from ``event``.

    Returns:
        The result dict of the failing or final step.
    """
    try:
        resolution = event.get("resolution")
        prefix = event.get("prefix")
        metadata = event.get("metadata", {})
        day = event.get("day")
        s3_bucket = event.get("s3Bucket")
        s3_region = event.get("s3Region")
        unique_id = event.get("uniqueId")
        file_name = event.get("fileName")
        pdf_file_key = event.get("pdfFileKey")
        page_num = event.get("pageNum")

        pdf_data = download_pdf_from_s3(s3_bucket, pdf_file_key, s3_region)
        # A failed download comes back as an error dict, not bytes. Return it
        # directly instead of feeding it to the renderer, which would replace
        # the real -900002 error with a misleading -900003.
        if isinstance(pdf_data, dict):
            return pdf_data

        return convert_single_page(pdf_data, resolution, prefix, metadata,
                                   unique_id, day, s3_bucket, s3_region,
                                   file_name, page_num)
    except Exception:
        logger.exception("Unhandled error in lambda_handler")
        # "statusCode" is kept for existing callers; "errorCode" is added so
        # this payload has the same shape as every other error in this file.
        return {
            "statusCode": -900001,
            "errorCode": -900001,
            "message": "Lambda unknown error.",
            "result": None,
        }


# Question posted with this listing (original text): 怎么算像素点大小
# (How is the pixel size calculated? See the zoom comment in
# convert_single_page.)
03-22
# Second listing from the page. It uses boto3, ClientError, fitz, io,
# PIL.Image and save_page_to_s3 exactly as the first listing does.


def download_pdf_from_s3(s3_bucket, s3_key, s3_region):
    """Download a PDF object from S3.

    Returns:
        The object's raw bytes on success, or an error dict
        (``errorCode`` -900002) when S3 raises ``ClientError``.
    """
    s3_client = boto3.client("s3", region_name=s3_region)
    try:
        response = s3_client.get_object(Bucket=s3_bucket, Key=s3_key)
        return response["Body"].read()
    except ClientError:
        return {
            "errorCode": -900002,
            "message": "Failed to download file from S3.",
            "result": None,
        }


def batch_render_page(pdf_data, resolution, page_num, num_batches=8):
    """Render one PDF page as a list of horizontal strips.

    Args:
        pdf_data: Raw PDF bytes.
        resolution: Target resolution in DPI.
        page_num: Zero-based index of the page to render.
        num_batches: Number of equal-height strips to render (at least 1).

    Returns:
        A list of ``num_batches`` PIL images ordered top to bottom.

    Raises:
        ValueError: If ``num_batches`` is less than 1.
    """
    if num_batches < 1:
        raise ValueError("num_batches must be at least 1")

    pdf_document = fitz.open(stream=pdf_data, filetype="pdf")
    try:
        page = pdf_document.load_page(page_num)
        zoom = resolution / 72  # PDF user space is 72 units per inch
        mat = fitz.Matrix(zoom, zoom)
        width, height = page.rect.width, page.rect.height
        batch_height = height / num_batches

        images = []
        for i in range(num_batches):
            # Clip rectangle (in page coordinates) for the i-th strip.
            clip = fitz.Rect(0, i * batch_height, width, (i + 1) * batch_height)
            pix = page.get_pixmap(matrix=mat, clip=clip)
            images.append(
                Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
            )
        return images
    finally:
        # Release the document even when a strip fails to render.
        pdf_document.close()


def convert_single_page(pdf_data, resolution, prefix, metadata, unique_id, day,
                        s3_bucket_name, s3_region, file_name, page_num,
                        num_batches=8):
    """Render one PDF page in strips, stitch them into a PNG, upload to S3.

    Returns:
        The dict returned by ``save_page_to_s3``, or an error dict
        (``errorCode`` -900003) whose ``result`` is the exception text.
    """
    try:
        images = batch_render_page(pdf_data, resolution, page_num, num_batches)

        img_width = images[0].width
        # Total height is the sum of all strip heights.
        img_height = sum(img.height for img in images)

        # Paste each strip below the previous one on a blank canvas.
        combined_image = Image.new("RGB", (img_width, img_height))
        current_height = 0
        for img in images:
            combined_image.paste(img, (0, current_height))
            current_height += img.height

        # Encode the stitched image as PNG in memory.
        img_buffer = io.BytesIO()
        combined_image.save(img_buffer, format="PNG")

        return save_page_to_s3(prefix, metadata, img_buffer.getvalue(), unique_id,
                               day, s3_bucket_name, s3_region, file_name)
    except Exception as e:
        return {
            "errorCode": -900003,
            "message": "PDF single page conversion error.",
            "result": str(e),
        }


# Question posted with this listing (original text):
# 这个会卡在page.get_pixmap,怎么用多线程解决,CPU密集吗给出改进后的完整代码
# (It hangs in page.get_pixmap; how can multithreading solve this, is it
# CPU-bound, please give the complete improved code.)
# NOTE(review): rasterizing in get_pixmap is CPU-bound work. The PyMuPDF
# documentation states that it does not support multithreaded use, so
# parallel rendering would need separate processes that each open their own
# document -- confirm against the PyMuPDF version in use.
03-20
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值