WIDTH_BUCKET函数

本文深入解析了WIDTH_BUCKET函数的工作原理及应用,展示了如何通过该函数将数据集划分成指定数量的存储桶,适用于数值和日期类型的数据。通过两个具体实例,读者可以了解如何在实际场景中使用此函数进行数据分组。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

WIDTH_BUCKET:-

===============

此函数将结果集分组到若干存储桶中,并尽量使各存储桶的宽度(取值区间)相等。该函数可用于数值或日期数据类型,需要4个参数。

a)用于划分存储桶的表达式。

b)第1个桶的起始值(范围下限)。

c)第N个桶的范围结束值(范围上限)。

d)要创建的桶数(N)。

示例Ex#1

===============

-- Example 1: total salary per (DEPTNO, EMPNO), classified into 3 equal-width
-- buckets over the range 1..3000.
-- Per Oracle's WIDTH_BUCKET documentation, values below the lower bound land
-- in bucket 0 and values at or above the upper bound land in bucket 4 (N + 1).
SELECT deptno,
       empno,
       SUM(sal)                           AS salary,
       WIDTH_BUCKET(SUM(sal), 1, 3000, 3) AS sal_bucket
FROM emp
GROUP BY deptno, empno
ORDER BY sal_bucket;
示例Ex#2

===============

-- Example 2: same grouping as example 1, but the 3 equal-width buckets now
-- span the range 1000..6000.
-- Per Oracle's WIDTH_BUCKET documentation, sums below 1000 fall into bucket 0
-- and sums of 6000 or more fall into bucket 4 (N + 1).
SELECT deptno,
       empno,
       SUM(sal)                              AS salary,
       WIDTH_BUCKET(SUM(sal), 1000, 6000, 3) AS sal_bucket
FROM emp
GROUP BY deptno, empno
ORDER BY sal_bucket;

From: https://bytes.com/topic/oracle/insights/671956-width_bucket-function

import io
import json
import logging
from datetime import datetime, timedelta, timezone

import boto3
import fitz
from botocore.exceptions import ClientError
from PIL import Image

logger = logging.getLogger(__name__)

# PDF page coordinates are expressed in points; one point is 1/72 inch.
POINTS_PER_INCH = 72


def download_pdf_from_s3(s3_bucket, s3_key, s3_region):
    """Download a PDF object from S3.

    Args:
        s3_bucket: Name of the bucket holding the PDF.
        s3_key: Object key of the PDF.
        s3_region: AWS region used to create the S3 client.

    Returns:
        The object's raw bytes on success. If S3 raises ``ClientError``, an
        error dict with ``errorCode`` -900002 is returned instead, so callers
        must check the type of the result.
    """
    s3_client = boto3.client("s3", region_name=s3_region)
    try:
        response = s3_client.get_object(Bucket=s3_bucket, Key=s3_key)
        return response["Body"].read()
    except ClientError:
        logger.exception("Failed to download s3://%s/%s", s3_bucket, s3_key)
        return {"errorCode": -900002, "message": "Failed to download file from S3.", "result": None}


def convert_single_page(pdf_data, resolution, prefix, metadata, unique_id, day,
                        s3_bucket_name, s3_region, file_name, page_num):
    """Render one PDF page to PNG and upload it to S3.

    The page is rendered with a scale factor of ``resolution / 72`` on both
    axes. Because PDF page units are points (1/72 inch), the resulting image
    is approximately ``page.rect.width * resolution / 72`` pixels wide and
    ``page.rect.height * resolution / 72`` pixels high (the renderer rounds to
    whole pixels); the exact values are ``pix.width`` and ``pix.height``.

    Args:
        pdf_data: Raw bytes of the PDF document.
        resolution: Target resolution in DPI.
        prefix, unique_id, file_name: Components of the destination S3 key.
        metadata: User metadata stored with the uploaded object.
        day: Number of days added to "now" for the ``Expires`` value.
        s3_bucket_name, s3_region: Destination bucket and region.
        page_num: Zero-based index of the page to render.

    Returns:
        The dict returned by ``save_page_to_s3``, or an error dict with
        ``errorCode`` -900003 if anything in the conversion raises.
    """
    try:
        pdf_document = fitz.open(stream=pdf_data, filetype="pdf")
        try:
            page = pdf_document[page_num]  # render only the requested page
            zoom = resolution / POINTS_PER_INCH
            pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom))
            img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
        finally:
            # Release the document even when rendering fails.
            pdf_document.close()

        # Encode the rendered page as PNG in memory.
        img_buffer = io.BytesIO()
        img.save(img_buffer, format="PNG")

        return save_page_to_s3(prefix, metadata, img_buffer.getvalue(), unique_id, day,
                               s3_bucket_name, s3_region, file_name)
    except Exception:
        logger.exception("Failed to convert page %s", page_num)
        return {"errorCode": -900003, "message": "PDF single page conversion error.", "result": None}


def save_page_to_s3(prefix, metadata, file_stream, unique_id, day, s3_bucket_name, s3_region, file_name):
    """Upload a file to S3 and return its key.

    Args:
        prefix, unique_id, file_name: Joined as ``prefix/unique_id/file_name``
            to form the object key.
        metadata: User metadata for the object; ``None`` is treated as ``{}``.
        file_stream: File content as bytes.
        day: Number of days from now used for the ``Expires`` value.
        s3_bucket_name, s3_region: Destination bucket and region.

    Returns:
        ``{"errorCode": 0, ..., "result": {"key": <s3 key>}}`` on success, or
        an error dict with ``errorCode`` -900004 if S3 raises ``ClientError``.
    """
    s3_client = boto3.client("s3", region_name=s3_region)
    try:
        s3_key = f"{prefix}/{unique_id}/{file_name}"

        # Timezone-aware timestamp so the value does not depend on the host's
        # local timezone.
        # NOTE(review): S3's ``Expires`` is an HTTP caching header stored with
        # the object; it presumably does not delete the object. Confirm that a
        # lifecycle rule handles real expiry if that is the intent.
        expires_at = datetime.now(timezone.utc) + timedelta(days=day)
        extra_args = {"Metadata": metadata or {}, "Expires": expires_at}

        s3_client.upload_fileobj(
            io.BytesIO(file_stream),
            s3_bucket_name,
            s3_key,
            ExtraArgs=extra_args,
        )
        return {"errorCode": 0, "message": "success.", "result": {"key": s3_key}}
    except ClientError:
        logger.exception("Failed to upload to s3://%s/%s/%s/%s", s3_bucket_name, prefix, unique_id, file_name)
        return {"errorCode": -900004, "message": "Failed to save file to S3.", "result": None}


def lambda_handler(event, context):
    """AWS Lambda entry point: download a PDF, render one page, upload the PNG.

    Reads ``resolution``, ``prefix``, ``metadata``, ``day``, ``s3Bucket``,
    ``s3Region``, ``uniqueId``, ``fileName``, ``pdfFileKey`` and ``pageNum``
    from ``event``.

    Returns:
        The result dict of ``convert_single_page``; the download error dict
        (``errorCode`` -900002) if the PDF could not be fetched; or an error
        dict with code -900001 for any unexpected exception.
    """
    try:
        resolution = event.get("resolution")
        prefix = event.get("prefix")
        metadata = event.get("metadata", {})
        day = event.get("day")
        s3_bucket = event.get("s3Bucket")
        s3_region = event.get("s3Region")
        unique_id = event.get("uniqueId")
        file_name = event.get("fileName")
        pdf_file_key = event.get("pdfFileKey")
        page_num = event.get("pageNum")

        pdf_data = download_pdf_from_s3(s3_bucket, pdf_file_key, s3_region)
        if isinstance(pdf_data, dict):
            # The download helper signals failure with an error dict; return
            # it as-is instead of feeding it to the renderer, which would
            # mask it as a conversion error.
            return pdf_data

        return convert_single_page(pdf_data, resolution, prefix, metadata, unique_id, day,
                                   s3_bucket, s3_region, file_name, page_num)
    except Exception:
        logger.exception("Unhandled error in lambda_handler")
        # "errorCode" matches every other error path; "statusCode" is kept for
        # callers that already read the old key.
        return {"errorCode": -900001, "statusCode": -900001, "message": "Lambda unknown error.", "result": None}


# Question asked in the original post: "How is the pixel size calculated?"
# See the docstring of convert_single_page for the page-size-to-pixels formula.
03-22
def download_pdf_from_s3(s3_bucket, s3_key, s3_region):
    """Download a PDF object from S3.

    Returns:
        The object's raw bytes on success, or an error dict with
        ``errorCode`` -900002 if S3 raises ``ClientError``.
    """
    s3_client = boto3.client('s3', region_name=s3_region)
    try:
        response = s3_client.get_object(Bucket=s3_bucket, Key=s3_key)
        return response['Body'].read()
    except ClientError:
        return {"errorCode": -900002, "message": "Failed to download file from S3.", "result": None}


def batch_render_page(pdf_data, resolution, page_num, num_batches=8):
    """Render one PDF page as ``num_batches`` horizontal strips.

    The page is split top-to-bottom into strips of equal height (in page
    units) and each strip is rendered separately with a scale factor of
    ``resolution / 72``.

    Args:
        pdf_data: Raw bytes of the PDF document.
        resolution: Target resolution in DPI.
        page_num: Zero-based index of the page to render.
        num_batches: Number of strips to render.

    Returns:
        A list of PIL images, one per strip, ordered top to bottom.

    NOTE(review): each strip's pixel size is rounded independently by the
    renderer, so the stitched height may differ slightly from a single
    full-page render. Verify on real documents.
    """
    pdf_document = fitz.open(stream=pdf_data, filetype="pdf")
    try:
        page = pdf_document.load_page(page_num)
        zoom = resolution / 72
        mat = fitz.Matrix(zoom, zoom)

        width, height = page.rect.width, page.rect.height
        batch_height = height / num_batches

        images = []
        for i in range(num_batches):
            # Clip rectangle of the i-th strip, in page coordinates.
            clip = fitz.Rect(0, i * batch_height, width, (i + 1) * batch_height)
            pix = page.get_pixmap(matrix=mat, clip=clip)
            images.append(Image.frombytes("RGB", [pix.width, pix.height], pix.samples))
        return images
    finally:
        # Release the document even when rendering fails.
        pdf_document.close()


def convert_single_page(pdf_data, resolution, prefix, metadata, unique_id, day,
                        s3_bucket_name, s3_region, file_name, page_num, num_batches=8):
    """Render one PDF page in strips, stitch them into a PNG and upload it.

    Returns:
        The dict returned by ``save_page_to_s3``, or an error dict with
        ``errorCode`` -900003 whose ``result`` holds the exception text.
    """
    try:
        images = batch_render_page(pdf_data, resolution, page_num, num_batches)

        img_width = images[0].width
        img_height = sum(img.height for img in images)  # strips are stacked vertically

        # Paste each strip below the previous one on a blank canvas.
        combined_image = Image.new("RGB", (img_width, img_height))
        current_height = 0
        for img in images:
            combined_image.paste(img, (0, current_height))
            current_height += img.height

        # Encode the stitched page as PNG in memory.
        img_buffer = io.BytesIO()
        combined_image.save(img_buffer, format="PNG")

        return save_page_to_s3(prefix, metadata, img_buffer.getvalue(), unique_id, day,
                               s3_bucket_name, s3_region, file_name)
    except Exception as e:
        return {"errorCode": -900003, "message": "PDF single page conversion error.", "result": str(e)}


# Question asked in the original post: "This stalls in page.get_pixmap. Can
# multithreading solve it, is it CPU-bound, and what would the complete
# improved code look like?"
# NOTE(review): rendering is presumably CPU-bound native work, and the PyMuPDF
# documentation states that the library does not support multithreaded use.
# Splitting into strips inside one process therefore does not parallelise the
# work. If parallelism is needed, consider separate processes that each open
# their own Document. Confirm against the PyMuPDF docs before changing.
03-20
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值