Apache Pinot是一个开源的实时分布式OLAP(Online Analytical Processing,联机分析处理)数据存储系统,专为低延迟、高并发的分析查询而设计,使得基于海量实时数据的快速分析和决策(如程序化广告竞价)成为可能。
我们为什么选择Apache Pinot?
确保竞价决策的实时性: 为了做出最优决策,系统必须能迅速查询到最新的历史性能数据,如特定用户群体、地理位置、设备类型下的平均CPC(每次点击成本)、CTR(点击率)等。Pinot正是为此类低延迟(毫秒级)分析查询而设计。它能确保在竞价决策的关键路径上,数据查询不会成为瓶颈。相比之下,传统的Hadoop、Spark SQL或OLTP数据库可能无法满足如此严苛的延迟要求。
保障系统的高可用与高并发: 大型DSP每天需要处理数亿甚至数十亿次的竞价请求,每个竞价请求都可能触发一次或多次对历史数据的查询,这带来了巨大的查询并发压力。Pinot的分布式架构和优化设计使其能够高效地处理高并发的查询请求,保证系统在流量高峰时依然稳定可靠。
利用最新的数据: 广告效果是动态变化的,今天的策略可能明天就不再有效。系统需要基于最新的数据(分钟级甚至秒级更新)进行决策。Pinot能直接从Kafka等消息队列中消费实时事件流(如广告展示、点击、转化数据),近乎实时地将这些数据索引并可供查询,确保决策依据的数据是“热乎的”。
高效执行复杂的分析聚合查询: Pinot采用列式存储和多种索引技术,对这类分析查询进行了深度优化,查询性能远超传统的行式数据库。Pinot的Schema设计和预聚合功能也进一步加速特定的查询模式。
代码实操
<dependency>
<groupId>org.apache.pinot</groupId>
<artifactId>pinot-java-client</artifactId>
<version>${pinot.version}</version>
</dependency>application.yml
server:
port: 8080
# Apache Pinot相关配置
pinot:
controller:
host: localhost # Pinot Controller的主机地址
port: 9000 # Pinot Controller的端口
broker:
# Broker地址列表,用于负载均衡或高可用
hosts:
- localhost:8009
connection:
# 查询超时时间(毫秒),防止长时间查询阻塞
query-timeout-ms: 30000
# 建立连接的超时时间(毫秒)
connection-timeout-ms: 5000
# Socket读取超时时间(毫秒)
socket-timeout-ms: 10000
# 默认数据库和表(如果查询中未指定)
default:
database: defaultDatabase
table: defaultTable
# 日志级别配置
logging:
level:
# 为本项目包设置DEBUG级别,便于查看详细日志
com.example.pinotdemo: DEBUG
# 为Pinot客户端设置INFO级别,查看连接和查询日志
org.apache.pinot.client: INFOPinot连接配置类
package com.example.pinotdemo.config;
import org.apache.pinot.client.Connection;
import org.apache.pinot.client.PinotClientException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import java.net.MalformedURLException;
import java.util.List;
/**
* Pinot连接配置类
* 用于创建和管理Pinot连接的Spring Bean
*/
@Configuration
public class PinotConfig {
private static final Logger logger = LoggerFactory.getLogger(PinotConfig.class);
// 从application.yml中注入Broker主机列表
@Value("${pinot.broker.hosts}")
private List<String> brokerHosts;
// 从application.yml中注入Controller主机和端口(虽然连接Broker,但有时需要Controller信息)
@Value("${pinot.controller.host}")
private String controllerHost;
@Value("${pinot.controller.port}")
private int controllerPort;
// 从application.yml中注入各种超时配置
@Value("${pinot.connection.query-timeout-ms:30000}")
private int queryTimeoutMs;
@Value("${pinot.connection.connection-timeout-ms:5000}")
private int connectionTimeoutMs;
@Value("${pinot.connection.socket-timeout-ms:10000}")
private int socketTimeoutMs;
/**
* 定义一个名为pinotConnection的Bean
* @return Pinot Connection对象
* @throws MalformedURLException 如果Broker URL格式错误
* @throws PinotClientException 如果连接Pinot失败
*/
@Bean
public Connection pinotConnection() throws MalformedURLException, PinotClientException {
// 将主机列表转换为逗号分隔的字符串,用于构建JDBC URL
String brokerUrl = String.join(",", brokerHosts);
logger.info("正在初始化Pinot连接到Brokers: {}", brokerUrl);
// 构建Pinot的JDBC连接URL
String url = "jdbc:pinot://" + brokerUrl;
// 使用Pinot客户端库创建连接
Connection connection = Connection.fromUrl(url);
// 记录超时配置
logger.debug("查询超时: {}ms", queryTimeoutMs);
logger.debug("连接超时: {}ms", connectionTimeoutMs);
logger.debug("Socket超时: {}ms", socketTimeoutMs);
logger.info("Pinot连接初始化成功。");
return connection;
}
}Pinot Service
package com.example.pinotdemo.service;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.pinot.client.Connection;
import org.apache.pinot.client.ResultSetGroup;
import org.apache.pinot.client.Request;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* Pinot服务类
* 封装了与Pinot数据库交互的具体逻辑
*/
@Service
public class PinotService {
private static final Logger logger = LoggerFactory.getLogger(PinotService.class);
@Autowired
private Connection pinotConnection;
@Autowired
private ObjectMapper objectMapper;
/**
* 执行原始SQL查询
* 这是一个通用方法,可以执行任何有效的Pinot SQL查询
*
* @param sql 要执行的SQL查询语句
* @return 查询结果,以List<Map<String, Object>>格式返回,其中Map代表一行数据
*/
public List<Map<String, Object>> executeQuery(String sql) {
logger.debug("正在执行Pinot查询: {}", sql);
List<Map<String, Object>> results = new ArrayList<>();
try {
// 创建一个查询请求对象
Request request = new Request(sql);
// 通过连接执行查询,返回ResultSetGroup(可能包含多个结果集,但通常只有一个)
ResultSetGroup resultSetGroup = pinotConnection.execute(request);
// 遍历可能存在的多个结果集
for (int i = 0; i < resultSetGroup.getResultSetCount(); i++) {
var resultSet = resultSetGroup.getResultSet(i);
// 获取列名
int columnCount = resultSet.getColumnCount();
String[] columnNames = new String[columnCount];
for (int j = 0; j < columnCount; j++) {
columnNames[j] = resultSet.getColumnName(j);
}
while (resultSet.next()) {
Map<String, Object> row = new HashMap<>();
for (int j = 0; j < columnCount; j++) {
String columnName = columnNames[j];
Object value = resultSet.getObject(j);
row.put(columnName, value);
}
results.add(row);
}
}
logger.info("查询返回了 {} 行数据。", results.size());
} catch (Exception e) {
logger.error("执行Pinot查询时出错: {}", sql, e);
throw new RuntimeException("执行Pinot查询失败: " + e.getMessage(), e);
}
return results;
}
/**
* 查询广告性能数据的特定方法
* 演示如何构建一个具体的业务查询
*
* @param country 国家筛选条件
* @param deviceType 设备类型筛选条件
* @param adSlot 广告位筛选条件
* @return 包含性能指标的Map列表
*/
public List<Map<String, Object>> getAdPerformanceData(String country, String deviceType, String adSlot) {
// 构建SQL查询语句,使用聚合函数SUM, AVG等来计算指标
String sql = String.format(
"SELECT sum(impressions) AS total_impressions, " + // 计算总展示次数
" sum(clicks) AS total_clicks, " + // 计算总点击次数
" sum(spend) AS total_spend, " + // 计算总花费
" AVG(cpc) AS avg_cpc " + // 计算平均CPC
"FROM ad_performance_table " + // 从广告性能表查询
"WHERE country = '%s' AND device_type = '%s' AND ad_slot = '%s' " + // 应用筛选条件
"AND timeColumn > ago('1d')",
country, deviceType, adSlot
);
logger.info("正在获取广告性能数据 - 国家: {}, 设备: {}, 广告位: {}", country, deviceType, adSlot);
return executeQuery(sql);
}
/**
* 查询实时指标的示例方法
* 可用于实时仪表盘或告警
*/
public List<Map<String, Object>> getRealTimeMetrics() {
// 查询最近5分钟内的事件类型计数
String sql = "SELECT event_type, count(*) AS event_count FROM streaming_events_table WHERE timeColumn > ago('5m') GROUP BY event_type";
logger.info("正在获取最近5分钟的实时指标。");
return executeQuery(sql);
}
}Pinot Controller
package com.example.pinotdemo.controller;
import com.example.pinotdemo.service.PinotService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.*;
import java.util.List;
import java.util.Map;
@RestController
@RequestMapping("/api/pinot")
public class PinotController {
private static final Logger logger = LoggerFactory.getLogger(PinotController.class);
@Autowired
private PinotService pinotService;
/**
* 接收SQL查询请求的API端点
* @param sql 通过URL参数传递的SQL查询语句
* @return 查询结果或错误响应
*/
@GetMapping("/query")
public ResponseEntity<List<Map<String, Object>>> runQuery(@RequestParam String sql) {
logger.info("收到查询请求: {}", sql);
try {
List<Map<String, Object>> results = pinotService.executeQuery(sql);
return ResponseEntity.ok(results);
} catch (Exception e) {
logger.error("处理查询请求时出错", e);
return ResponseEntity.badRequest().build();
}
}
/**
* 获取广告性能数据的API端点
* 这是一个更具体的业务接口
*/
@GetMapping("/ad-performance")
public ResponseEntity<List<Map<String, Object>>> getAdPerformance(
@RequestParam String country, // 从URL参数获取国家
@RequestParam String deviceType,// 从URL参数获取设备类型
@RequestParam String adSlot) { // 从URL参数获取广告位
logger.info("收到广告性能请求 - 国家: {}, 设备: {}, 广告位: {}", country, deviceType, adSlot);
try {
List<Map<String, Object>> results = pinotService.getAdPerformanceData(country, deviceType, adSlot);
return ResponseEntity.ok(results);
} catch (Exception e) {
logger.error("处理广告性能请求时出错", e);
return ResponseEntity.badRequest().build();
}
}
/**
* 获取实时指标的API端点
*/
@GetMapping("/real-time-metrics")
public ResponseEntity<List<Map<String, Object>>> getRealTimeMetrics() {
logger.info("收到实时指标请求");
try {
List<Map<String, Object>> results = pinotService.getRealTimeMetrics();
return ResponseEntity.ok(results);
} catch (Exception e) {
logger.error("处理实时指标请求时出错", e);
return ResponseEntity.badRequest().build();
}
}
/**
* 健康检查端点
*/
@GetMapping("/health")
public ResponseEntity<Map<String, String>> health() {
// 返回简单的健康状态信息
Map<String, String> status = Map.of("status", "UP", "component", "Pinot Integration Demo");
return ResponseEntity.ok(status);
}
}BidRequest
package com.example.pinotdemo.dto;
import com.fasterxml.jackson.annotation.JsonProperty;
/**
* 竞价请求数据传输对象 (Data Transfer Object)
* 定义了从广告交易平台(ADX)接收到的竞价请求的数据结构
* 使用@JsonProperty注解映射JSON字段名到Java属性
*/
public class BidRequest {
@JsonProperty("request_id")
private String requestId; // 竞价请求的唯一标识符
@JsonProperty("user_agent")
private String userAgent; // 用户浏览器的User-Agent字符串
@JsonProperty("ip")
private String ip; // 用户的IP地址
@JsonProperty("device_type")
private String deviceType; // 设备类型 (e.g., mobile, desktop, tablet)
@JsonProperty("os")
private String os; // 操作系统 (e.g., iOS, Android, Windows)
@JsonProperty("browser")
private String browser; // 浏览器名称 (e.g., Chrome, Safari)
@JsonProperty("country")
private String country; // 用户所在国家
@JsonProperty("city")
private String city; // 用户所在城市
@JsonProperty("ad_slot")
private String adSlot; // 广告位标识 (e.g., banner_top, video_pre)
@JsonProperty("floor_price")
private double floorPrice; // 广告主设定的最低出价(底价)
public BidRequest() {}
public BidRequest(String requestId, String userAgent, String ip, String deviceType, String os, String browser, String country, String city, String adSlot, double floorPrice) {
this.requestId = requestId;
this.userAgent = userAgent;
this.ip = ip;
this.deviceType = deviceType;
this.os = os;
this.browser = browser;
this.country = country;
this.city = city;
this.adSlot = adSlot;
this.floorPrice = floorPrice;
}
public String getRequestId() { return requestId; }
public void setRequestId(String requestId) { this.requestId = requestId; }
public String getUserAgent() { return userAgent; }
public void setUserAgent(String userAgent) { this.userAgent = userAgent; }
public String getIp() { return ip; }
public void setIp(String ip) { this.ip = ip; }
public String getDeviceType() { return deviceType; }
public void setDeviceType(String deviceType) { this.deviceType = deviceType; }
public String getOs() { return os; }
public void setOs(String os) { this.os = os; }
public String getBrowser() { return browser; }
public void setBrowser(String browser) { this.browser = browser; }
public String getCountry() { return country; }
public void setCountry(String country) { this.country = country; }
public String getCity() { return city; }
public void setCity(String city) { this.city = city; }
public String getAdSlot() { return adSlot; }
public void setAdSlot(String adSlot) { this.adSlot = adSlot; }
public double getFloorPrice() { return floorPrice; }
public void setFloorPrice(double floorPrice) { this.floorPrice = floorPrice; }
}BidResponse
package com.example.pinotdemo.dto;
import com.fasterxml.jackson.annotation.JsonProperty;
/**
* 竞价响应数据传输对象
* 定义了发送给广告交易平台(ADX)的竞价响应的数据结构
* 使用@JsonProperty注解映射JSON字段名到Java属性
*/
public class BidResponse {
@JsonProperty("request_id")
private String requestId; // 对应的竞价请求ID
@JsonProperty("bid")
private boolean bid; // 是否出价 (true: 出价, false: 不出价)
@JsonProperty("bid_price")
private double bidPrice; // 出价金额
@JsonProperty("creative_id")
private String creativeId; // 如果出价成功,关联的广告创意ID
@JsonProperty("ad_id")
private String adId; // 如果出价成功,关联的广告ID
public BidResponse() {}
public BidResponse(String requestId, boolean bid, double bidPrice, String creativeId, String adId) {
this.requestId = requestId;
this.bid = bid;
this.bidPrice = bidPrice;
this.creativeId = creativeId;
this.adId = adId;
}
public String getRequestId() { return requestId; }
public boolean isBid() { return bid; }
public double getBidPrice() { return bidPrice; }
public String getCreativeId() { return creativeId; }
public String getAdId() { return adId; }
public void setRequestId(String requestId) { this.requestId = requestId; }
public void setBid(boolean bid) { this.bid = bid; }
public void setBidPrice(double bidPrice) { this.bidPrice = bidPrice; }
public void setCreativeId(String creativeId) { this.creativeId = creativeId; }
public void setAdId(String adId) { this.adId = adId; }
}Bidding Controller
package com.example.pinotdemo.controller;
import com.example.pinotdemo.dto.BidRequest;
import com.example.pinotdemo.dto.BidResponse;
import com.example.pinotdemo.service.BiddingService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.*;
import java.util.List;
import java.util.Map;
/**
* 竞价控制器
* 处理来自广告交易平台(ADX)的实时竞价(RTB)请求
*/
@RestController
@RequestMapping("/api/bidding")
public class BiddingController {
private static final Logger logger = LoggerFactory.getLogger(BiddingController.class);
@Autowired
private BiddingService biddingService;
/**
* 处理竞价请求的API端点
* @param bidRequest 包含竞价信息的JSON请求体
* @return 包含竞价决策结果的JSON响应
*/
@PostMapping("/bid")
public ResponseEntity<BidResponse> handleBidRequest(@RequestBody BidRequest bidRequest) {
logger.info("收到竞价请求: {}", bidRequest.getRequestId());
try {
// 调用服务层处理竞价逻辑
BidResponse response = biddingService.processBidRequest(bidRequest);
logger.info("处理完竞价请求: {}. 出价: {}, 价格: {}", bidRequest.getRequestId(), response.isBid(), response.getBidPrice());
return ResponseEntity.ok(response);
} catch (Exception e) {
logger.error("处理竞价请求时出错: {}", bidRequest.getRequestId(), e);
BidResponse errorResponse = new BidResponse(bidRequest.getRequestId(), false, 0.0, null, null);
return ResponseEntity.ok(errorResponse);
}
}
/**
* 获取竞价决策日志的API端点(示例)
* 这个端点可以查询存储在Pinot中的历史竞价决策记录
*/
@GetMapping("/decision-logs")
public ResponseEntity<List<Map<String, Object>>> getDecisionLogs() {
logger.info("正在从Pinot获取竞价决策日志...");
// 示例:SELECT * FROM bidding_decisions_log WHERE timestamp > ago('1h')
// 实现取决于如何在Pinot中存储决策日志
// return ResponseEntity.ok(pinotService.executeQuery("SELECT * FROM ..."));
return ResponseEntity.ok(List.of());
}
}Bidding Service
package com.example.pinotdemo.service;
import com.example.pinotdemo.dto.BidRequest;
import com.example.pinotdemo.dto.BidResponse;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Random;
/**
* 竞价服务类
* 核心业务逻辑层,负责根据竞价请求和Pinot中的实时数据做出竞价决策
*/
@Service
public class BiddingService {
private static final Logger logger = LoggerFactory.getLogger(BiddingService.class);
@Autowired
private PinotService pinotService;
private final Random random = new Random();
/**
* 处理竞价请求的主要方法
* @param request 来自ADX的竞价请求对象
* @return 竞价决策响应对象
*/
public BidResponse processBidRequest(BidRequest request) {
String requestId = request.getRequestId();
String country = request.getCountry();
String deviceType = request.getDeviceType();
String adSlot = request.getAdSlot();
double floorPrice = request.getFloorPrice();
logger.info("正在处理竞价请求: ID={}, 国家={}, 设备={}, 广告位={}, 底价={}", requestId, country, deviceType, adSlot, floorPrice);
// 1. 查询Pinot获取相关的实时性能数据
// 调用PinotService中的方法,该方法会执行SQL查询并返回结果
List<Map<String, Object>> performanceData = pinotService.getAdPerformanceData(country, deviceType, adSlot);
// 2. 分析查询到的数据,做出决策
// 从查询结果中提取关键指标,例如平均CPC (Cost Per Click)
Optional<Double> avgCpcOpt = performanceData.stream()
.findFirst() // 假设聚合查询只返回一行数据
.map(row -> (Double) row.get("avg_cpc")); // 获取"avg_cpc"列的值
if (!avgCpcOpt.isPresent() || avgCpcOpt.get() == null) {
logger.warn("未找到请求 {} 的历史 avg_cpc 数据,使用默认策略。", requestId);
// 如果没有历史数据,执行默认策略
return createDefaultBidResponse(requestId, floorPrice);
}
double avgCpc = avgCpcOpt.get();
logger.debug("为请求 {} 计算出的 avg_cpc: {}", requestId, avgCpc);
// 根据策略(例如:avg_cpc * 因子,同时尊重底价)计算出价
double calculatedBidPrice = calculateBidPrice(avgCpc, floorPrice);
// 3. 决定是否出价以及出价金额
if (calculatedBidPrice > floorPrice) {
// 决定出价
String creativeId = selectCreativeId(request); // 选择合适的广告创意ID
String adId = deriveAdId(creativeId); // 根据创意ID推导广告ID
logger.info("请求 {} 赢得竞价,价格: {}", requestId, calculatedBidPrice);
// 返回包含出价信息的响应
return new BidResponse(requestId, true, calculatedBidPrice, creativeId, adId);
} else {
// 决定不出价
logger.info("请求 {} 未赢得竞价,计算价格: {} <= 底价: {}", requestId, calculatedBidPrice, floorPrice);
// 返回不出价的响应
return new BidResponse(requestId, false, 0.0, null, null);
}
}
/**
* 当没有历史数据时,使用的默认竞价策略
* @param requestId 请求ID
* @param floorPrice 底价
* @return 默认的竞价响应
*/
private BidResponse createDefaultBidResponse(String requestId, double floorPrice) {
// 默认策略:如果没有数据,有30%的概率出价,出价略高于底价
boolean shouldBid = random.nextDouble() > 0.7; // 70%概率不竞价,30%概率竞价
if (shouldBid) {
double defaultBid = floorPrice * 1.05; // 出价比底价高5%
String creativeId = "default_creative_" + random.nextInt(10); // 随机选择一个默认创意
String adId = deriveAdId(creativeId);
return new BidResponse(requestId, true, defaultBid, creativeId, adId);
} else {
// 不出价
return new BidResponse(requestId, false, 0.0, null, null);
}
}
/**
* 根据历史平均CPC和底价计算本次出价
* @param avgCpc 历史平均CPC
* @param floorPrice 底价
* @return 计算出的出价
*/
private double calculateBidPrice(double avgCpc, double floorPrice) {
// 策略:出价比平均CPC高10%,但不能低于底价,也不能超过底价的1.5倍
double targetBid = avgCpc * 1.10; // 目标出价 = 平均CPC * 1.1
double maxBid = floorPrice * 1.5; // 出价上限 = 底价 * 1.5
// 最终出价 = max(底价, min(目标出价, 上限))
double finalBid = Math.max(floorPrice, Math.min(targetBid, maxBid));
logger.debug("计算出价: 目标={}, 上限={}, 最终={}", targetBid, maxBid, finalBid);
return finalBid;
}
/**
* 选择广告创意ID的逻辑(简化示例)
* 在真实场景中,这里会包含复杂的用户画像、创意匹配、预算控制等逻辑
* @param request 竞价请求
* @return 选中的创意ID
*/
private String selectCreativeId(BidRequest request) {
// 示例:基于广告位和时间戳生成一个创意ID
return"creative_" + request.getAdSlot() + "_" + System.currentTimeMillis() % 100;
}
/**
* 从创意ID推导广告ID的逻辑(简化示例)
* @param creativeId 创意ID
* @return 广告ID
*/
private String deriveAdId(String creativeId) {
// 示例:将创意ID前缀替换为广告ID前缀
return creativeId.replace("creative_", "ad_");
}
}Application
package com.example.pinotdemo;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
@SpringBootApplication
public class PinotIntegrationDemoApplication {
public static void main(String[] args) {
SpringApplication.run(PinotIntegrationDemoApplication.class, args);
}
}关注我,送Java福利
/**
* 这段代码只有Java开发者才能看得懂!
* 关注我微信公众号之后,
* 发送:"666",
* 即可获得一本由Java大神一手面试经验诚意出品
* 《Java开发者面试百宝书》Pdf电子书
* 福利截止日期为2025年02月28日止
* 手快有手慢没!!!
*/
System.out.println("请关注我的微信公众号:");
System.out.println("Java知识日历");
597

被折叠的 条评论
为什么被折叠?



