package com.example.flink;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.functions.AggregateFunction;
import org.apache.flink.api.common.state.ListState;
import org.apache.flink.api.common.state.ListStateDescriptor;
import org.apache.flink.api.common.state.MapStateDescriptor;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.connector.kafka.sink.KafkaSink;
import org.apache.flink.connector.kafka.source.KafkaSource;
import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.co.KeyedBroadcastProcessFunction;
import org.apache.flink.streaming.api.functions.source.RichSourceFunction;
import org.apache.flink.util.Collector;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;
import java.util.*;
// Simple data holder: one row of the metric_config table, describing one metric.
class MetricConfig {
// Unique identifier of the metric (config-table primary key).
public String metricCode;
// Kafka topic whose events feed this metric.
public String sourceTopic;
// Event field to group results by.
public String groupField;
// Event field whose value is aggregated.
public String staField;
public String staType; // "sum" or "count"
// timeField / sinkTopic etc. omitted for brevity
}
// Simplified, already-parsed form of one Kafka event.
class InputEvent {
// Topic the event came from; matched against MetricConfig.sourceTopic.
public String sourceTopic;
// Value of the group-by field for this event.
public String groupField;
// Numeric value to aggregate — presumably the value of the configured
// staField; confirm against the real JSON parser (mocked in this demo).
public Double value;
}
public class DynamicMetricJob {
/**
 * Job entry point: wires the pattern-subscribed Kafka source, the periodic
 * MySQL config source, and the broadcast join that evaluates dynamically
 * configured metrics, then sinks JSON results back to Kafka.
 *
 * @param args unused command-line arguments
 * @throws Exception if job graph construction or execution fails
 */
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // --- 1. Kafka source: regex-subscribe to every topic named "metrics-*" ---
    KafkaSource<String> kafkaSource = KafkaSource.<String>builder()
            .setBootstrapServers("localhost:9092")
            .setTopicsPattern("metrics-.*") // regex topic subscription
            .setGroupId("flink-metric-group")
            .setStartingOffsets(OffsetsInitializer.earliest())
            .build();
    DataStreamSource<String> kafkaStream = env.fromSource(
            kafkaSource,
            WatermarkStrategy.noWatermarks(),
            "KafkaSource");

    // --- 2. Config source: poll MySQL every 10 seconds ---
    DataStreamSource<MetricConfig> configStream = env.addSource(new MySqlConfigSource(10000));

    // --- 3. Broadcast the configs ---
    // broadcast(...) requires a MapStateDescriptor (the previous
    // ListStateDescriptor does not compile). Key by metricCode so each
    // poll upserts configs instead of accumulating duplicates. Name and
    // types must match the descriptor used inside the process function.
    var configDesc = new MapStateDescriptor<>("metric-configs", String.class, MetricConfig.class);
    var broadcastStream = configStream.broadcast(configDesc);

    // --- 4. Connect data stream & broadcast stream ---
    // A KeyedBroadcastProcessFunction requires a KEYED data stream, so key
    // by the record value (demo keying: the function reads getCurrentKey()
    // as the event's source topic).
    kafkaStream
            .keyBy(value -> value)
            .connect(broadcastStream)
            .process(new DynamicMetricProcessFunction())
            .sinkTo(KafkaSink.<String>builder()
                    .setBootstrapServers("localhost:9092")
                    .setRecordSerializer(
                            org.apache.flink.connector.kafka.sink.KafkaRecordSerializationSchema.builder()
                                    .setTopic("metric-result")
                                    .setValueSerializationSchema(new org.apache.flink.api.common.serialization.SimpleStringSchema())
                                    .build())
                    .build());

    env.execute("Dynamic Metric Flink Job");
}
/**
 * Config source: periodically polls the MySQL {@code metric_config} table and
 * emits every row as a {@link MetricConfig}.
 *
 * <p>Demo-quality: connection settings and credentials are hard-coded, and a
 * fresh connection is opened per poll cycle.
 */
public static class MySqlConfigSource extends RichSourceFunction<MetricConfig> {

    /** Delay between consecutive polls, in milliseconds. */
    private final long intervalMs;
    /** Volatile so cancel() from another thread is seen by the poll loop. */
    private volatile boolean running = true;

    public MySqlConfigSource(long intervalMs) {
        this.intervalMs = intervalMs;
    }

    @Override
    public void run(SourceContext<MetricConfig> ctx) throws Exception {
        while (running) {
            // ResultSet participates in try-with-resources so it is closed
            // promptly even if reading a row throws (previously it was only
            // closed implicitly when the Statement closed).
            try (Connection conn = DriverManager.getConnection(
                    "jdbc:mysql://localhost:3306/risk", "user", "password");
                 Statement stmt = conn.createStatement();
                 ResultSet rs = stmt.executeQuery(
                         "SELECT metric_code, source_topic, group_field, sta_field, sta_type FROM metric_config")) {
                while (rs.next()) {
                    MetricConfig cfg = new MetricConfig();
                    cfg.metricCode = rs.getString("metric_code");
                    cfg.sourceTopic = rs.getString("source_topic");
                    cfg.groupField = rs.getString("group_field");
                    cfg.staField = rs.getString("sta_field");
                    cfg.staType = rs.getString("sta_type");
                    ctx.collect(cfg);
                }
            }
            try {
                Thread.sleep(intervalMs);
            } catch (InterruptedException e) {
                // Restore the interrupt flag and stop polling instead of
                // propagating, so cancellation shuts the source down cleanly.
                Thread.currentThread().interrupt();
                break;
            }
        }
    }

    @Override
    public void cancel() {
        running = false;
    }
}
/**
 * Dynamic metric evaluation: matches each incoming event against the currently
 * broadcast metric configurations and emits one JSON result per matching config.
 *
 * <p>Fixes over the previous version: configs are kept in BROADCAST state
 * (keyed {@code ListState} obtained in {@code open()} is not accessible from
 * {@code processBroadcastElement} and fails at runtime), and configs are
 * upserted by metricCode instead of {@code clear()+add()}, which retained only
 * the single most recent config.
 */
public static class DynamicMetricProcessFunction
        extends KeyedBroadcastProcessFunction<String, String, MetricConfig, String> {

    /**
     * Broadcast-state descriptor. Name and types must match the descriptor
     * passed to {@code broadcast(...)} when the job graph is built.
     */
    private static final MapStateDescriptor<String, MetricConfig> CONFIG_DESC =
            new MapStateDescriptor<>("metric-configs", String.class, MetricConfig.class);

    @Override
    public void processElement(String value,
                               ReadOnlyContext ctx,
                               Collector<String> out) throws Exception {
        // The Kafka payload would normally be parsed from JSON; mocked here.
        InputEvent event = new InputEvent();
        event.sourceTopic = ctx.getCurrentKey();
        event.groupField = "userA";
        event.value = 1.0;

        // Evaluate every known config whose sourceTopic matches this event.
        for (Map.Entry<String, MetricConfig> entry
                : ctx.getBroadcastState(CONFIG_DESC).immutableEntries()) {
            MetricConfig cfg = entry.getValue();
            // Null-safe comparison: a config row may carry a null source_topic.
            if (cfg.sourceTopic != null && cfg.sourceTopic.equals(event.sourceTopic)) {
                double metricValue = 0;
                if ("sum".equalsIgnoreCase(cfg.staType)) {
                    metricValue = event.value; // real accumulation belongs in an Aggregator
                } else if ("count".equalsIgnoreCase(cfg.staType)) {
                    metricValue = 1;
                }
                out.collect(String.format(
                        "{\"group\":\"%s\",\"metricCode\":\"%s\",\"metricValue\":%f}",
                        event.groupField, cfg.metricCode, metricValue));
            }
        }
    }

    @Override
    public void processBroadcastElement(MetricConfig cfg,
                                        Context ctx,
                                        Collector<String> out) throws Exception {
        // Upsert by metricCode: repeated MySQL polls overwrite older versions
        // of the same config while preserving all other configs.
        // (Deletion of removed configs is still unsupported in this demo.)
        ctx.getBroadcastState(CONFIG_DESC).put(cfg.metricCode, cfg);
    }
}
}
【flink】动态拉取配置执行计算demo
最新推荐文章于 2025-11-25 14:07:10 发布
6501

被折叠的评论
为什么被折叠?



