整体架构
MySQL (orders)
│
↓ (Flink CDC 实时同步)
Doris DWD: app_db.ods_orders ←─ Unique Key 模型(明细层)
│
↓ (定时批处理聚合)
Doris DWS: app_db.dws_orders ←─ Aggregate Key 模型(汇总层)
批处理:全量聚合+周期调度
更新方式:INSERT OVERWRITE 、REPLACE INTO
调度方式:Crontab、Airflow、Doris Manager、DolphinScheduler
更新频率:小表1-8分钟,一般30分钟-1小时,大表t+1
Flink + Doris Source & Sink(流批一体)
批处理作业
不自动监听源表数据变化
周期运行(Flink Batch Scheduler 或外部调度器(如 Airflow)
数据库准备
-- mysql 中创建 orders 表
CREATE TABLE `app_db`.`orders` (
`order_id` INT NOT NULL,
`customer` VARCHAR(40),
`amount` DECIMAL(10,2) NOT NULL,
PRIMARY KEY (`order_id`)
);
-- doris 中创建 orders 表
CREATE TABLE app_db.ods_orders (
order_id INT,
customer VARCHAR(40),
amount DECIMAL(10,2),
update_time DATETIME, -- 记录同步/更新时间
) ENGINE=OLAP
UNIQUE KEY(order_id)
DISTRIBUTED BY HASH(order_id) BUCKETS 10
PROPERTIES("replication_num" = "1");
-- doris 中创建 orders 聚合 表
CREATE TABLE app_db.dws_orders (
customer VARCHAR(40),
amount BIGINT SUM -- 聚合列
) ENGINE=OLAP
AGGREGATE KEY(customer)
DISTRIBUTED BY HASH(customer) BUCKETS 10
PROPERTIES("replication_num" = "1");
CDC作业
-- 创建 CDC 源表
CREATE TABLE mysql_orders_cdc (
`order_id` INT NOT NULL,
`customer` VARCHAR(40),
`amount` DECIMAL(10,2) NOT NULL,
PRIMARY KEY (order_id) NOT ENFORCED
) WITH (
'connector' = 'mysql-cdc',
'hostname' = 'mydoris',
'port' = '3306',
'username' = 'root',
'password' = 'Admin1234',
'database-name' = 'app_db',
'table-name' = 'orders',
'server-id' = '5400-5499',
'server-time-zone' = 'Asia/Shanghai'
);
-- 创建 Doris Sink 表
CREATE TABLE doris_ods_orders (
order_id INT,
customer VARCHAR(40),
amount DECIMAL(10,2),
update_time TIMESTAMP(0),
PRIMARY KEY (order_id) NOT ENFORCED
) WITH (
'connector' = 'doris',
'fenodes' = 'mydoris:8030',
'table.identifier' = 'app_db.ods_orders',
'username' = 'root',
'password' = 'Admin1234',
'sink.label-prefix' = 'flink_app_db_orders',
'sink.enable-delete' = 'true',
'sink.buffer-flush.interval' = '10s', --
'sink.buffer-flush.max-rows' = '100000',
'sink.buffer-flush.max-bytes' = '10485760' -- 可选:10MB
);
--计划作业
INSERT INTO doris_ods_orders
SELECT
order_id,
customer,
amount,
PROCTIME() AS update_time -- 自动填充处理时间
FROM mysql_orders_cdc;
提交作业
--提交作业
$FLINK_HOME/bin/sql-client.sh -f mysql-to-doris-order.sql
调度 Doris 汇总脚本
编写执行脚本(dws_orders_sync.sh)
#!/bin/bash
# 执行DWD→DWS的汇总SQL
mysql -h mydoris -P 9030 -u root -pAdmin1234 -e "
INSERT OVERWRITE table app_db.dws_orders
SELECT
customer,
SUM(amount) AS amount
FROM app_db.ods_orders
GROUP BY customer;
"
给脚本授权
chmod +x dws_orders_sync.sh
配置 Crontab 定时任务
# 编辑定时任务
crontab -e
# # 添加一行:每 2 分钟聚合一次
*/2 * * * * flock -n /tmp/dws_orders_sync.lock /opt/data/job/dws_orders_sync.sh >> /opt/data/job/dws_orders_sync.log 2>&1
验证
crontab -l
通过日志文件排查执行情况。
cat /opt/data/job/dws_orders_sync.log
数据验证

1762

被折叠的 条评论
为什么被折叠?



