https://doris.apache.org/zh-CN/docs/data-operate/import/import-way/routine-load-manual/
json (无嵌套)
-- Routine load job: continuously ingest flat (non-nested) JSON from Kafka
-- into v2x_olap_database.government_car_route_load.
CREATE ROUTINE LOAD v2x_olap_database.government_car_route_load ON government_car_route_load
COLUMNS(temId, tehNo, speed, dayMileage, gpsTime, status, direction, k1, longitude, latitude, eventTime, dt)
PROPERTIES
(
    -- up to 3 concurrent load sub-tasks
    "desired_concurrent_number" = "3",
    -- flush a batch at most every 20 s, or once 300k rows / 200 MiB accumulate
    "max_batch_interval" = "20",
    "max_batch_rows" = "300000",
    "max_batch_size" = "209715200",
    -- non-strict: rows failing column conversion are tolerated, not fatal
    "strict_mode" = "false",
    "format" = "json"
)
FROM KAFKA
(
    -- NOTE(review): 10.0.2.38 appears twice (31091 and 31092) — confirm the
    -- third broker should not be 10.0.2.39
    "kafka_broker_list" = "10.0.2.37:31090,10.0.2.38:31091,10.0.2.38:31092",
    "kafka_topic" = "government_car_topic",
    "property.group.id" = "government_car_route_load_group",
    -- start from the earliest offset when no committed offset exists
    "property.kafka_default_offsets" = "OFFSET_BEGINNING",
    "property.enable.auto.commit" = "false",
    -- consume partition 0 only
    "kafka_partitions" = "0"
);
json (嵌套)
参考:https://www.cnblogs.com/ywjfx/p/14722302.html
-- Routine load job for NESTED JSON: jsonpaths maps fields under the "test"
-- object onto the target columns, in COLUMNS order.
CREATE ROUTINE LOAD test.test_json_label_4 ON user_json
COLUMNS(id, name, counts, ts)
PROPERTIES
(
    -- up to 3 concurrent load sub-tasks
    "desired_concurrent_number" = "3",
    -- flush a batch at most every 20 s, or once 300k rows / 200 MiB accumulate
    "max_batch_interval" = "20",
    "max_batch_rows" = "300000",
    "max_batch_size" = "209715200",
    "strict_mode" = "false",
    "format" = "json",
    -- one JSONPath per column: $.test.id -> id, $.test.name -> name, ...
    "jsonpaths" = "[\"$.test.id\",\"$.test.name\",\"$.test.counts\",\"$.test.ts\"]"
)
FROM KAFKA
(
    "kafka_broker_list" = "localhost:9092",
    "kafka_topic" = "Demo2",
    -- consume partition 0 starting at offset 0
    "kafka_partitions" = "0",
    "kafka_offsets" = "0"
);
查看导入
mysql> show routine load\G
Empty set (0.00 sec)
删除(停止)导入任务 —— 注意 STOP 之后任务无法 RESUME 恢复
mysql> STOP ROUTINE LOAD FOR government_car_route_load;
Query OK, 0 rows affected (0.00 sec)
暂停任务
mysql> PAUSE ROUTINE LOAD FOR government_car_route_load;
从 pause 状态中恢复
mysql> RESUME ROUTINE LOAD FOR government_car_route_load;
问题记录
failed to get kafka partition info: [no partition in this topic]
"kafka_partitions" = "0"
no partition for this tuple.
分区表历史分区未创建报如上错误
参考:https://blog.youkuaiyun.com/qq_37865420/article/details/122456181
创建历史分区
-- Fix for "no partition for this tuple": enable creation of historical
-- partitions on the dynamic-partition table and extend the partition range
-- back by 75 units, so rows landing in past partitions have somewhere to go.
alter table v2x_olap_database.government_car_recovery set ("dynamic_partition.create_history_partition"="true","dynamic_partition.start" = "-75");
routine load 容错 current error rows is more than max error num
where 过滤条件
频繁因数据质量问题导致数据导入失败,加入 where 进行过滤
-- Template: add a WHERE clause to a routine load so low-quality rows are
-- filtered at ingest time instead of counting as error rows (avoids
-- "current error rows is more than max error num" job failures).
CREATE ROUTINE LOAD xxx ON xxx
COLUMNS(...),
-- keep only rows whose timestamp precedes the server timestamp and falls
-- within the current day; UNIX_TIMESTAMP(...) * 1000 converts to milliseconds,
-- so timestamp is presumably in ms — confirm against the source data
where timestamp < servertimestamp and timestamp > UNIX_TIMESTAMP(DATE(now())) * 1000
PROPERTIES
(
...
)
FROM KAFKA
(
...
);
routine load 状态异常
failed to find 3 backends
其中某个 backend 状态异常
恢复重启该backend ./bin/start_be.sh --daemon
分区自动创建,恢复routine load RESUME ROUTINE LOAD FOR government_car_route_load