摘要:
DuckDB 对物理计划的执行采用了并行处理, 其中对 pipeline 的处理方式很值得学习。
本文以聚合查询为例, 对其执行流程进行分析。
DDL/DML:
表结构:
use memory;
create table b(b1 int, b2 varchar(2), primary key(b1)) ;
create table a(a1 int, a2 varchar(2), foreign key(a1) references b(b1)) ;
create table c(c1 int, c2 varchar(2)) ;
create table d(d1 int, d2 varchar(2)) ;
insert into a values(1, 'a1');
insert into a values(null, 'a2');
insert into a values(3, 'a3');
insert into b values(1, 'b1');
insert into b values(2, 'b2');
insert into b values(3, 'b3');
insert into c values(1, 'c1');
insert into c values(2, 'c2');
insert into c values(null, 'c3');
insert into d values(1, 'd1');
insert into d values(2, 'd2');
insert into d values(1, 'd3');
insert into d values(2, 'd4');
insert into d values(null, 'd5');
查询SQL:
select sum(d1) from d group by d1;
核心流程:
PerfectAggregateHashTable::Scan 的调用堆栈:
#0 duckdb::PerfectAggregateHashTable::Scan (this=0x614000022640, scan_position=@0x60200020e358: 0, result=...)
at /root/work/duckdb-dev/trunk/duckdb-0.7.1/src/execution/perfect_aggregate_hashtable.cpp:223
#1 0x00000000059f82a2 in duckdb::PhysicalPerfectHashAggregate::GetData (this=0x6130000cd180, context=..., chunk=..., gstate_p=..., lstate=...)
at /root/work/duckdb-dev/trunk/duckdb-0.7.1/src/execution/operator/aggregate/physical_perfecthash_aggregate.cpp:193
#2 0x00000000036ab192 in duckdb::PipelineExecutor::FetchFromSource (this=0x613000015200, result=...) at /root/work/duckdb-dev/trunk/duckdb-0.7.1/src/parallel/pipeline_executor.cpp:312
#3 0x00000000036a58b1 in duckdb::PipelineExecutor::Execute (this=0x613000015200, max_chunks=18446744073709551615)
at /root/work/duckdb-dev/trunk/duckdb-0.7.1/src/parallel/pipeline_executor.cpp:47
#4 0x00000000036a5a70 in duckdb::PipelineExecutor::Execute (this=0x613000015200) at /root/work/duckdb-dev/trunk/duckdb-0.7.1/src/parallel/pipeline_executor.cpp:66
#5 0x00000000036b60bc in duckdb::PipelineTask::ExecuteTask (this=0x604000048bd0, mode=duckdb::TaskExecutionMode::PROCESS_ALL)
at /root/work/duckdb-dev/trunk/duckdb-0.7.1/src/parallel/pipeline.cpp:42
#6 0x0000000003686287 in duckdb::ExecutorTask::Execute (this=0x604000048bd0, mode=duckdb::TaskExecutionMode::PROCESS_ALL)
at /root/work/duckdb-dev/trunk/duckdb-0.7.1/src/parallel/executor_task.cpp:17
#7 0x00000000036aecab in duckdb::TaskScheduler::ExecuteForever (this=0x60b0000001a0, marker=0x602000001fb0) at /root/work/duckdb-dev/trunk/duckdb-0.7.1/src/parallel/task_scheduler.cpp:135
#8 0x00000000036af742 in duckdb::ThreadExecuteTasks (scheduler=0x60b0000001a0, marker=0x602000001fb0) at /root/work/duckdb-dev/trunk/duckdb-0.7.1/src/parallel/task_scheduler.cpp:185
#9 0x000000000370d3d8 in std::__invoke_impl<void, void (*)(duckdb::TaskScheduler*, std::atomic<bool>*), duckdb::TaskScheduler*, std::atomic<bool>*> (
__f=@0x603000038548: 0x36af6fd <duckdb::ThreadExecuteTasks(duckdb::TaskScheduler*, std::atomic<bool>*)>) at /usr/include/c++/8/bits/i