二十六、Flink源码阅读--sql执行转换过程

本文深入探讨Flink SQL如何从语句转换为DataStream任务的源码层面,通过DEMO演示,详细阐述注册表、Table生成及转换为DataStream的全过程。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

flink sql 在执行中是如何从sql语句或者是table api 转为最后的DataStream任务或者是DataSet任务的,本篇我们从源码角度看下中间的执行和转换过程。

DEMO

这是flink的一个单元测试方法,模拟实时数据查询

@Test
	public void testSelect() throws Exception {
		// Set up the streaming environment plus its SQL/Table counterpart.
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		StreamTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env);
		StreamITCase.clear();

		// Build a small 3-tuple stream and expose it as a table named "MyTable"
		// with columns a, b, c.
		DataStream<Tuple3<Integer, Long, String>> input = JavaStreamTestData.getSmall3TupleDataSet(env);
		Table inputTable = tableEnv.fromDataStream(input, "a,b,c");
		tableEnv.registerTable("MyTable", inputTable);

		// Run a trivial projection query over the registered table.
		String query = "SELECT * FROM MyTable";
		Table queryResult = tableEnv.sqlQuery(query);

		// Convert the result table back to a DataStream, collect via sink, execute.
		DataStream<Row> rows = tableEnv.toAppendStream(queryResult, Row.class);
		rows.addSink(new StreamITCase.StringSink<Row>());
		env.execute();

		// All three input rows must appear unchanged in the output.
		List<String> expectedRows = new ArrayList<>();
		expectedRows.add("1,1,Hi");
		expectedRows.add("2,2,Hello");
		expectedRows.add("3,2,Hello world");

		StreamITCase.compareWithList(expectedRows);
	}
注册表
tableEnv.registerTable("MyTable", in);
==>
StreamTableEnvironment.registerDataStream
==>
registerDataStreamInternal
==>
registerTableInternal
==>
/**
 * Registers `table` under `name` in the root schema.
 *
 * Throws a [[TableException]] when a table with the same name was already
 * registered; otherwise the table is simply added to `rootSchema`.
 */
protected def registerTableInternal(name: String, table: AbstractTable): Unit = {
  if (!isRegistered(name)) {
    rootSchema.add(name, table)
  } else {
    throw new TableException(s"Table \'$name\' already exists. " +
      s"Please, choose a different name.")
  }
}
将表结构添加到schema中,注册表成功。
Table生成过程
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
StreamTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env);

Table result = tableEnv.sqlQuery(sqlQuery);

===>
def sqlQuery(query: String): Table = {
   val planner = new FlinkPlannerImpl(getFrameworkConfig, getPlanner, getTypeFactory)
   // parse the sql query
   val parsed = planner.parse(query)// produces a SqlNode AST; SqlNode is abstract with subclasses such as SqlSelect, SqlDelete, SqlJoin, SqlAlter
   if (null != parsed && parsed.getKind.belongsTo(SqlKind.QUERY)) {
     // validate the sql query
     val validated = planner.validate(parsed)// validates the SqlNode AST
     // transform to a relational tree
     val relational = planner.rel(validated)// AST --> logical plan
     new Table(this, LogicalRelNode(relational.rel))// relational.rel is the logical plan; here it is a LogicalProject
   } else {
     throw new TableException(
       "Unsupported SQL query! sqlQuery() only accepts SQL queries of type " +
         "SELECT, UNION, INTERSECT, EXCEPT, VALUES, and ORDER_BY.")
   }
 }

构造Table对象的过程就是将sql 转为 SqlNode ,再校验,再转为逻辑计划。整个调用过程与 Calcite 的处理流程一致,Calcite 的原理可以参考 https://matt33.com/2019/03/07/apache-calcite-process-flow/

Table 转为 DataStream过程
DataStream<Row> resultSet = tableEnv.toAppendStream(result, Row.class);
resultSet.addSink(new StreamITCase.StringSink<Row>());
env.execute();

==>

/**
 * Converts the given [[Table]] into an append-only [[DataStream]] whose
 * elements have the type described by `clazz`.
 *
 * Extracts and validates the target type information, then delegates to
 * `translate` with retraction and change-flag emission disabled.
 */
def toAppendStream[T](
     table: Table,
     clazz: Class[T],
     queryConfig: StreamQueryConfig): DataStream[T] = {
   // Derive TypeInformation from the requested class and make sure it is supported.
   val returnType = TypeExtractor.createTypeInfo(clazz)
   TableEnvironment.validateType(returnType)
   translate[T](table, queryConfig, updatesAsRetraction = false, withChangeFlag = false)(returnType)
 }

==>

protected def translate[A](
    table: Table,
    queryConfig: StreamQueryConfig,
    updatesAsRetraction: Boolean,
    withChangeFlag: Boolean)(implicit tpe: TypeInformation[A]): DataStream[A] = {
  val relNode = table.getRelNode// obtain the logical plan
  val dataStreamPlan = optimize(relNode, updatesAsRetraction)// optimize into the physical execution plan

  val rowType = getResultType(relNode, dataStreamPlan)

  // Hand off to the overload that turns the physical plan into a DataStream.
  translate(dataStreamPlan, rowType, queryConfig, withChangeFlag)
}
==> translateToCRow ==> DataStreamScan.translateToPlan ==> convertToInternalRow ==> generateConversionProcessFunction 生成具体的算子

DataSet也是同理的翻译过程,最终sql 就可以像DataStream一样执行任务。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值