A Flink multi-table join example

Today I wrote a slightly more involved example that implements functionality similar to MySQL's group_concat; recording it here.
For MapToString, see the earlier blog post about the bug.
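That post isn't reproduced here, so below is a minimal sketch of what such a UDF could look like (my reconstruction, not the original): Flink's collect() aggregate builds a multiset, which a Java scalar function receives as a Map from value to occurrence count, and flattening that map yields the group_concat-style string.

```java
import java.util.Map;

import org.apache.flink.table.functions.ScalarFunction;

// Sketch of the MapToString UDF (a reconstruction, not the original code):
// collect(type) produces a multiset, which arrives here as Map<value, count>.
public class MapToString extends ScalarFunction {

    public String eval(Map<String, Integer> map) {
        if (map == null || map.isEmpty()) {
            return "";
        }
        StringBuilder sb = new StringBuilder();
        for (Map.Entry<String, Integer> entry : map.entrySet()) {
            // repeat each value as often as it occurred within the group
            for (int i = 0; i < entry.getValue(); i++) {
                if (sb.length() > 0) {
                    sb.append(",");
                }
                sb.append(entry.getKey());
            }
        }
        return sb.toString();
    }
}
```

With that in place, the full job: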

```java
import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.jdbc.JDBCInputFormat;
import org.apache.flink.api.java.operators.DataSource;
import org.apache.flink.api.java.typeutils.RowTypeInfo;
import org.apache.flink.core.fs.FileSystem.WriteMode;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.java.BatchTableEnvironment;
import org.apache.flink.types.Row;

public class MultipleTableJoin {

    public static void main(String[] args) throws Exception {
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        // create a batch table environment (Flink 1.9+ style API)
        BatchTableEnvironment tableEnv = BatchTableEnvironment.create(env);
        tableEnv.registerFunction("mapToString", new MapToString());

        getProjectInfo(env, tableEnv);
        getProject(env, tableEnv);
        joinTableProjectWithInfo(tableEnv);

        Table query = tableEnv.sqlQuery("select id, name, type from result_agg");
        DataSet<Row> ds = tableEnv.toDataSet(query, Row.class);
        ds.print(); // print() eagerly triggers an execution of its own
        ds.writeAsText("/home/test", WriteMode.OVERWRITE);
        env.execute("multiple-table"); // runs the job again for the file sink
    }

    // Reads (project_fid, project_info_type) from MySQL, registers it as
    // project_info, then registers the aggregated project_info_agg table.
    public static void getProjectInfo(ExecutionEnvironment env, BatchTableEnvironment tableEnv) {
        TypeInformation[] fieldTypes = new TypeInformation[] {
                BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO };
        String[] fieldNames = new String[] { "id", "type" };
        RowTypeInfo rowTypeInfo = new RowTypeInfo(fieldTypes, fieldNames);
        JDBCInputFormat jdbcInputFormat = JDBCInputFormat.buildJDBCInputFormat()
                .setDrivername("com.mysql.jdbc.Driver")
                .setDBUrl("jdbc:mysql://ip:3306/space?characterEncoding=utf8")
                .setUsername("user").setPassword("pwd")
                .setQuery("select project_fid, cast(project_info_type as CHAR) as type from project")
                .setRowTypeInfo(rowTypeInfo).finish();
        DataSource<Row> s = env.createInput(jdbcInputFormat);
        tableEnv.registerDataSet("project_info", s);
        aggProjectInfo(tableEnv, "project_info");
    }

    // The group_concat part: collect(type) builds a multiset per id and the
    // mapToString UDF renders it as a comma-separated string.
    public static void aggProjectInfo(BatchTableEnvironment tableEnv, String tableName) {
        Table tapiResult = tableEnv.scan(tableName);
        tapiResult.printSchema();
        Table query = tableEnv.sqlQuery(
                "select id, mapToString(collect(type)) as type from " + tableName + " group by id");
        tableEnv.registerTable(tableName + "_agg", query);
        tapiResult = tableEnv.scan(tableName + "_agg");
        tapiResult.printSchema();
    }

    public static void getProject(ExecutionEnvironment env, BatchTableEnvironment tableEnv) {
        TypeInformation[] fieldTypes = new TypeInformation[] {
                BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO };
        String[] fieldNames = new String[] { "pid", "name" };
        RowTypeInfo rowTypeInfo = new RowTypeInfo(fieldTypes, fieldNames);
        JDBCInputFormat jdbcInputFormat = JDBCInputFormat.buildJDBCInputFormat()
                .setDrivername("com.mysql.jdbc.Driver")
                .setDBUrl("jdbc:mysql://ip:3306/space?characterEncoding=utf8")
                .setUsername("user").setPassword("pwd")
                .setQuery("select fid, project_name from t_project")
                .setRowTypeInfo(rowTypeInfo).finish();
        DataSource<Row> s = env.createInput(jdbcInputFormat);
        tableEnv.registerDataSet("project", s);
    }

    // Joins project with the aggregated project_info_agg on the project id.
    public static void joinTableProjectWithInfo(BatchTableEnvironment tableEnv) {
        Table result = tableEnv.sqlQuery(
                "select a.pid as id, a.name, b.type from project a inner join project_info_agg b on a.pid = b.id");
        tableEnv.registerTable("result_agg", result);
        result.printSchema();
    }
}
```
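For comparison, the whole pipeline corresponds to what a single GROUP_CONCAT query would do directly in MySQL. Column names are taken from the queries above; treat the exact join-key pairing as an assumption on my part.

```sql
-- MySQL equivalent of the Flink job above (illustrative)
SELECT a.fid AS id,
       a.project_name AS name,
       GROUP_CONCAT(b.project_info_type) AS type
FROM t_project a
INNER JOIN project b ON a.fid = b.project_fid
GROUP BY a.fid, a.project_name;
```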

Reposted from: https://blog.51cto.com/12597095/2398626

### Implementing a Join in Flink CDC and Inserting the Query Result

In an Apache Flink Change Data Capture (CDC) scenario, tables can be joined in a streaming fashion and the final result written to a target storage system. The concrete steps follow.

#### Preparing the data

To support the join and the subsequent write, first define the input and output table structures. Suppose there are two MySQL tables, `orders` and `customers`, that need to be joined.

```sql
-- Define the orders table
CREATE TABLE orders (
    order_id BIGINT,
    customer_id BIGINT,
    product STRING,
    amount DOUBLE,
    PRIMARY KEY(order_id) NOT ENFORCED
) WITH (
    'connector' = 'mysql-cdc',
    'hostname' = 'localhost',
    'port' = '3306',
    'username' = 'root',
    'password' = 'password',
    'database-name' = 'sales_db',
    'table-name' = 'orders'
);

-- Define the customers table
CREATE TABLE customers (
    id BIGINT,
    name STRING,
    email STRING,
    address STRING,
    PRIMARY KEY(id) NOT ENFORCED
) WITH (
    'connector' = 'mysql-cdc',
    'hostname' = 'localhost',
    'port' = '3306',
    'username' = 'root',
    'password' = 'password',
    'database-name' = 'sales_db',
    'table-name' = 'customers'
);
```

The SQL above creates two dynamic tables backed by the MySQL-CDC connector[^1].

---

#### Join query logic

Flink SQL's join support makes connecting the two tables straightforward. The following simple example joins orders (`orders`) with customers (`customers`) and computes each customer's total spend.

```sql
-- Total spend per customer
SELECT
    c.id AS customer_id,
    c.name AS customer_name,
    SUM(o.amount) AS total_spent
FROM orders o
JOIN customers c ON o.customer_id = c.id
GROUP BY c.id, c.name;
```

This query continuously watches changes from `orders` and `customers` and keeps each customer's cumulative spend up to date[^2].

---

#### Writing the result to external storage

The final step is persisting the joined result to external storage. Taking Hudi as the example, configure the sink table and write the data into it.

```sql
-- Create the Hudi output table
CREATE TABLE customer_spend_summary (
    customer_id BIGINT,
    customer_name STRING,
    total_spent DOUBLE
) WITH (
    'connector' = 'hudi',
    'path' = 's3://your-bucket/path/to/hudi/table/',
    'table.type' = 'MERGE_ON_READ',
    'write.precombine.field' = 'total_spent',
    'hoodie.datasource.write.recordkey.field' = 'customer_id',
    'hive_sync.enable' = 'true',
    'hive_sync.database' = 'summary_db',
    'hive_sync.table' = 'customer_spend_summary'
);

-- Insert the join result into Hudi
INSERT INTO customer_spend_summary
SELECT
    c.id AS customer_id,
    c.name AS customer_name,
    SUM(o.amount) AS total_spent
FROM orders o
JOIN customers c ON o.customer_id = c.id
GROUP BY c.id, c.name;
```

The snippets above create a Hudi-format sink table and continuously sync the join result into it.

---

#### Caveats

- **Latency tolerance**: Because of the nature of distributed systems, events can arrive out of order; set a reasonable watermark strategy to bound the delay.
- **State management**: For complex window or aggregation operations, tune RocksDB (or whichever state backend is in use) appropriately for performance; see the snippet after this list for one such knob.
- **Idempotency**: For upsert-style operations, confirm that the primary-key fields are unique and that the precombine field is chosen appropriately.
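To make the state-management note concrete: a regular (non-windowed) streaming join plus aggregation like the one above keeps state indefinitely by default. One sketch of bounding it, using Flink's `table.exec.state.ttl` option from the SQL client; the config key is Flink's own, but the one-day value is only an illustrative choice.

```sql
-- Expire idle join/aggregation state after one day (illustrative value)
SET 'table.exec.state.ttl' = '1 d';
```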