IT技术分享--使用Flink操作clickhouse source和单条插入sink和批量插入sink

一、使用Flink操作Clickhouse Source实战

主要通过继承Flink的RichSourceFunction类实现Clickhouse Source和重写run方法。

public class ClickHouseSourceUtil extends RichSourceFunction<List<LogMessage>> {

    private volatile boolean isRunning = true;
    private Connection connection;

    String sql;
    String logStartDate;
    String logEndDate;

    public ClickHouseSourceUtil(String sql,String logStartDate,String logEndDate) {
        this.sql = sql;
        this.logStartDate = logStartDate;
        this.logEndDate = logEndDate;
    }

    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);
        //获取属性信息
        EnvPropertiesUtil envProperties = new EnvPropertiesUtil();
        Properties props = envProperties.getEnvProperties();

        // 初始化连接
        String url = props.getProperty(Constants.CLICKHOUSE_DB);
        connection = DriverManager.getConnection(url, props.getProperty(Constants.CLICKHOUSE_USER), props.getProperty(Constants.CLICKHOUSE_PASSWD));
    }

    @Override
    public void run(SourceFunction.SourceContext<List<LogMessage>> ctx) throws Exception {
        int batchOffset = 0;//初始化偏移量为0,即从第一行开始查询数据
        int batchSize = Constants.CLICKHOUSE_QUERY_BATCH_SIZE;  // 假设每次查询1000条记录

        while (isRunning) {
            String dataSql = " where toYYYYMMDD(log_date) = "+logStartDate+" ";
            String query = sql+ dataSql +"  ORDER BY id LIMIT " + batchSize + " OFFSET " + batchOffset ;
            log.info("clickhouse log_info查询语句 :{}", query);
            try (Statement stmt = connection.createStatement();
                 ResultSet rs = stmt.executeQuery(query)) {
                ArrayList<LogMessage> logList = new ArrayList<>();
                int rowCount = 0;
                while (rs.next()) {
                    //rs.getInt("id")
                    rowCount++;
                    logList.add(convertResultSetToObjects(rs));
                }
                ctx.collect(logList);

                if (0 == rowCount || rowCount < batchSize) {
//                    Thread.sleep(10000);
                    isRunning = false;
                }
            }
            batchOffset += batchSize;

            // 简单暂停一下模拟批处理间隔
//            Thread.sleep(1000);
        }
    }

    private LogMessage convertResultSetToObjects(ResultSet rs) throws SQLException {
        DateTimeFormatter fmt = DateTimeFormatter.ofPattern(Constants.DATATIME_FORMAT);

        LogMessage logMessage = new LogMessage();
        String id = rs.getString("id");
        String logType = rs.getString("log_type");
        String logOrigin = rs.getString("log_origin");
        String logDate = rs.getString("log_date");
        String reportDate = rs.getString("report_date");

        logMessage.setId(id);
        logMessage.setLogType(logType);
        logMessage.setLogOrigin(logOrigin);
        logMessage.setLogDate(LocalDateTime.parse(logDate,fmt));
        logMessage.setReportDate(LocalDateTime.parse(reportDate,fmt));
        logMessage.setHash(hash);

        return logMessage;
    }

    @Override
    public void cancel() {
        isRunning = false;
    }

    @Override
    public void close() throws Exception {
        if (connection != null) {
            connection.close();
        }
        super.close();
    }
}

二、使用Flink操作Clickhouse 单条插入Sink实战

主要通过继承Flink的RichSinkFunction类实现Clickhouse Source和重写invoke方法。并通过preparedStatement.execute()实现单条数据插入到clickhouse。

 
public class ClickHouseUtil extends RichSinkFunction<LogMessage> {
    private ClickHouseConnection conn = null;

    String sql;

    public ClickHouseUtil(String sql) {
        this.sql = sql;
    }

    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);
        return ;
    }

    @Override
    public void close() throws Exception {
        super.close();
        if (conn != null)
        {
            conn.close();
        }
    }

    /**
     * 单条提交
     * @param logMsg     日志信息对象
     * @param context    上下文信息
     * @throws Exception    数据库异常信息
     */
    @Override
    public void invoke(LogMessage logMsg, Context context) {
        //获取属性信息
        EnvPropertiesUtil envProperties = new EnvPropertiesUtil();
        Properties props = envProperties.getEnvProperties();

        ClickHouseProperties properties = new ClickHouseProperties();
        properties.setUser(props.getProperty(Constants.CLICKHOUSE_USER));
        properties.setPassword(props.getProperty(Constants.CLICKHOUSE_PASSWD));
        properties.setSessionId(Constants.DEFAULT_SESSION_ID);
        ClickHouseDataSource dataSource = new ClickHouseDataSource(props.getProperty(Constants.CLICKHOUSE_DB), properties);
        Map<ClickHouseQueryParam, String> additionalDBParams = new HashMap<>();

        additionalDBParams.put(ClickHouseQueryParam.SESSION_ID, Constants.NEW_SESSION_ID);

        DateTimeFormatter fmt = DateTimeFormatter.ofPattern(Constants.DATATIME_FORMAT);
        String logDateStr = logMsg.getLogDate().format(fmt);
        String reportDateStr = logMsg.getReportDate().format(fmt);
        try {
            conn = dataSource.getConnection();
            PreparedStatement preparedStatement = conn.prepareStatement(sql);
            preparedStatement.setString(1,logMsg.getOrgCode());
            preparedStatement.setString(2, logMsg.getContent());
            preparedStatement.setString(3, logMsg.getIp());
            preparedStatement.setString(4, logMsg.getAddr());
            
            //单条执行,execute()方法执行SQL语句并得到影响的行数。会自动提交事务。不需要手动提交事务
                      preparedStatement.execute();
        }
        catch (Exception e){
            e.printStackTrace();
        }
    }
}

​三、使用Flink操作Clickhouse 批量插入Sink实战

主要通过继承Flink的RichSinkFunction类实现Clickhouse Source和重写invoke方法。并通过preparedStatement.addBatch()和preparedStatement.executeBatch()实现批量数据插入到clickhouse。

public class ClickHouseBatchUtil extends RichSinkFunction<List<LogMessage>> {

    String sql;

    public ClickHouseBatchUtil(String sql) {
        this.sql = sql;
    }

    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);
        return ;
    }

    /**
     * 批量提交
     * @param logMsgList     日志信息对象
     * @param context    上下文信息
     * @throws Exception    数据库异常信息
     */
    @Override
    public void invoke(List<LogMessage> logMsgList, Context context) {
        DateTimeFormatter fmt = DateTimeFormatter.ofPattern(Constants.DATATIME_FORMAT);
        String logDateStr = "";
        String reportDateStr = "";
 
        try {
            ClickHouseConnection conn = getConn();
            PreparedStatement preparedStatement = conn.prepareStatement(sql);
            for (LogMessage logMsg : logMsgList) {
                logDateStr = logMsg.getLogDate().format(fmt);
                reportDateStr = logMsg.getReportDate().format(fmt);
                preparedStatement.setString(1,logMsg.getOrgCode());
                preparedStatement.setString(2, logMsg.getSrcSysCode());
                preparedStatement.setString(3, logMsg.getRelSysCode());
                preparedStatement.setString(4, logMsg.getOperatePerson());
                preparedStatement.addBatch();
            }
            //执行批处理
                     int[] count = preparedStatement.executeBatch();
            conn.close();
            preparedStatement.close();

            log.info("成功了插入了" + count.length + "行数据");
        }
        catch (Exception e){
            log.info("批量插入失败,失败的日志批为:{}",logMsgList);
            e.printStackTrace();
        }
    }


 
    private ClickHouseConnection getConn() throws SQLException {
        //获取属性信息
        EnvPropertiesUtil envProperties = new EnvPropertiesUtil();
        Properties props = envProperties.getEnvProperties();

        ClickHouseProperties properties = new ClickHouseProperties();
        properties.setUser(props.getProperty(Constants.CLICKHOUSE_USER));
        properties.setPassword(props.getProperty(Constants.CLICKHOUSE_PASSWD));
        properties.setSessionId(Constants.DEFAULT_SESSION_ID);
        ClickHouseDataSource dataSource = new ClickHouseDataSource(props.getProperty(Constants.CLICKHOUSE_DB), properties);
        Map<ClickHouseQueryParam, String> additionalDBParams = new HashMap<>();

        additionalDBParams.put(ClickHouseQueryParam.SESSION_ID, Constants.NEW_SESSION_ID);
        return dataSource.getConnection();
    }
}

想获取更多IT技术实战分享,欢迎关注公众号“海南来成科技”吧。
在这里插入图片描述

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

IT小太阳2022

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值