一、使用Flink操作Clickhouse Source实战
主要通过继承Flink的RichSourceFunction类实现Clickhouse Source和重写run方法。
public class ClickHouseSourceUtil extends RichSourceFunction<List<LogMessage>> {
private volatile boolean isRunning = true;
private Connection connection;
String sql;
String logStartDate;
String logEndDate;
public ClickHouseSourceUtil(String sql,String logStartDate,String logEndDate) {
this.sql = sql;
this.logStartDate = logStartDate;
this.logEndDate = logEndDate;
}
@Override
public void open(Configuration parameters) throws Exception {
super.open(parameters);
//获取属性信息
EnvPropertiesUtil envProperties = new EnvPropertiesUtil();
Properties props = envProperties.getEnvProperties();
// 初始化连接
String url = props.getProperty(Constants.CLICKHOUSE_DB);
connection = DriverManager.getConnection(url, props.getProperty(Constants.CLICKHOUSE_USER), props.getProperty(Constants.CLICKHOUSE_PASSWD));
}
@Override
public void run(SourceFunction.SourceContext<List<LogMessage>> ctx) throws Exception {
int batchOffset = 0;//初始化偏移量为0,即从第一行开始查询数据
int batchSize = Constants.CLICKHOUSE_QUERY_BATCH_SIZE; // 假设每次查询1000条记录
while (isRunning) {
String dataSql = " where toYYYYMMDD(log_date) = "+logStartDate+" ";
String query = sql+ dataSql +" ORDER BY id LIMIT " + batchSize + " OFFSET " + batchOffset ;
log.info("clickhouse log_info查询语句 :{}", query);
try (Statement stmt = connection.createStatement();
ResultSet rs = stmt.executeQuery(query)) {
ArrayList<LogMessage> logList = new ArrayList<>();
int rowCount = 0;
while (rs.next()) {
//rs.getInt("id")
rowCount++;
logList.add(convertResultSetToObjects(rs));
}
ctx.collect(logList);
if (0 == rowCount || rowCount < batchSize) {
// Thread.sleep(10000);
isRunning = false;
}
}
batchOffset += batchSize;
// 简单暂停一下模拟批处理间隔
// Thread.sleep(1000);
}
}
private LogMessage convertResultSetToObjects(ResultSet rs) throws SQLException {
DateTimeFormatter fmt = DateTimeFormatter.ofPattern(Constants.DATATIME_FORMAT);
LogMessage logMessage = new LogMessage();
String id = rs.getString("id");
String logType = rs.getString("log_type");
String logOrigin = rs.getString("log_origin");
String logDate = rs.getString("log_date");
String reportDate = rs.getString("report_date");
logMessage.setId(id);
logMessage.setLogType(logType);
logMessage.setLogOrigin(logOrigin);
logMessage.setLogDate(LocalDateTime.parse(logDate,fmt));
logMessage.setReportDate(LocalDateTime.parse(reportDate,fmt));
logMessage.setHash(hash);
return logMessage;
}
@Override
public void cancel() {
isRunning = false;
}
@Override
public void close() throws Exception {
if (connection != null) {
connection.close();
}
super.close();
}
}
二、使用Flink操作Clickhouse 单条插入Sink实战
主要通过继承Flink的RichSinkFunction类实现Clickhouse Source和重写invoke方法。并通过preparedStatement.execute()实现单条数据插入到clickhouse。
public class ClickHouseUtil extends RichSinkFunction<LogMessage> {
private ClickHouseConnection conn = null;
String sql;
public ClickHouseUtil(String sql) {
this.sql = sql;
}
@Override
public void open(Configuration parameters) throws Exception {
super.open(parameters);
return ;
}
@Override
public void close() throws Exception {
super.close();
if (conn != null)
{
conn.close();
}
}
/**
* 单条提交
* @param logMsg 日志信息对象
* @param context 上下文信息
* @throws Exception 数据库异常信息
*/
@Override
public void invoke(LogMessage logMsg, Context context) {
//获取属性信息
EnvPropertiesUtil envProperties = new EnvPropertiesUtil();
Properties props = envProperties.getEnvProperties();
ClickHouseProperties properties = new ClickHouseProperties();
properties.setUser(props.getProperty(Constants.CLICKHOUSE_USER));
properties.setPassword(props.getProperty(Constants.CLICKHOUSE_PASSWD));
properties.setSessionId(Constants.DEFAULT_SESSION_ID);
ClickHouseDataSource dataSource = new ClickHouseDataSource(props.getProperty(Constants.CLICKHOUSE_DB), properties);
Map<ClickHouseQueryParam, String> additionalDBParams = new HashMap<>();
additionalDBParams.put(ClickHouseQueryParam.SESSION_ID, Constants.NEW_SESSION_ID);
DateTimeFormatter fmt = DateTimeFormatter.ofPattern(Constants.DATATIME_FORMAT);
String logDateStr = logMsg.getLogDate().format(fmt);
String reportDateStr = logMsg.getReportDate().format(fmt);
try {
conn = dataSource.getConnection();
PreparedStatement preparedStatement = conn.prepareStatement(sql);
preparedStatement.setString(1,logMsg.getOrgCode());
preparedStatement.setString(2, logMsg.getContent());
preparedStatement.setString(3, logMsg.getIp());
preparedStatement.setString(4, logMsg.getAddr());
//单条执行,execute()方法执行SQL语句并得到影响的行数。会自动提交事务。不需要手动提交事务
preparedStatement.execute();
}
catch (Exception e){
e.printStackTrace();
}
}
}
三、使用Flink操作Clickhouse 批量插入Sink实战
主要通过继承Flink的RichSinkFunction类实现Clickhouse Source和重写invoke方法。并通过preparedStatement.addBatch()和preparedStatement.executeBatch()实现批量数据插入到clickhouse。
public class ClickHouseBatchUtil extends RichSinkFunction<List<LogMessage>> {
String sql;
public ClickHouseBatchUtil(String sql) {
this.sql = sql;
}
@Override
public void open(Configuration parameters) throws Exception {
super.open(parameters);
return ;
}
/**
* 批量提交
* @param logMsgList 日志信息对象
* @param context 上下文信息
* @throws Exception 数据库异常信息
*/
@Override
public void invoke(List<LogMessage> logMsgList, Context context) {
DateTimeFormatter fmt = DateTimeFormatter.ofPattern(Constants.DATATIME_FORMAT);
String logDateStr = "";
String reportDateStr = "";
try {
ClickHouseConnection conn = getConn();
PreparedStatement preparedStatement = conn.prepareStatement(sql);
for (LogMessage logMsg : logMsgList) {
logDateStr = logMsg.getLogDate().format(fmt);
reportDateStr = logMsg.getReportDate().format(fmt);
preparedStatement.setString(1,logMsg.getOrgCode());
preparedStatement.setString(2, logMsg.getSrcSysCode());
preparedStatement.setString(3, logMsg.getRelSysCode());
preparedStatement.setString(4, logMsg.getOperatePerson());
preparedStatement.addBatch();
}
//执行批处理
int[] count = preparedStatement.executeBatch();
conn.close();
preparedStatement.close();
log.info("成功了插入了" + count.length + "行数据");
}
catch (Exception e){
log.info("批量插入失败,失败的日志批为:{}",logMsgList);
e.printStackTrace();
}
}
private ClickHouseConnection getConn() throws SQLException {
//获取属性信息
EnvPropertiesUtil envProperties = new EnvPropertiesUtil();
Properties props = envProperties.getEnvProperties();
ClickHouseProperties properties = new ClickHouseProperties();
properties.setUser(props.getProperty(Constants.CLICKHOUSE_USER));
properties.setPassword(props.getProperty(Constants.CLICKHOUSE_PASSWD));
properties.setSessionId(Constants.DEFAULT_SESSION_ID);
ClickHouseDataSource dataSource = new ClickHouseDataSource(props.getProperty(Constants.CLICKHOUSE_DB), properties);
Map<ClickHouseQueryParam, String> additionalDBParams = new HashMap<>();
additionalDBParams.put(ClickHouseQueryParam.SESSION_ID, Constants.NEW_SESSION_ID);
return dataSource.getConnection();
}
}
想获取更多IT技术实战分享,欢迎关注公众号“海南来成科技”吧。