Overview: this implementation guarantees data consistency and idempotence by configuring Kafka's transaction-related parameters and setting Flink's checkpoint mode to EXACTLY_ONCE. For the background concepts, see the introduction to Flink checkpoints referenced at the end of this post.
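For the exactly-once chain to hold end to end, downstream consumers must read only committed transactional messages. Below is a minimal sketch of the consumer-side Kafka client settings this relies on; the class name is hypothetical, but the property keys are standard Kafka client configs.
JAVA
import java.util.Properties;
/**
 * Illustrative holder for the consumer settings required for end-to-end exactly-once.
 */
public class ReadCommittedConsumerProps {
    public static Properties build() {
        Properties props = new Properties();
        // Read only messages from committed Kafka transactions; records from
        // open or aborted transactions stay invisible to the consumer.
        props.put("isolation.level", "read_committed");
        // Disable client auto-commit; with Flink, offsets are committed when a checkpoint completes.
        props.put("enable.auto.commit", "false");
        return props;
    }
}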
Kafka producer side:
pom
<!-- Flink DataStream dependency -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-java</artifactId>
<version>1.16.0</version>
</dependency>
<!-- Flink clients dependency -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-clients</artifactId>
<version>1.16.0</version>
</dependency>
<!-- Flink JSON serialization dependency -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-json</artifactId>
<version>1.16.0</version>
<scope>provided</scope>
</dependency>
<!-- Flink Shaded Jackson 2 -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-shaded-jackson</artifactId>
<version>2.13.4-16.0</version>
<scope>provided</scope>
</dependency>
<!-- Flink Kafka Connector-->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-kafka</artifactId>
<version>1.16.0</version>
</dependency>
JAVA
/**
 * Pushes the given records to Kafka via a Flink job.
 *
 * @param req     the list of visit-action records to send
 * @param brokers the Kafka bootstrap servers
 * @throws Exception if the Flink job fails
 */
private void sinkToKafka(List<VisitActionDTO> req, String brokers) throws Exception {
//Initialize the execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
// Start a checkpoint every 1000 ms, in exactly-once mode
env.enableCheckpointing(1000, CheckpointingMode.EXACTLY_ONCE);
// Optionally enable incremental checkpoints with RocksDB:
// env.setStateBackend(new RocksDBStateBackend(checkpointPath, true));
//Define the JSON serialization schema
JsonSerializationSchema<VisitActionDTO> jsonFormat = new JsonSerializationSchema<>();
//Use the received entity list as the data source
DataStreamSource<VisitActionDTO> stream = env.fromCollection(req);
Properties producerConfig = new Properties();
producerConfig.putAll(kafkaConfig.getProducer());
//Build the Kafka sink
KafkaSink<VisitActionDTO> sink = KafkaSink.<VisitActionDTO>builder()
.setBootstrapServers(brokers)
// .setProperty("transaction.max.timeout.ms","90000000")
.setProperty("transaction.timeout.ms", 1000 * 60 * 10 + "")
.setProperty("allow.auto.create.topics", "true")
.setProperty("offsets.commit.required.acks", "1")
// .setKafkaProducerConfig(producerConfig)
.setRecordSerializer(KafkaRecordSerializationSchema.builder()
.setTopic(TrackConstant.TOPIC_REQUESTS_LIST)
.setValueSerializationSchema(jsonFormat)
.build()
)
// In this mode the KafkaSink writes all messages inside a Kafka transaction that is committed to Kafka on each checkpoint.
// Consumers that read only committed data (see the Kafka consumer config isolation.level) will therefore not see
// duplicates if Flink restarts. Note that this delays record visibility until the checkpoint completes, so tune
// the checkpoint interval accordingly.
// Make sure to use a unique transactionalIdPrefix among applications running on the same Kafka cluster, so that
// concurrently running jobs do not interfere with each other's transactions!
// It is also strongly recommended to set the Kafka transaction timeout (see the producer config transaction.timeout.ms)
// to be much larger than the maximum checkpoint duration plus the maximum restart duration, otherwise data loss can
// occur when Kafka expires an uncommitted transaction.
.setDeliveryGuarantee(DeliveryGuarantee.EXACTLY_ONCE)
// .setDeliveryGuarantee(DeliveryGuarantee.NONE)
.setTransactionalIdPrefix(TrackConstant.TRANSACTION_ID_PREFIX + UUID.randomUUID())
.build();
//Attach the stream to the sink
stream.sinkTo(sink);
//Execute the job
env.execute("Kafka Produce Job");
}
Consuming from Kafka and batch-inserting into ClickHouse
pom
<!-- Table API + DataStream API dependency -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-api-java-bridge</artifactId>
<version>1.16.0</version>
</dependency>
<!-- Flink clients dependency -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-clients</artifactId>
<version>1.16.0</version>
</dependency>
<!-- Flink JSON serialization dependency -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-json</artifactId>
<version>1.16.0</version>
<scope>compile</scope>
</dependency>
<!-- Flink Shaded Jackson 2 -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-shaded-jackson</artifactId>
<version>2.13.4-16.0</version>
<scope>compile</scope>
</dependency>
<!-- Flink Kafka Connector-->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-kafka</artifactId>
<version>1.16.0</version>
</dependency>
<!-- Flink JDBC Connector-->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-jdbc</artifactId>
<version>1.16.0</version>
</dependency>
JAVA
Implementation approach: implement Spring Boot's CommandLineRunner interface so that the task is initialized as soon as the container finishes starting and then stays resident.
VisitActionKafkaToChCommand.java:
package com.lazyshop.flink.track.visitaction.command;
import com.lazyshop.flink.track.visitaction.configuration.KafkaConfig;
import com.lazyshop.flink.track.visitaction.configuration.ClickhouseConfig;
import com.lazyshop.flink.track.visitaction.constant.VisitActionConstant;
import com.lazyshop.flink.track.visitaction.entity.dto.VisitActionDTO;
import com.lazyshop.flink.track.visitaction.sink.VisitActionJdbcSink;
import lombok.extern.slf4j.Slf4j;
import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.connector.kafka.source.KafkaSource;
import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer;
import org.apache.flink.formats.json.JsonDeserializationSchema;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.kafka.clients.consumer.OffsetResetStrategy;
import org.springframework.core.annotation.Order;
import org.springframework.boot.CommandLineRunner;
import org.springframework.stereotype.Component;
import javax.annotation.Resource;
import java.util.Arrays;
import java.util.Properties;
/**
 * Consumes visit-action data from Kafka and transfers it to ClickHouse,
 * running as a resident task that starts with the application container.
 *
 * @author oliver
 */
@Component
@Slf4j
@Order(1)
public class VisitActionKafkaToChCommand implements CommandLineRunner {
@Resource
KafkaConfig kafkaConfig;
@Resource
ClickhouseConfig clickhouseConfig;
@Override
public void run(String... args) throws Exception {
invoke();
}
/**
 * Launches the Flink job:
 * 1) consume the data collected in Kafka;
 * 2) push it to ClickHouse with a delay.
 * Note: the delay depends on the configured offset-commit interval
 * [VisitActionConstant.FLINK_CHECKPOINTING_INTERVAL_MS].
 */
private void invoke() {
//Initialize the execution environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
//Flink checkpoint interval (this is also the interval at which the Kafka consumer's offsets are committed;
//tentatively, two minutes of accumulated data are committed and processed at a time).
//Use the exactly-once checkpointing mode to guarantee consistency (requires transaction support).
env.enableCheckpointing(VisitActionConstant.FLINK_CHECKPOINTING_INTERVAL_MS, CheckpointingMode.EXACTLY_ONCE);
//Maximum number of parallel tasks
env.setMaxParallelism(VisitActionConstant.FLINK_ENV_MAX_PARALLELISM);
//Force single-task mode if needed; by default Flink uses multiple parallel workers
//env.setParallelism(1);
//Define the JSON deserialization schema
JsonDeserializationSchema<VisitActionDTO> jsonFormat = new JsonDeserializationSchema<>(VisitActionDTO.class);
//Build the Kafka source connector and turn the consumed messages into a data stream
DataStreamSource<VisitActionDTO> kafkaSource = env.fromSource(KafkaSource.<VisitActionDTO>builder()
.setBootstrapServers(kafkaConfig.getBrokers())
.setTopics(VisitActionConstant.TOPIC_REQUESTS_LIST)
.setGroupId(VisitActionConstant.GROUP_ID)
//Load the Kafka consumer configuration
.setProperties(new Properties() {{
putAll(kafkaConfig.getConsumer());
}})
//Start from the earliest offset (most complete data, but may re-consume records)
//.setStartingOffsets(OffsetsInitializer.earliest())
//Start from a custom point in time, e.g. one minute ago
//.setStartingOffsets(OffsetsInitializer.timestamp(System.currentTimeMillis() - TimeUnit.MINUTES.toMillis(1)))
//Start from the offsets committed by the consumer group (only the latest committed offsets, avoiding re-consuming history).
//Note: combined with transactions and EXACTLY_ONCE mode, this guarantees consistency and avoids duplicate consumption.
.setStartingOffsets(OffsetsInitializer.committedOffsets(OffsetResetStrategy.LATEST))
//Configure the deserializer
.setValueOnlyDeserializer(jsonFormat)
//.setUnbounded(OffsetsInitializer.latest())
//Make the source bounded (switch to batch mode)
//.setBounded(OffsetsInitializer.latest())
.build(),
//Use a monotonically increasing timestamp watermark strategy
WatermarkStrategy.forMonotonousTimestamps(),
"Kafka Consumer"
);
//Alternative buffering plan: restructure the data once the window reaches a given record count (requires separate batching in the JDBC sink)
//SingleOutputStreamOperator<Object> countWindowKafkaStream = kafkaSource.countWindowAll(VisitActionConstant.JDBC_EXECUTION_OPT_BATCH_SIZE)
// .apply(new AllWindowFunction<>() {
// @Override
// public void apply(GlobalWindow window, Iterable<VisitActionDTO> values, Collector<Object> out) throws Exception {
// //Collect only the data in the current window
// List<VisitActionDTO> lists = new ArrayList<>();
// for (VisitActionDTO visitActionDTO : values) {
// lists.add(visitActionDTO);
// }
// out.collect(lists);
// }
// });
//Print the consumed records (to stderr)
kafkaSource.printToErr();
log.info("从kafka获取数据并传输到clickhouse开始");
String selectColumns = "id_va,idsite,idvisitor,idvisit,action_url_ref,action_name_ref,custom_float,pageview_position,server_time,idpageview,action_name,action_url,search_cat,search_count,time_spent_ref_action,action_product_cat,action_product_cat2,action_product_cat3,action_product_cat4,action_product_cat5,action_product_name,product_price,action_product_sku,action_event_action,action_event_category,action_content_interaction,action_content_name,action_content_piece,action_content_target,time_dom_completion,time_dom_processing,time_network,time_on_load,time_server,time_transfer,time_spent,location_ip,user_id,referer_keyword,referer_name,referer_type,referer_url,location_browser_lang,config_browser_engine,config_browser_name,config_browser_version,config_client_type,config_device_brand,config_device_model,config_device_type,config_os,config_os_version,visitor_localtime,config_resolution,config_cookie,config_flash,config_java,config_pdf,config_quicktime,config_realplayer,config_silverlight,config_windowsmedia,location_city,location_country,location_latitude,location_longitude,location_region,user_agent_str,idgoal,idorder,revenue,revenue_shipping,revenue_subtotal,revenue_tax,revenue_discount,items";
String inputColumnsAndType = "'id_va String,idsite Int32,idvisitor String,idvisit String,action_url_ref String,action_name_ref String,custom_float String,pageview_position Int32,server_time Int64,idpageview String,action_name String,action_url String,search_cat String,search_count Int32,time_spent_ref_action Int32,action_product_cat String,action_product_cat2 String,action_product_cat3 String,action_product_cat4 String,action_product_cat5 String,action_product_name String,product_price String,action_product_sku String,action_event_action String,action_event_category String,action_content_interaction String,action_content_name String,action_content_piece String,action_content_target String,time_dom_completion String,time_dom_processing Int32,time_network Int32,time_on_load String,time_server Int64,time_transfer Int64,time_spent String,location_ip String,user_id String,referer_keyword String,referer_name String,referer_type Int32,referer_url String,location_browser_lang String,config_browser_engine String,config_browser_name String,config_browser_version String,config_client_type Int32,config_device_brand String,config_device_model String,config_device_type Int32,config_os String,config_os_version String,visitor_localtime String,config_resolution String,config_cookie Int32,config_flash Int32,config_java Int32,config_pdf Int32,config_quicktime Int32,config_realplayer Int32,config_silverlight Int32,config_windowsmedia Int32,location_city String,location_country String,location_latitude String,location_longitude String,location_region String,user_agent_str String,idgoal String,idorder String,revenue Float64,revenue_shipping Float64,revenue_subtotal Float64,revenue_tax Float64,revenue_discount Float64,items String'";
//Assemble the INSERT statement
String sqlStr = "insert into visitAction (" + selectColumns + ") select " + selectColumns + " from input(" + inputColumnsAndType + ")";
log.info("sqlstr:" + sqlStr);
kafkaSource
//Use the custom sink to write the data to ClickHouse
.addSink(
VisitActionJdbcSink.getJdbcSink(
sqlStr,
clickhouseConfig,
VisitActionConstant.JDBC_EXECUTION_OPT_BATCH_SIZE,
VisitActionConstant.JDBC_EXECUTION_OPT_BATCH_INTERVAL_MS,
VisitActionConstant.JDBC_EXECUTION_OPT_MAX_RETRIES,
VisitActionConstant.JDBC_CONNECTION_OPT_CHECK_TIME_OUT_SECONDS
)
).name("Kafka Sink To Clickhouse");
env.setRestartStrategy(RestartStrategies.noRestart());
//Run the Flink job in a separate new thread; otherwise the application would block on env.execute() during startup
new Thread(() -> {
try {
env.execute("Kafka Consumer And Transfer To Clickhouse");
} catch (Exception e) {
log.error(String.format("异常信息:%s 异常堆栈:%s", e.getMessage(), Arrays.toString(e.getStackTrace())));
}
}).start();
}
}
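The KafkaConfig and ClickhouseConfig classes injected above are not shown in this post. Here is a minimal sketch of what KafkaConfig might look like, assuming Spring Boot @ConfigurationProperties binding; the prefix and field names are assumptions inferred from the getters used above (getBrokers/getProducer/getConsumer).
JAVA
package com.lazyshop.flink.track.visitaction.configuration;
import lombok.Data;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.stereotype.Component;
import java.util.HashMap;
import java.util.Map;
/**
 * Sketch of the Kafka configuration holder (assumed structure).
 */
@Data
@Component
@ConfigurationProperties(prefix = "kafka")
public class KafkaConfig {
    /** Comma-separated broker list, e.g. host1:9092,host2:9092 */
    private String brokers;
    /** Extra producer properties passed straight through to the Kafka client */
    private Map<String, String> producer = new HashMap<>();
    /** Extra consumer properties passed straight through to the Kafka client */
    private Map<String, String> consumer = new HashMap<>();
    //ClickhouseConfig (a separate file) would analogously expose
    //url, driverClassName, username and password for the JDBC sink.
}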
Tip: when consuming with the Kafka connector, you can use the countWindowAll window operator to collect a fixed number of records per window and then restructure them.
countWindowAll example:
//Restructure the data once the window reaches the given record count
SingleOutputStreamOperator<Object> countWindowKafkaStream = kafkaSource.countWindowAll(4).apply(new AllWindowFunction<>() {
@Override
public void apply(GlobalWindow window, Iterable<VisitActionDTO> values, Collector<Object> out) throws Exception {
//Collect only the data in the current window
List<VisitActionDTO> lists = new ArrayList<>();
for (VisitActionDTO visitActionDTO : values) {
lists.add(visitActionDTO);
}
out.collect(lists);
}
});
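The snippet above collects into a Collector&lt;Object&gt;, which erases the element type for downstream operators. A typed variant keeps the List element type explicit (a sketch; batched is an illustrative name, and it assumes the same imports as above plus SingleOutputStreamOperator, ArrayList and List):
JAVA
//Collect each count window into a List while keeping the element type
SingleOutputStreamOperator<List<VisitActionDTO>> batched = kafkaSource
        .countWindowAll(4)
        .apply(new AllWindowFunction<VisitActionDTO, List<VisitActionDTO>, GlobalWindow>() {
            @Override
            public void apply(GlobalWindow window, Iterable<VisitActionDTO> values, Collector<List<VisitActionDTO>> out) {
                List<VisitActionDTO> batch = new ArrayList<>();
                values.forEach(batch::add);
                out.collect(batch);
            }
        });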
VisitActionJdbcSink.java:
package com.lazyshop.flink.track.visitaction.sink;
import cn.hutool.core.util.URLUtil;
import com.lazyshop.flink.track.visitaction.configuration.ClickhouseConfig;
import com.lazyshop.flink.track.visitaction.entity.dto.VisitActionDTO;
import lombok.extern.slf4j.Slf4j;
import org.apache.flink.connector.jdbc.JdbcConnectionOptions;
import org.apache.flink.connector.jdbc.JdbcExecutionOptions;
import org.apache.flink.connector.jdbc.JdbcSink;
import org.apache.flink.connector.jdbc.JdbcStatementBuilder;
import org.apache.flink.streaming.api.functions.sink.SinkFunction;
import java.sql.PreparedStatement;
import java.sql.SQLException;
/**
 * Custom JDBC SinkFunction for the VisitAction table.
 *
 * @author oliver
 */
@Slf4j
public class VisitActionJdbcSink {
/**
 * Builds the customized SinkFunction.
 *
 * @param sql                           the SQL statement to execute
 * @param clickhouseConfig              the ClickHouse connection configuration
 * @param batchSize                     the number of records per batch write
 * @param batchIntervalMs               the minimum interval between batch writes
 * @param maxRetries                    the number of retries when an insert fails
 * @param connectionCheckTimeoutSeconds the database connection timeout
 * @return the customized SinkFunction
 */
public static SinkFunction<VisitActionDTO> getJdbcSink(
String sql,
ClickhouseConfig clickhouseConfig,
int batchSize,
long batchIntervalMs,
int maxRetries,
int connectionCheckTimeoutSeconds
) {
//This statement builder binds each record's field values to the placeholders of the PreparedStatement
return JdbcSink.sink(
sql,
(JdbcStatementBuilder<VisitActionDTO>) (preparedStatement, visitActionDTO) -> {
log.info("组装预处理字段数据");
preparedStatementBuild(preparedStatement, visitActionDTO);
},
JdbcExecutionOptions.builder()
.withBatchSize(batchSize)
.withBatchIntervalMs(batchIntervalMs)
.withMaxRetries(maxRetries)
.build(),
new JdbcConnectionOptions.JdbcConnectionOptionsBuilder()
.withUrl(clickhouseConfig.getUrl())
.withDriverName(clickhouseConfig.getDriverClassName())
.withConnectionCheckTimeoutSeconds(connectionCheckTimeoutSeconds)
.withUsername(clickhouseConfig.getUsername()).withPassword(clickhouseConfig.getPassword())
.build()
);
}
/**
 * Binds the record's field values to the PreparedStatement placeholders.
 *
 * @param preparedStatement the PreparedStatement to populate
 * @param visitActionDTO    the data entity
 * @throws SQLException on SQL errors
 */
private static void preparedStatementBuild(PreparedStatement preparedStatement, VisitActionDTO visitActionDTO) throws SQLException {
preparedStatement.setString(1, visitActionDTO.getIdVa());
preparedStatement.setString(2, visitActionDTO.getIdsite());
preparedStatement.setString(3, visitActionDTO.getIdvisitor());
preparedStatement.setString(4, visitActionDTO.getIdvisit());
preparedStatement.setString(5, URLUtil.encodeAll(visitActionDTO.getActionUrlRef()));
preparedStatement.setString(6, visitActionDTO.getActionNameRef());
preparedStatement.setString(7, visitActionDTO.getCustomFloat());
preparedStatement.setInt(8, visitActionDTO.getPageviewPosition());
preparedStatement.setLong(9, visitActionDTO.getServerTime());
preparedStatement.setString(10, visitActionDTO.getIdpageview());
preparedStatement.setString(11, visitActionDTO.getActionName());
preparedStatement.setString(12, URLUtil.encodeAll(visitActionDTO.getActionUrl()));
preparedStatement.setString(13, visitActionDTO.getSearchCat());
preparedStatement.setInt(14, visitActionDTO.getSearchCount());
preparedStatement.setInt(15, visitActionDTO.getTimeSpentRefAction());
preparedStatement.setString(16, visitActionDTO.getActionProductCat());
preparedStatement.setString(17, visitActionDTO.getActionProductCat2());
preparedStatement.setString(18, visitActionDTO.getActionProductCat3());
preparedStatement.setString(19, visitActionDTO.getActionProductCat4());
preparedStatement.setString(20, visitActionDTO.getActionProductCat5());
preparedStatement.setString(21, visitActionDTO.getActionProductName());
preparedStatement.setString(22, visitActionDTO.getProductPrice());
preparedStatement.setString(23, visitActionDTO.getActionProductSku());
preparedStatement.setString(24, visitActionDTO.getActionEventAction());
preparedStatement.setString(25, visitActionDTO.getActionEventCategory());
preparedStatement.setString(26, visitActionDTO.getActionContentInteraction());
preparedStatement.setString(27, visitActionDTO.getActionContentName());
preparedStatement.setString(28, visitActionDTO.getActionContentPiece());
preparedStatement.setString(29, visitActionDTO.getActionContentTarget());
preparedStatement.setString(30, visitActionDTO.getTimeDomCompletion());
preparedStatement.setInt(31, visitActionDTO.getTimeDomProcessing());
preparedStatement.setInt(32, visitActionDTO.getTimeNetwork());
preparedStatement.setString(33, visitActionDTO.getTimeOnLoad());
preparedStatement.setLong(34, visitActionDTO.getTimeServer());
preparedStatement.setLong(35, visitActionDTO.getTimeTransfer());
preparedStatement.setString(36, visitActionDTO.getTimeSpent());
preparedStatement.setString(37, visitActionDTO.getLocationIp());
preparedStatement.setString(38, visitActionDTO.getUserId());
preparedStatement.setString(39, visitActionDTO.getRefererKeyword());
preparedStatement.setString(40, visitActionDTO.getRefererName());
preparedStatement.setInt(41, visitActionDTO.getRefererType());
preparedStatement.setString(42, URLUtil.encodeAll(visitActionDTO.getRefererUrl()));
preparedStatement.setString(43, visitActionDTO.getLocationBrowserLang());
preparedStatement.setString(44, visitActionDTO.getConfigBrowserEngine());
preparedStatement.setString(45, visitActionDTO.getConfigBrowserName());
preparedStatement.setString(46, visitActionDTO.getConfigBrowserVersion());
preparedStatement.setInt(47, visitActionDTO.getConfigClientType());
preparedStatement.setString(48, visitActionDTO.getConfigDeviceBrand());
preparedStatement.setString(49, visitActionDTO.getConfigDeviceModel());
preparedStatement.setInt(50, visitActionDTO.getConfigDeviceType());
preparedStatement.setString(51, visitActionDTO.getConfigOs());
preparedStatement.setString(52, visitActionDTO.getConfigOsVersion());
preparedStatement.setString(53, visitActionDTO.getVisitorLocaltime());
preparedStatement.setString(54, visitActionDTO.getConfigResolution());
preparedStatement.setByte(55, visitActionDTO.getConfigCookie());
preparedStatement.setByte(56, visitActionDTO.getConfigFlash());
preparedStatement.setByte(57, visitActionDTO.getConfigJava());
preparedStatement.setByte(58, visitActionDTO.getConfigPdf());
preparedStatement.setByte(59, visitActionDTO.getConfigQuicktime());
preparedStatement.setByte(60, visitActionDTO.getConfigRealplayer());
preparedStatement.setByte(61, visitActionDTO.getConfigSilverlight());
preparedStatement.setByte(62, visitActionDTO.getConfigWindowsmedia());
preparedStatement.setString(63, visitActionDTO.getLocationCity());
preparedStatement.setString(64, visitActionDTO.getLocationCountry());
preparedStatement.setString(65, visitActionDTO.getLocationLatitude());
preparedStatement.setString(66, visitActionDTO.getLocationLongitude());
preparedStatement.setString(67, visitActionDTO.getLocationRegion());
preparedStatement.setString(68, URLUtil.encodeAll(visitActionDTO.getUserAgentStr()));
preparedStatement.setString(69, visitActionDTO.getIdgoal());
preparedStatement.setString(70, visitActionDTO.getIdorder());
preparedStatement.setBigDecimal(71, visitActionDTO.getRevenue());
preparedStatement.setBigDecimal(72, visitActionDTO.getRevenueShipping());
preparedStatement.setBigDecimal(73, visitActionDTO.getRevenueSubtotal());
preparedStatement.setBigDecimal(74, visitActionDTO.getRevenueTax());
preparedStatement.setBigDecimal(75, visitActionDTO.getRevenueDiscount());
preparedStatement.setString(76, visitActionDTO.getItems());
}
}
VisitActionDTO.java:
package com.lazyshop.flink.track.visitaction.entity.dto;
import com.baomidou.mybatisplus.annotation.TableField;
import lombok.Data;
import javax.validation.constraints.NotNull;
import java.math.BigDecimal;
/**
* <p>
* VisitActionDTO
* </p>
*
* @author oliver.li
* @since 2022-03-31 04:04:21
*/
@Data
public class VisitActionDTO {
@NotNull(message = "主键id不能为空")
@TableField("id_va")
private String idVa;
@NotNull(message = "站点id不能为空")
@TableField("idsite")
private String idsite;
@NotNull(message = "访客id不能为空")
@TableField("idvisitor")
private String idvisitor;
@TableField("idvisit")
private String idvisit;
@TableField("action_url_ref")
private String actionUrlRef;
@TableField("action_name_ref")
private String actionNameRef;
@TableField("custom_float")
private String customFloat;
@TableField("pageview_position")
private Integer pageviewPosition;
@TableField("server_time")
private Long serverTime;
@TableField("idpageview")
private String idpageview;
@TableField("action_name")
private String actionName;
@TableField("action_url")
private String actionUrl;
@TableField("search_cat")
private String searchCat;
@TableField("search_count")
private Integer searchCount;
@TableField("time_spent_ref_action")
private Integer timeSpentRefAction;
@TableField("action_product_cat")
private String actionProductCat;
@TableField("action_product_cat2")
private String actionProductCat2;
@TableField("action_product_cat3")
private String actionProductCat3;
@TableField("action_product_cat4")
private String actionProductCat4;
@TableField("action_product_cat5")
private String actionProductCat5;
@TableField("action_product_name")
private String actionProductName;
@TableField("product_price")
private String productPrice;
@TableField("action_product_sku")
private String actionProductSku;
@TableField("action_event_action")
private String actionEventAction;
@TableField("action_event_category")
private String actionEventCategory;
@TableField("action_content_interaction")
private String actionContentInteraction;
@TableField("action_content_name")
private String actionContentName;
@TableField("action_content_piece")
private String actionContentPiece;
@TableField("action_content_target")
private String actionContentTarget;
@TableField("time_dom_completion")
private String timeDomCompletion;
@TableField("time_dom_processing")
private Integer timeDomProcessing;
@TableField("time_network")
private Integer timeNetwork;
@TableField("time_on_load")
private String timeOnLoad;
@TableField("time_server")
private Long timeServer;
@TableField("time_transfer")
private Long timeTransfer;
@TableField("time_spent")
private String timeSpent;
/**
 * Local time in the visitor's time zone
 */
@TableField("visitor_localtime")
private String visitorLocaltime;
/**
 * User ID (only set for visits made after login)
 */
@TableField("user_id")
private String userId;
/**
 * Referrer type of the visit.
 */
@TableField("referer_type")
private Integer refererType;
/**
 * Name of the referring site; its value depends on referer_type.
 */
@TableField("referer_name")
private String refererName;
/**
 * URL of the referring site; its value depends on referer_type.
 */
@TableField("referer_url")
private String refererUrl;
/**
 * If the referrer is a search engine, this is the user's search keyword (may be unavailable because search engines encrypt it).
 */
@TableField("referer_keyword")
private String refererKeyword;
/**
 * Client operating system name
 */
@TableField("config_os")
private String configOs;
/**
 * Visitor's browser name
 */
@TableField("config_browser_name")
private String configBrowserName;
/**
 * Visitor's browser version
 */
@TableField("config_browser_version")
private String configBrowserVersion;
/**
 * Visitor's screen resolution (e.g. '1024×768')
 */
@TableField("config_resolution")
private String configResolution;
/**
 * Whether the visitor's browser can view PDF files
 */
@TableField("config_pdf")
private byte configPdf;
/**
 * Whether the visitor's browser can play Flash
 */
@TableField("config_flash")
private byte configFlash;
/**
 * Whether the visitor's browser can run Java
 */
@TableField("config_java")
private byte configJava;
/**
 * Whether the visitor's browser uses the QuickTime plugin to play media files
 */
@TableField("config_quicktime")
private byte configQuicktime;
/**
 * Whether the visitor's browser can play RealPlayer media files
 */
@TableField("config_realplayer")
private byte configRealplayer;
/**
 * Whether the visitor's browser uses Windows Media Player to play media files
 */
@TableField("config_windowsmedia")
private byte configWindowsmedia;
/**
 * Whether the visitor's browser can run Silverlight applications
 */
@TableField("config_silverlight")
private byte configSilverlight;
/**
 * Whether the visitor's browser has cookies enabled
 */
@TableField("config_cookie")
private byte configCookie;
/**
 * Visitor's public IP address
 */
@TableField("location_ip")
private String locationIp;
/**
 * Visitor's browser language
 */
@TableField("location_browser_lang")
private String locationBrowserLang;
/**
 * Two-letter code of the country the visitor was in
 */
@TableField("location_country")
private String locationCountry;
/**
 * Code of the region the visitor was in
 */
@TableField("location_region")
private String locationRegion;
/**
 * Name of the city the visitor was in
 */
@TableField("location_city")
private String locationCity;
/**
 * Latitude of the visitor's location
 */
@TableField("location_latitude")
private String locationLatitude;
/**
 * Longitude of the visitor's location
 */
@TableField("location_longitude")
private String locationLongitude;
/**
 * Visitor's browser engine
 */
@TableField("config_browser_engine")
private String configBrowserEngine;
@TableField("config_client_type")
private Integer configClientType;
@TableField("config_device_brand")
private String configDeviceBrand;
@TableField("config_device_model")
private String configDeviceModel;
@TableField("config_device_type")
private Integer configDeviceType;
@TableField("config_os_version")
private String configOsVersion;
/**
 * The user's User-Agent string
 */
@TableField("user_agent_str")
private String userAgentStr;
/* ----- Conversion-related fields ----- */
/**
 * Goal ID of the conversion
 */
@TableField("idgoal")
private String idgoal;
/**
 * Order ID
 */
@TableField("idorder")
private String idorder;
/**
 * Order total amount
 */
@TableField("revenue")
private BigDecimal revenue;
/**
 * Shipping cost
 */
@TableField("revenue_shipping")
private BigDecimal revenueShipping;
/**
 * Order subtotal
 */
@TableField("revenue_subtotal")
private BigDecimal revenueSubtotal;
/**
 * Total tax
 */
@TableField("revenue_tax")
private BigDecimal revenueTax;
/**
 * Total discount
 */
@TableField("revenue_discount")
private BigDecimal revenueDiscount;
/**
 * Order item list as a JSON string
 */
@TableField("items")
private String items;
}
VisitActionConstant.java:
package com.lazyshop.flink.track.visitaction.constant;
/**
 * Constants for visit-action processing.
 *
 * @author oliver.li
 * @title: VisitActionConstant
 * @projectName lazyshop
 * @date 2023-1-4 10:12
 */
public class VisitActionConstant {
/**
 * Kafka topic: the data collected from requests
 */
public static final String TOPIC_REQUESTS_LIST = "requests-list";
/**
 * Kafka consumer group: the service name
 */
public static final String GROUP_ID = "gateway-track";
/**
 * TODO: number of records buffered before the JDBC write to ClickHouse; increase it in production to avoid frequent small inserts.
 * Batch write size
 */
public static final int JDBC_EXECUTION_OPT_BATCH_SIZE = 4;
/**
 * Minimum interval between batch writes
 */
public static final long JDBC_EXECUTION_OPT_BATCH_INTERVAL_MS = 0;
/**
 * Number of retries when an insert fails. Note: only SQLException and its subclasses trigger a retry.
 */
public static final int JDBC_EXECUTION_OPT_MAX_RETRIES = 0;
/**
 * Database connection timeout
 */
public static final int JDBC_CONNECTION_OPT_CHECK_TIME_OUT_SECONDS = 60;
/**
 * Flink checkpoint interval
 */
public static final long FLINK_CHECKPOINTING_INTERVAL_MS = 120000;
/**
 * Maximum number of parallel Flink tasks
 */
public static final int FLINK_ENV_MAX_PARALLELISM = 256;
}