package com.train.data.processor;
import com.train.data.config.ConfigManager;
import com.train.data.entity.InfoAppendix;
import com.train.data.entity.SaleRecord;
import com.train.data.utils.HdfsUtils;
import com.train.data.utils.HiveTableUtils;
import com.train.data.utils.KerberosUtils;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.runtime.state.hashmap.HashMapStateBackend;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.CheckpointConfig;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.apache.flink.streaming.api.functions.source.RichSourceFunction;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.util.Collector;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.TimeUnit;
/**
 * Debug version: text-file processing job that logs detailed SQL statements and error information.
 */
public class ZipFileProcessorJob {
private static final Logger logger = LogManager.getLogger(ZipFileProcessorJob.class);
public static void main(String[] args) throws Exception {
logger.info("启动文本文件处理作业(调试版本)...");
try {
// 1. Set critical system properties first (before any Flink operation)
setupCriticalSystemProperties();
// 2. Pre-set system properties to avoid Kerberos initialization problems
setupSystemProperties();
// 3. Initialize Kerberos authentication (if enabled)
initializeKerberosIfEnabled();
// 4. Create the execution environment (special configuration to avoid delegation token issues)
StreamExecutionEnvironment env = createSecureExecutionEnvironment();
// 5. Configure the Flink job
configureFlinkJob(env);
// 6. Create the data source: periodically scan the HDFS directory and read text files directly
DataStream<String> textFileStream = env
.addSource(new TextFileSourceFunction())
.name("TextFileSource")
.assignTimestampsAndWatermarks(WatermarkStrategy.noWatermarks());
// 7. Read file contents
DataStream<Tuple2<String, String>> fileContentStream = textFileStream
.map(new TextFileReadFunction())
.name("TextFileReader");
// 8. Parse and write to the info_appendix table
DataStream<InfoAppendix> infoAppendixStream = fileContentStream
.filter(t -> t.f0.contains("GASMZ_info_appendix"))
.flatMap(new InfoAppendixParseFunction())
.name("InfoAppendixParser");
infoAppendixStream.addSink(new DebugInfoAppendixSink())
.name("InfoAppendixDebugSink");
// 9. Parse and write to the sale_record table
DataStream<SaleRecord> saleRecordStream = fileContentStream
.filter(t -> t.f0.contains("GASMZ_sale"))
.flatMap(new SaleRecordParseFunction())
.name("SaleRecordParser");
saleRecordStream.addSink(new DebugSaleRecordSink())
.name("SaleRecordDebugSink");
// 10. Execute the job
env.execute("TextFileProcessorJob-Debug");
} catch (Exception e) {
logger.error("文本文件处理作业启动失败", e);
throw e;
}
}
/**
 * Sets critical system properties (must be called before the Flink environment is created).
 */
private static void setupCriticalSystemProperties() {
// Critical: disable Flink's delegation token manager
System.setProperty("flink.security.kerberos.token.provider.enabled", "false");
// Run Flink in local execution mode
System.setProperty("flink.execution.target", "local");
// Disable Flink security-related features
System.setProperty("flink.security.ssl.enabled", "false");
// Set the Hadoop user
System.setProperty("HADOOP_USER_NAME", "hive");
logger.info("关键系统属性设置完成");
}
/**
 * Creates a local execution environment with security-related options disabled,
 * falling back to the default environment if creation fails.
 */
private static StreamExecutionEnvironment createSecureExecutionEnvironment() {
try {
// Create the configuration
org.apache.flink.configuration.Configuration config = new org.apache.flink.configuration.Configuration();
// Disable security-related settings
config.setBoolean("security.kerberos.login.use-ticket-cache", false);
// Use local execution
config.setString("execution.target", "local");
// Create the local environment
StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment(2, config);
logger.info("安全执行环境创建成功");
return env;
} catch (Exception e) {
logger.warn("创建安全执行环境失败,使用默认环境: {}", e.getMessage());
return StreamExecutionEnvironment.getExecutionEnvironment();
}
}
/**
 * Sets system properties to avoid initialization problems.
 */
private static void setupSystemProperties() {
try {
// Set the Hadoop configuration directory
System.setProperty("hadoop.home.dir", System.getProperty("user.dir"));
if (ConfigManager.isKerberosEnabled()) {
// Set the Kerberos configuration file path
String krb5ConfPath = ConfigManager.getKrb5ConfPath();
if (krb5ConfPath != null) {
// If the path is relative, convert it to an absolute path
if (!new java.io.File(krb5ConfPath).isAbsolute()) {
krb5ConfPath = System.getProperty("user.dir") + java.io.File.separator + krb5ConfPath;
}
System.setProperty("java.security.krb5.conf", krb5ConfPath);
logger.info("设置Kerberos配置文件路径: {}", krb5ConfPath);
}
// Set the JAAS configuration file (resolved against the working directory)
String jaasConfPath = System.getProperty("user.dir") + "/src/main/resources/flink-jaas.conf";
System.setProperty("java.security.auth.login.config", jaasConfPath);
// Set other Kerberos system properties
System.setProperty("javax.security.auth.useSubjectCredsOnly", "false");
} else {
// With Kerberos disabled, use simple authentication
System.setProperty("hadoop.security.authentication", "simple");
System.setProperty("hadoop.security.authorization", "false");
logger.info("Kerberos已禁用,使用简单认证模式");
}
} catch (Exception e) {
logger.warn("设置系统属性时发生错误: {}", e.getMessage());
}
}
/**
 * Initializes Kerberos if it is enabled.
 */
private static void initializeKerberosIfEnabled() {
try {
if (ConfigManager.isKerberosEnabled()) {
logger.info("Kerberos已启用,开始初始化...");
KerberosUtils.initKerberos();
logger.info("Kerberos初始化完成");
} else {
logger.info("Kerberos已禁用,跳过Kerberos初始化");
}
} catch (Exception e) {
logger.error("Kerberos初始化失败,但程序将继续运行", e);
}
}
/** Configures Flink job parameters. */
private static void configureFlinkJob(StreamExecutionEnvironment env) {
env.setParallelism(ConfigManager.getFlinkJobParallelism());
env.setRestartStrategy(RestartStrategies.fixedDelayRestart(
ConfigManager.getFlinkRestartAttempts(),
org.apache.flink.api.common.time.Time.of(ConfigManager.getFlinkRestartDelay(), TimeUnit.MILLISECONDS)
));
env.setStateBackend(new HashMapStateBackend());
env.enableCheckpointing(ConfigManager.getFlinkCheckpointInterval());
CheckpointConfig checkpointConfig = env.getCheckpointConfig();
checkpointConfig.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
checkpointConfig.setCheckpointTimeout(ConfigManager.getFlinkCheckpointTimeout());
checkpointConfig.setExternalizedCheckpointCleanup(
CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
logger.info("Flink 作业配置完成");
}
// ---------------------------------------------------------------------
// Custom SourceFunction: periodically scans the testInput directory for the target text files
// ---------------------------------------------------------------------
public static class TextFileSourceFunction extends RichSourceFunction<String> {
private static final Logger logger = LogManager.getLogger(TextFileSourceFunction.class);
private volatile boolean isRunning = true;
private final long scanInterval = ConfigManager.getZipProcessorInterval();
@Override
public void run(SourceContext<String> ctx) {
logger.info("TextFileSourceFunction 开始运行,扫描间隔: {} ms", scanInterval);
// Re-authenticate inside the TaskManager (if applicable)
try {
KerberosUtils.reloginInTaskManager();
} catch (Exception e) {
logger.warn("TaskManager 中 Kerberos 认证失败: {}", e.getMessage());
}
while (isRunning) {
try {
String inputPath = ConfigManager.getHdfsInputPath();
List<String> textFiles;
// Scan for .txt files
try {
textFiles = KerberosUtils.doAs(() -> HdfsUtils.listFilesByExtension(inputPath, ".txt"));
logger.debug("扫描目录 {},找到 {} 个文本文件", inputPath, textFiles.size());
} catch (Exception e) {
logger.error("扫描目录失败: {}", e.getMessage());
// Try re-authenticating, then scan again
try {
KerberosUtils.reloginInTaskManager();
textFiles = KerberosUtils.doAs(() -> HdfsUtils.listFilesByExtension(inputPath, ".txt"));
} catch (Exception retryEx) {
logger.error("重试后仍失败: {}", retryEx.getMessage());
textFiles = new ArrayList<>();
}
}
// Emit the file paths
for (String file : textFiles) {
logger.info("发现文本文件: {}", file);
ctx.collect(file);
}
Thread.sleep(scanInterval);
} catch (Exception ex) {
logger.error("扫描文本文件时发生错误", ex);
try {
Thread.sleep(10_000);
} catch (InterruptedException ignored) {
}
}
}
}
@Override
public void cancel() {
isRunning = false;
logger.info("TextFileSourceFunction 已停止");
}
}
// ---------------------------------------------------------------------
// MapFunction: reads the contents of a text file
// ---------------------------------------------------------------------
public static class TextFileReadFunction implements MapFunction<String, Tuple2<String, String>> {
private static final Logger logger = LogManager.getLogger(TextFileReadFunction.class);
@Override
public Tuple2<String, String> map(String filePath) throws Exception {
logger.info("开始读取文本文件: {}", filePath);
FSDataInputStream is = KerberosUtils.doAs(() -> HdfsUtils.openFile(filePath));
StringBuilder sb = new StringBuilder();
try (BufferedReader br = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8))) {
String line;
while ((line = br.readLine()) != null) {
sb.append(line).append('\n');
}
}
// Delete the source file to avoid reprocessing
boolean deleted = KerberosUtils.doAs(() -> HdfsUtils.delete(filePath, false));
if (deleted) {
logger.info("已处理并删除文件: {}", filePath);
} else {
logger.warn("文件删除失败: {}", filePath);
}
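// Derive the file name from the full HDFS path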
String fileName = filePath.substring(filePath.lastIndexOf('/') + 1);
return Tuple2.of(fileName, sb.toString());
}
}
// -------------------------------------------------------------------
// InfoAppendix parsing (FlatMap) - fixed field parsing
// -------------------------------------------------------------------
public static class InfoAppendixParseFunction
implements FlatMapFunction<Tuple2<String, String>, InfoAppendix> {
private static final Logger logger = LogManager.getLogger(InfoAppendixParseFunction.class);
@Override
public void flatMap(Tuple2<String, String> value, Collector<InfoAppendix> out) {
String fileName = value.f0;
String[] rows = value.f1.split("\\n");
if (rows.length == 0) {
logger.warn("文件 {} 内容为空", fileName);
return;
}
for (String raw : rows) {
try {
String line = raw.replace("\r", "").trim();
if (line.isEmpty() || line.startsWith("statist_date")) {
continue;
}
String[] f = HiveTableUtils.parseCsvLine(line, ConfigManager.getFileDelimiter());
// Fix: check that the row has 10 fields
if (f.length < 10) {
logger.warn("文件 {} 行字段不足 (<10):{}", fileName, line);
continue;
}
InfoAppendix info = new InfoAppendix();
info.setStatistDate(f[0]);
info.setStatisticsDate(f[1]);
info.setInnerCode(f[2]);
info.setOfficeNo(f[3]);
info.setWindowNo(f[4]);
info.setTicketNo(f[5]);
info.setIdKind(f[6]);
info.setIdNo(f[7]);
info.setIdName(f[8]);
// Fix: read the area_center_code field correctly
info.setAreaCenterCode(f[9]);
info.setPk(HiveTableUtils.generatePrimaryKey(
info.getStatisticsDate(), info.getOfficeNo(),
info.getWindowNo(), info.getTicketNo()));
logger.debug("解析成功 InfoAppendix:ticketNo={}, pk={}, areaCode={}",
info.getTicketNo(), info.getPk(), info.getAreaCenterCode());
out.collect(info);
} catch (Exception ex) {
logger.error("解析 InfoAppendix 行失败,文件 {}:{}", fileName, raw, ex);
}
}
}
}
// -------------------------------------------------------------------
// SaleRecord parsing (FlatMap) - fixed field parsing
// -------------------------------------------------------------------
public static class SaleRecordParseFunction
implements FlatMapFunction<Tuple2<String, String>, SaleRecord> {
private static final Logger logger = LogManager.getLogger(SaleRecordParseFunction.class);
@Override
public void flatMap(Tuple2<String, String> value, Collector<SaleRecord> out) {
String fileName = value.f0;
String[] rows = value.f1.split("\\n");
if (rows.length == 0) {
logger.warn("文件 {} 内容为空", fileName);
return;
}
for (String raw : rows) {
try {
String line = raw.replace("\r", "").trim();
if (line.isEmpty() || line.startsWith("statist_date")) {
continue;
}
String[] f = HiveTableUtils.parseCsvLine(line, ConfigManager.getFileDelimiter());
// Fix: check that the row has 26 fields
if (f.length < 26) {
logger.warn("文件 {} 行字段不足 (<26):{}", fileName, line);
continue;
}
SaleRecord r = new SaleRecord();
r.setStatistDate(f[0]);
r.setTrainDate(f[1]);
r.setBoardTrainCode(f[2]);
r.setFromTeleCode(f[3]);
r.setToTeleCode(f[4]);
r.setFromStationName(f[5]);
r.setToStationName(f[6]);
r.setStartTime(f[7]);
r.setCoachNo(f[8]);
r.setSeatNo(f[9]);
r.setSeatTypeCode(f[10]);
r.setTicketType(f[11]);
r.setTicketPrice(f[12]);
r.setInnerCode(f[13]);
r.setSaleTime(f[14]);
r.setOfficeNo(f[15]);
r.setWindowNo(f[16]);
r.setOperaterNo(f[17]);
r.setTicketNo(f[18]);
r.setStatisticsDate(f[19]);
r.setSequenceNo(f[20]);
r.setStatisticsFlag(f[21]);
r.setRelayTicketType(f[22]);
r.setSaleMode(f[23]);
r.setTicketState(f[24]);
r.setAreaCenterCode(f[25]);
r.setPk(HiveTableUtils.generatePrimaryKey(
r.getStatisticsDate(), r.getOfficeNo(),
r.getWindowNo(), r.getTicketNo()));
logger.debug("解析成功 SaleRecord:ticketNo={}, pk={}, areaCode={}",
r.getTicketNo(), r.getPk(), r.getAreaCenterCode());
out.collect(r);
} catch (Exception ex) {
logger.error("解析 SaleRecord 行失败,文件 {}:{}", fileName, raw, ex);
}
}
}
}
// --------------------- Debug sink implementations ---------------------
/**
 * Debug version of the InfoAppendix sink: logs detailed SQL statements and error information.
 */
public static class DebugInfoAppendixSink extends RichSinkFunction<InfoAppendix> {
private static final Logger logger = LogManager.getLogger(DebugInfoAppendixSink.class);
private List<InfoAppendix> batch;
private static final int BATCH_SIZE = 1; // Set to 1 for debugging so records are processed one at a time
private boolean useHive = false;
private StreamTableEnvironment tableEnv;
private String partitionValue;
@Override
public void open(Configuration parameters) throws Exception {
super.open(parameters);
batch = new ArrayList<>();
partitionValue = HiveTableUtils.generatePartitionValue();
// Check whether the Hive sink is enabled
useHive = ConfigManager.isHiveSinkEnabled();
logger.info("=== DebugInfoAppendixSink 初始化开始 ===");
logger.info("useHive: {}", useHive);
logger.info("partitionValue: {}", partitionValue);
if (useHive) {
try {
logger.info("开始创建 TableEnvironment...");
// Create an independent TableEnvironment for each sink
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
EnvironmentSettings settings = EnvironmentSettings.newInstance().inStreamingMode().build();
tableEnv = StreamTableEnvironment.create(env, settings);
logger.info("TableEnvironment 创建成功");
// Configure the Hive catalog
logger.info("开始配置 Hive Catalog...");
if (configureHiveCatalog()) {
logger.info("InfoAppendix Sink Hive配置成功");
// Test the table connection
testTableConnection();
} else {
logger.warn("InfoAppendix Sink Hive配置失败,使用日志模式");
useHive = false;
}
} catch (Exception e) {
logger.error("InfoAppendix Sink 初始化失败,使用日志模式", e);
useHive = false;
}
}
logger.info("=== DebugInfoAppendixSink 初始化完成,Hive模式: {} ===", useHive);
}
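/**
 * Registers a Hive catalog named hive_catalog (using the configured database name and a
 * hive-conf-dir resolved from the working directory) and switches the TableEnvironment to it.
 */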
private boolean configureHiveCatalog() {
try {
String hiveDatabaseName = ConfigManager.getHiveDatabaseName();
logger.info("Hive数据库名: {}", hiveDatabaseName);
if (hiveDatabaseName == null || hiveDatabaseName.trim().isEmpty()) {
logger.warn("Hive数据库名为空");
return false;
}
String hiveConfDir = System.getProperty("user.dir") + "/src/main/resources/";
logger.info("Hive配置目录: {}", hiveConfDir);
java.io.File confDir = new java.io.File(hiveConfDir);
if (!confDir.exists()) {
logger.warn("Hive配置目录不存在: {}", hiveConfDir);
return false;
}
String hiveCatalogSql = String.format(
"CREATE CATALOG hive_catalog WITH (" +
"'type' = 'hive', " +
"'default-database' = '%s', " +
"'hive-conf-dir' = '%s'" +
")",
hiveDatabaseName, hiveConfDir
);
logger.info("执行Hive Catalog创建SQL: {}", hiveCatalogSql);
tableEnv.executeSql(hiveCatalogSql);
logger.info("使用Catalog: hive_catalog");
tableEnv.useCatalog("hive_catalog");
logger.info("使用数据库: {}", hiveDatabaseName);
tableEnv.useDatabase(hiveDatabaseName);
return true;
} catch (Exception e) {
logger.error("配置Hive Catalog失败: ", e);
return false;
}
}
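/**
 * Verifies that the target table is reachable by submitting a COUNT query;
 * throws a RuntimeException if the check fails.
 */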
private void testTableConnection() {
try {
String tableName = ConfigManager.getInfoAppendixTableName();
logger.info("测试表连接,表名: {}", tableName);
String testSql = "SELECT COUNT(*) FROM " + tableName + " LIMIT 1";
logger.info("执行测试SQL: {}", testSql);
KerberosUtils.doAs(() -> {
tableEnv.executeSql(testSql);
return null;
});
logger.info("表连接测试成功");
} catch (Exception e) {
logger.error("表连接测试失败: ", e);
throw new RuntimeException("表连接测试失败", e);
}
}
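/**
 * Buffers each incoming record and flushes once BATCH_SIZE is reached;
 * when the Hive sink is disabled, the record is only logged.
 */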
@Override
public void invoke(InfoAppendix value, Context ctx) {
if (value == null) return;
logger.info("=== 收到InfoAppendix数据 ===");
logger.info("ticketNo: {}, idName: {}, areaCode: {}, pk: {}",
value.getTicketNo(), value.getIdName(), value.getAreaCenterCode(), value.getPk());
if (!useHive) {
logger.info("Hive未启用,记录到日志: ticketNo={}, idName={}, areaCode={}",
value.getTicketNo(), value.getIdName(), value.getAreaCenterCode());
return;
}
batch.add(value);
if (batch.size() >= BATCH_SIZE) {
processBatch();
}
}
@Override
public void close() throws Exception {
if (batch != null && !batch.isEmpty()) {
processBatch();
}
super.close();
}
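/**
 * Writes the buffered records to Hive one INSERT statement at a time, logging every
 * generated SQL string; records that fail fall back to log-only output.
 */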
private void processBatch() {
if (batch.isEmpty()) return;
logger.info("=== 开始处理InfoAppendix批次,大小: {} ===", batch.size());
try {
String tableName = ConfigManager.getInfoAppendixTableName();
logger.info("目标表名: {}", tableName);
for (InfoAppendix info : batch) {
try {
String sql = String.format(
"INSERT INTO %s PARTITION (sdate='%s') VALUES ('%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s')",
tableName, partitionValue,
escapeSqlValue(info.getStatistDate()),
escapeSqlValue(info.getStatisticsDate()),
escapeSqlValue(info.getInnerCode()),
escapeSqlValue(info.getOfficeNo()),
escapeSqlValue(info.getWindowNo()),
escapeSqlValue(info.getTicketNo()),
escapeSqlValue(info.getIdKind()),
escapeSqlValue(info.getIdNo()),
escapeSqlValue(info.getIdName()),
escapeSqlValue(info.getAreaCenterCode()),
escapeSqlValue(info.getPk())
);
logger.info("=== 执行SQL语句 ===");
logger.info("SQL: {}", sql);
KerberosUtils.doAs(() -> {
tableEnv.executeSql(sql);
return null;
});
logger.info("SQL执行成功: ticketNo={}", info.getTicketNo());
} catch (Exception e) {
logger.error("=== SQL执行失败 ===");
logger.error("ticketNo: {}", info.getTicketNo());
logger.error("错误类型: {}", e.getClass().getSimpleName());
logger.error("错误消息: {}", e.getMessage());
logger.error("详细堆栈: ", e);
// Fall back to log-only mode
logger.info("回退记录InfoAppendix数据: ticketNo={}, idName={}, areaCode={}",
info.getTicketNo(), info.getIdName(), info.getAreaCenterCode());
}
}
logger.info("InfoAppendix批量处理完成,批次大小: {}", batch.size());
} catch (Exception e) {
logger.error("InfoAppendix批量处理失败", e);
// Fall back to log-only mode for the whole batch
for (InfoAppendix info : batch) {
logger.info("回退记录InfoAppendix数据: ticketNo={}, idName={}, areaCode={}",
info.getTicketNo(), info.getIdName(), info.getAreaCenterCode());
}
} finally {
batch.clear();
}
}
}
/**
 * Debug version of the SaleRecord sink: logs detailed SQL statements and error information.
 */
public static class DebugSaleRecordSink extends RichSinkFunction<SaleRecord> {
private static final Logger logger = LogManager.getLogger(DebugSaleRecordSink.class);
private List<SaleRecord> batch;
private static final int BATCH_SIZE = 1; // Set to 1 for debugging so records are processed one at a time
private boolean useHive = false;
private StreamTableEnvironment tableEnv;
private String partitionValue;
@Override
public void open(Configuration parameters) throws Exception {
super.open(parameters);
batch = new ArrayList<>();
partitionValue = HiveTableUtils.generatePartitionValue();
// Check whether the Hive sink is enabled
useHive = ConfigManager.isHiveSinkEnabled();
logger.info("=== DebugSaleRecordSink 初始化开始 ===");
logger.info("useHive: {}", useHive);
logger.info("partitionValue: {}", partitionValue);
if (useHive) {
try {
logger.info("开始创建 TableEnvironment...");
// Create an independent TableEnvironment for each sink
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
EnvironmentSettings settings = EnvironmentSettings.newInstance().inStreamingMode().build();
tableEnv = StreamTableEnvironment.create(env, settings);
logger.info("TableEnvironment 创建成功");
// Configure the Hive catalog
logger.info("开始配置 Hive Catalog...");
if (configureHiveCatalog()) {
logger.info("SaleRecord Sink Hive配置成功");
// Test the table connection
testTableConnection();
} else {
logger.warn("SaleRecord Sink Hive配置失败,使用日志模式");
useHive = false;
}
} catch (Exception e) {
logger.error("SaleRecord Sink 初始化失败,使用日志模式", e);
useHive = false;
}
}
logger.info("=== DebugSaleRecordSink 初始化完成,Hive模式: {} ===", useHive);
}
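/**
 * Registers a Hive catalog named hive_catalog (using the configured database name and a
 * hive-conf-dir resolved from the working directory) and switches the TableEnvironment to it.
 */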
private boolean configureHiveCatalog() {
try {
String hiveDatabaseName = ConfigManager.getHiveDatabaseName();
logger.info("Hive数据库名: {}", hiveDatabaseName);
if (hiveDatabaseName == null || hiveDatabaseName.trim().isEmpty()) {
logger.warn("Hive数据库名为空");
return false;
}
String hiveConfDir = System.getProperty("user.dir") + "/src/main/resources/";
logger.info("Hive配置目录: {}", hiveConfDir);
java.io.File confDir = new java.io.File(hiveConfDir);
if (!confDir.exists()) {
logger.warn("Hive配置目录不存在: {}", hiveConfDir);
return false;
}
String hiveCatalogSql = String.format(
"CREATE CATALOG hive_catalog WITH (" +
"'type' = 'hive', " +
"'default-database' = '%s', " +
"'hive-conf-dir' = '%s'" +
")",
hiveDatabaseName, hiveConfDir
);
logger.info("执行Hive Catalog创建SQL: {}", hiveCatalogSql);
tableEnv.executeSql(hiveCatalogSql);
logger.info("使用Catalog: hive_catalog");
tableEnv.useCatalog("hive_catalog");
logger.info("使用数据库: {}", hiveDatabaseName);
tableEnv.useDatabase(hiveDatabaseName);
return true;
} catch (Exception e) {
logger.error("配置Hive Catalog失败: ", e);
return false;
}
}
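/**
 * Verifies that the target table is reachable by submitting a COUNT query;
 * throws a RuntimeException if the check fails.
 */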
private void testTableConnection() {
try {
String tableName = ConfigManager.getSaleRecordTableName();
logger.info("测试表连接,表名: {}", tableName);
String testSql = "SELECT COUNT(*) FROM " + tableName + " LIMIT 1";
logger.info("执行测试SQL: {}", testSql);
KerberosUtils.doAs(() -> {
tableEnv.executeSql(testSql);
return null;
});
logger.info("表连接测试成功");
} catch (Exception e) {
logger.error("表连接测试失败: ", e);
throw new RuntimeException("表连接测试失败", e);
}
}
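/**
 * Buffers each incoming record and flushes once BATCH_SIZE is reached;
 * when the Hive sink is disabled, the record is only logged.
 */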
@Override
public void invoke(SaleRecord value, Context ctx) {
if (value == null) return;
logger.info("=== 收到SaleRecord数据 ===");
logger.info("ticketNo: {}, trainCode: {}, areaCode: {}, pk: {}",
value.getTicketNo(), value.getBoardTrainCode(), value.getAreaCenterCode(), value.getPk());
if (!useHive) {
logger.info("Hive未启用,记录到日志: ticketNo={}, trainCode={}, areaCode={}",
value.getTicketNo(), value.getBoardTrainCode(), value.getAreaCenterCode());
return;
}
batch.add(value);
if (batch.size() >= BATCH_SIZE) {
processBatch();
}
}
@Override
public void close() throws Exception {
if (batch != null && !batch.isEmpty()) {
processBatch();
}
super.close();
}
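/**
 * Writes the buffered records to Hive one INSERT statement at a time, logging every
 * generated SQL string; records that fail fall back to log-only output.
 */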
private void processBatch() {
if (batch.isEmpty()) return;
logger.info("=== 开始处理SaleRecord批次,大小: {} ===", batch.size());
try {
String tableName = ConfigManager.getSaleRecordTableName();
logger.info("目标表名: {}", tableName);
for (SaleRecord record : batch) {
try {
String sql = String.format(
"INSERT INTO %s PARTITION (sdate='%s') VALUES " +
"('%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s')",
tableName, partitionValue,
escapeSqlValue(record.getStatistDate()),
escapeSqlValue(record.getTrainDate()),
escapeSqlValue(record.getBoardTrainCode()),
escapeSqlValue(record.getFromTeleCode()),
escapeSqlValue(record.getToTeleCode()),
escapeSqlValue(record.getFromStationName()),
escapeSqlValue(record.getToStationName()),
escapeSqlValue(record.getStartTime()),
escapeSqlValue(record.getCoachNo()),
escapeSqlValue(record.getSeatNo()),
escapeSqlValue(record.getSeatTypeCode()),
escapeSqlValue(record.getTicketType()),
escapeSqlValue(record.getTicketPrice()),
escapeSqlValue(record.getInnerCode()),
escapeSqlValue(record.getSaleTime()),
escapeSqlValue(record.getOfficeNo()),
escapeSqlValue(record.getWindowNo()),
escapeSqlValue(record.getOperaterNo()),
escapeSqlValue(record.getTicketNo()),
escapeSqlValue(record.getStatisticsDate()),
escapeSqlValue(record.getSequenceNo()),
escapeSqlValue(record.getStatisticsFlag()),
escapeSqlValue(record.getRelayTicketType()),
escapeSqlValue(record.getSaleMode()),
escapeSqlValue(record.getTicketState()),
escapeSqlValue(record.getAreaCenterCode()),
escapeSqlValue(record.getPk())
);
logger.info("=== 执行SQL语句 ===");
logger.info("SQL: {}", sql);
KerberosUtils.doAs(() -> {
tableEnv.executeSql(sql);
return null;
});
logger.info("SQL执行成功: ticketNo={}", record.getTicketNo());
} catch (Exception e) {
logger.error("=== SQL执行失败 ===");
logger.error("ticketNo: {}", record.getTicketNo());
logger.error("错误类型: {}", e.getClass().getSimpleName());
logger.error("错误消息: {}", e.getMessage());
logger.error("详细堆栈: ", e);
// Fall back to log-only mode
logger.info("回退记录SaleRecord数据: ticketNo={}, trainCode={}, areaCode={}",
record.getTicketNo(), record.getBoardTrainCode(), record.getAreaCenterCode());
}
}
logger.info("SaleRecord批量处理完成,批次大小: {}", batch.size());
} catch (Exception e) {
logger.error("SaleRecord批量处理失败", e);
// Fall back to log-only mode for the whole batch
for (SaleRecord record : batch) {
logger.info("回退记录SaleRecord数据: ticketNo={}, trainCode={}, areaCode={}",
record.getTicketNo(), record.getBoardTrainCode(), record.getAreaCenterCode());
}
} finally {
batch.clear();
}
}
}
/**
 * Escapes single quotes in SQL string values (e.g. O'Brien becomes O''Brien); null becomes an empty string.
 */
private static String escapeSqlValue(String value) {
if (value == null) {
return "";
}
return value.replace("'", "''");
}
}
This is my code.