flume之source自定义offset(保证flume数据不丢失和一致性)

本文介绍了一种自定义Apache Flume数据源的方法,通过创建并实现特定接口,该数据源能够从文件中读取数据,并使用偏移量进行状态跟踪,确保数据的准确性和完整性。同时,文章提供了详细的代码实现,包括如何配置Flume源,以及如何在项目中引入Maven依赖。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

引入的maven依赖

 
  1. <dependency>

  2. <groupId>org.apache.flume</groupId>

  3. <artifactId>flume-ng-core</artifactId>

  4. <version>1.8.0</version>

  5. <!-- 开发时引入依赖,打包时不包含依赖 -->

  6. <scope>provided</scope>

  7. </dependency>

代码

 
  1. public class MyFlumeSource extends AbstractSource implements Configurable,EventDrivenSource{

  2. private static final Logger logger = LoggerFactory.getLogger(MyFlumeSource.class);

  3. //数据源的文件

  4. private String filePath;

  5. //保存偏移量的文件

  6. private String posiFile;

  7. //等待时长

  8. private Long interval;

  9. //编码格式

  10. private String charset;

  11. private FileRunnable fileRunnable;

  12. private ExecutorService pool;

  13.  
  14. /**

  15. * 初始化Flume配置信息

  16. * @param context

  17. */

  18. public void configure(Context context) {

  19. filePath = context.getString("filePath");

  20. posiFile = context.getString("posiFile");

  21. interval = context.getLong("interval",2000L);

  22. charset = context.getString("charset","UTF-8");

  23. }

  24.  
  25. @Override

  26. public synchronized void start() {

  27. pool = Executors.newSingleThreadExecutor();

  28. fileRunnable = new FileRunnable(filePath,posiFile,interval,charset,getChannelProcessor());

  29. pool.execute(fileRunnable);

  30. super.start();

  31. }

  32.  
  33. @Override

  34. public synchronized void stop() {

  35. fileRunnable.setFlag(false);

  36. pool.shutdown();

  37. while (!pool.isTerminated()) {

  38. logger.debug("Waiting for exec executor service to stop");

  39. try {

  40. pool.awaitTermination(500, TimeUnit.MILLISECONDS);

  41. } catch (InterruptedException e) {

  42. logger.debug("Interrupted while waiting for exec executor service "

  43. + "to stop. Just exiting.");

  44. Thread.currentThread().interrupt();

  45. }

  46. }

  47. super.stop();

  48. }

  49. private static class FileRunnable implements Runnable{

  50. private boolean flag = true;

  51. //偏移量

  52. private Long offset =0L;

  53. private Long interval;

  54. private String charset;

  55. //可以直接从偏移量开始读取数据

  56. private RandomAccessFile randomAccessFile;

  57. //可以发送给channel的工具类

  58. private ChannelProcessor channelProcessor;

  59. private File file;

  60.  
  61. public void setFlag(boolean flag) {

  62. this.flag = flag;

  63. }

  64.  
  65. public FileRunnable(String filePath, String posiFile, Long interval, String charset, ChannelProcessor channelProcessor) {

  66. this.interval = interval;

  67. this.charset = charset;

  68. this.channelProcessor = channelProcessor;

  69. file = new File(posiFile);

  70. if (!file.exists()){

  71. try {

  72. file.createNewFile();

  73. } catch (IOException e) {

  74. logger.error("create posiFile file error",e);

  75. }

  76. }

  77. try {

  78. String offsetStr = FileUtils.readFileToString(file);

  79. if (offsetStr != null && !"".equals(offsetStr)){

  80. offset = Long.parseLong(offsetStr);

  81. }

  82. } catch (IOException e) {

  83. logger.error("read posiFile file error",e);

  84. }

  85. try {

  86. randomAccessFile = new RandomAccessFile(filePath,"r");

  87. randomAccessFile.seek(offset);

  88.  
  89. } catch (FileNotFoundException e) {

  90. logger.error("read filePath file error",e);

  91. } catch (IOException e) {

  92. logger.error("randomAccessFile seek error",e);

  93. }

  94. }

  95.  
  96. public void run() {

  97. while (flag){

  98. try {

  99. String line = randomAccessFile.readLine();

  100. if (line != null){

  101. //向channel发送数据

  102. channelProcessor.processEvent(EventBuilder.withBody(line, Charset.forName(charset)));

  103. offset = randomAccessFile.getFilePointer();

  104. FileUtils.writeStringToFile(file,offset.toString());

  105. }else {

  106. Thread.sleep(interval);

  107. }

  108. } catch (IOException e) {

  109. logger.error("read randomAccessFile error",e);

  110. } catch (InterruptedException e) {

  111. logger.error("sleep error",e);

  112. }

  113. }

  114. }

  115. }

  116. }

将项目打成jar包放到flume的lib目录下就可以了

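在flume启动的配置文件中,将source的类型设置为自定义类的全限定名:a1.sources.r1.type=包名.类名,并通过 a1.sources.r1.filePath、a1.sources.r1.posiFile 指定数据文件和偏移量文件(interval、charset 为可选参数,默认值分别为 2000 和 UTF-8)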

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值