flume开发--自定义Sink

本文介绍如何通过Flume自定义Sink将数据写入Aerospike数据库,并提供了一个具体的Java实现示例及配置说明。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

Flume可以通过自定义Sink的方式实现数据搜集并写入各种OLTP数据库,下面的例子是通过自定义Sink实现数据写入分布式K-V数据库Aerospike。

1. 自定义Sink代码如下
[java]  view plain  copy
  1. package kafka_sink.asd;  
  2.   
  3. import java.io.IOException;  
  4. import java.net.ConnectException;  
  5. import java.util.ArrayList;  
  6. import java.util.List;  
  7. import java.util.Map;  
  8. import java.util.Properties;  
  9. import org.apache.flume.Channel;  
  10. import org.apache.flume.Constants;  
  11. import org.apache.flume.Context;  
  12. import org.apache.flume.Event;  
  13. import org.apache.flume.EventDeliveryException;  
  14. import org.apache.flume.Transaction;  
  15. import org.apache.flume.conf.Configurable;  
  16. import org.apache.flume.sink.AbstractSink;  
  17. import com.aerospike.client.AerospikeException;  
  18. import com.aerospike.client.Bin;  
  19. import com.aerospike.client.Key;  
  20. import com.aerospike.client.Record;  
  21. import com.aerospike.client.async.AsyncClient;  
  22. import com.aerospike.client.listener.RecordListener;  
  23. import com.aerospike.client.listener.WriteListener;  
  24. import com.aerospike.client.policy.WritePolicy;  
  25. import com.aerospike.client.async.AsyncClientPolicy;  
  26. import com.aerospike.client.policy.Policy;  
  27. import com.aerospike.client.Host;  
  28. import org.slf4j.Logger;  
  29. import org.slf4j.LoggerFactory;  
  30. import java.util.regex.Matcher;  
  31. import java.util.regex.Pattern;  
  32.   
  33. public class Asdsink extends AbstractSink implements Configurable {  
  34.   //private String myProp;  
  35.   public static final String TOPIC_HDR = "topic";  
  36.   public static final String KEY_HDR = "key";  
  37.   //private String mz_tag_topic;  
  38.   //private AerospikeClient asd_client;  
  39.   private String ASD_HOST1;  
  40.   private String ASD_HOST2;  
  41.   private int ASD_PORT;  
  42.   private String ASD_NAME_SPACE = "cm";  
  43.   private String MZ_SET_NAME;  
  44.   private String MZ_BIN_NAME;  
  45.   private int batchSize;// 一次事务的event数量,整体提交  
  46.   private WritePolicy write_policy;  
  47.   private Policy policy;  
  48.   //Async Read and Write  
  49.   private AsyncClient asd_async_client;  
  50.   private AsyncClientPolicy async_client_policy;  
  51.   private boolean completed;  
  52.   
  53.   
  54.   @Override  
  55.   public void configure(Context context) {  
  56.     //String myProp = context.getString("myProp", "defaultValue");  
  57.   
  58.     // Process the myProp value (e.g. validation)  
  59.   
  60.     // Store myProp for later retrieval by process() method  
  61.     //this.myProp = myProp;  
  62.     ASD_HOST1 =  context.getString("asd_host1""127.0.0.1");  
  63.     ASD_HOST2 =  context.getString("asd_host2""127.0.0.1");  
  64.     ASD_PORT =  context.getInteger("asd_port",3000);  
  65.     SET_NAME = context.getString("set_name""xxx");  
  66.     BIN_NAME = context.getString("bin_name""xxx");  
  67.     batchSize = context.getInteger("batchSize",1000);  
  68.     System.out.printf("ASD_HOST1:%s\n",ASD_HOST1);  
  69.     System.out.printf("ASD_HOST2:%s\n",ASD_HOST2);  
  70.     System.out.printf("ASD_PORT:%d\n",ASD_PORT);  
  71.     System.out.printf("SET_NAME:%s\n",SET_NAME);  
  72.     System.out.printf("BIN_NAME:%s\n",BIN_NAME);  
  73.     System.out.printf("batchSize:%d\n",batchSize);  
  74.       
  75.   }  
  76.   
  77.   @Override  
  78.   public void start() {  
  79.     // Initialize the connection to the external repository (e.g. HDFS) that  
  80.     // this Sink will forward Events to ..  
  81.   
  82.     Host[] hosts = new Host[] {new Host(ASD_HOST1, 3000),  
  83.                                new Host(ASD_HOST2, 3000)};  
  84.       
  85.     async_client_policy = new AsyncClientPolicy();  
  86.     async_client_policy.asyncMaxCommands = 300;  
  87.     async_client_policy.failIfNotConnected = true;  
  88.     asd_async_client = new AsyncClient(async_client_policy, hosts);  
  89.     policy = new Policy();  
  90.     policy.timeout = 20;   
  91.     write_policy = new WritePolicy();  
  92.     write_policy.timeout = 20;  
  93.   }  
  94.   
  95.   @Override  
  96.   public void stop () {  
  97.     // Disconnect from the external respository and do any  
  98.     // additional cleanup (e.g. releasing resources or nulling-out  
  99.     // field values) ..  
  100.       asd_async_client.close();  
  101.   }  
  102.   
  103.   @Override  
  104.   public Status process() throws EventDeliveryException {  
  105.       Status status = null;  
  106.         // Start transaction  
  107.   
  108.         Channel ch = getChannel();  
  109.         Transaction txn = ch.getTransaction();  
  110.         txn.begin();  
  111.         try {  
  112.           // This try clause includes whatever Channel operations you want to do  
  113.           long processedEvent = 0;  
  114.           for (; processedEvent < batchSize; processedEvent++) {  
  115.               Event event = ch.take();  
  116.                 
  117.               byte[] eventBody;  
  118.   
  119.               if(event != null)  
  120.               {  
  121.                   eventBody = event.getBody();  
  122.                   String line= new String(eventBody,"UTF-8");  
  123.                   if (line.length() > 0 )  
  124.                   {  
  125.                       String[] key_tag = line.split("\t");  
  126.                       if(key_tag.length == 2){  
  127.                           String tmp_key = key_tag[0];  
  128.                           String tmp_tag = key_tag[1];  
  129.                           Key as_key = new Key(ASD_NAME_SPACE, SET_NAME, tmp_key);  
  130.                           Bin ad_bin = new Bin(BIN_NAME, tmp_tag);  
  131.                           try{  
  132.                               completed = false;  
  133.                               asd_async_client.get(policy,new ReadHandler(asd_async_client,policy,write_policy, as_key, ad_bin), as_key);  
  134.                               waitTillComplete();  
  135.                           }  
  136.                           catch (Throwable t) {  
  137.                               System.out.println("[ERROR][process]"+ t.toString());  
  138.                           }  
  139.                       }  
  140.                   }  
  141.               }  
  142.           }  
  143.             
  144.             
  145.           // Send the Event to the external repository.  
  146.           // storeSomeData(e);  
  147.           status = Status.READY;  
  148.           txn.commit();  
  149.         } catch (Throwable t) {  
  150.             
  151.           txn.rollback();  
  152.           // Log exception, handle individual exceptions as needed  
  153.   
  154.           status = Status.BACKOFF;  
  155.           // re-throw all Errors  
  156.           if (t instanceof Error) {  
  157.               System.out.println("[ERROR][process]"+ t.toString());  
  158.             throw (Error)t;  
  159.           }  
  160.         }  
  161.   
  162.         txn.close();  
  163.         return status;                    
  164.   }  
  165.     
  166.     
  167.   private class WriteHandler implements WriteListener {  
  168.         private final AsyncClient client;  
  169.         private final WritePolicy policy;  
  170.         private final Key key;  
  171.         private final Bin bin;  
  172.         private int failCount = 0;  
  173.           
  174.         public WriteHandler(AsyncClient client, WritePolicy policy, Key key, Bin bin) {  
  175.             this.client = client;  
  176.             this.policy = policy;  
  177.             this.key = key;  
  178.             this.bin = bin;  
  179.         }  
  180.           
  181.         // Write success callback.  
  182.         public void onSuccess(Key key) {  
  183.             try {  
  184.                 // Write succeeded.                   
  185.             }  
  186.             catch (Exception e) {                 
  187.                 System.out.printf("[ERROR][WriteHandler]Failed to put: namespace=%s set=%s key=%s exception=%s\n",key.namespace, key.setName, key.userKey, e.getMessage());  
  188.             }  
  189.               
  190.             notifyCompleted();  
  191.         }  
  192.           
  193.         public void onFailure(AerospikeException e) {  
  194.            // Retry up to 2 more times.  
  195.          if (++failCount <= 2) {  
  196.             Throwable t = e.getCause();  
  197.               
  198.             // Check for common socket errors.  
  199.             if (t != null && (t instanceof ConnectException || t instanceof IOException)) {  
  200.                   //console.info("Retrying put: " + key.userKey);  
  201.                   try {  
  202.                       client.put(policy, this, key, bin);  
  203.                       return;  
  204.                   }  
  205.                   catch (Exception ex) {  
  206.                     // Fall through to error case.  
  207.                     System.out.printf("[ERROR][WriteHandler]Failed to put: namespace=%s set=%s key=%s bin_name=% bin_value=%s exception=%s\n",key.namespace, key.setName, key.userKey,bin.name,bin.value.toString(), e.getMessage());  
  208.                   }  
  209.             }  
  210.         }  
  211.           
  212.         notifyCompleted();  
  213.         }  
  214.     }  
  215.   
  216.     private class ReadHandler implements RecordListener {  
  217.         private final AsyncClient client;  
  218.         private final Policy policy;  
  219.         private final WritePolicy write_policy;  
  220.         private final Key key;  
  221.         private final Bin bin;  
  222.         private int failCount = 0;  
  223.           
  224.         public ReadHandler(AsyncClient client, Policy policy,WritePolicy write_policy, Key key, Bin bin) {  
  225.             this.client = client;  
  226.             this.policy = policy;  
  227.             this.write_policy = write_policy;  
  228.             this.key = key;  
  229.             this.bin = bin;  
  230.         }  
  231.                   
  232.         // Read success callback.  
  233.         public void onSuccess(Key key, Record record) {  
  234.               
  235.             try {  
  236.                 // Read succeeded.  Now call write.  
  237.                 if(record != null)  
  238.                 {  
  239.                     String str  = record.getString("mz_tag");  
  240.                       
  241.                      
  242.                     if(str != null  && str.length() > 0)    
  243.                     {  
  244.                         Pattern p101 = Pattern.compile("(101\\d{4})");  
  245.                         Pattern p102 = Pattern.compile("(102\\d{4})");  
  246.                         Pattern p103 = Pattern.compile("(103\\d{4})");  
  247.                         String tags="";  
  248.                         Matcher m101 = p101.matcher(str);  
  249.                         while (m701.find()) {  
  250.                             tags += ("," + m701.group(1));              
  251.                         }   
  252.                           
  253.                         Matcher m102 = p102.matcher(str);  
  254.                         while (m102.find()) {  
  255.                             tags += ( "," + m102.group(1));              
  256.                         }   
  257.                           
  258.                         Matcher m103 = p103.matcher(str);  
  259.                         while (m103.find()) {  
  260.                             tags += ( "," + m103.group(1));              
  261.                         }   
  262.                           
  263.                         if(tags.length() > 0)  
  264.                         {  
  265.                             String value_new = ( bin.value.toString() + tags);  
  266.                             Bin new_bin = new Bin("mz_tag", value_new);  
  267.                             client.put(write_policy,new WriteHandler(client,write_policy, key, new_bin), key,new_bin);  
  268.                         }  
  269.                         else  
  270.                         {  
  271.                             client.put(write_policy,new WriteHandler(client,write_policy, key, bin), key,bin);  
  272.                         }  
  273.                                       
  274.                     }  
  275.                     else  
  276.                     {  
  277.                         client.put(write_policy,new WriteHandler(client,write_policy, key, bin), key,bin);  
  278.                     }  
  279.                 }  
  280.                 else  
  281.                 {  
  282.                     client.put(write_policy,new WriteHandler(client,write_policy, key, bin), key,bin);  
  283.                 }  
  284.                   
  285.             }  
  286.             catch (Exception e) {                 
  287.                 System.out.printf("[ERROR][ReadHandler]Failed to get: namespace=%s set=%s key=%s exception=%s\n",key.namespace, key.setName, key.userKey, e.getMessage());  
  288.             }  
  289.           
  290.         }  
  291.   
  292.         // Error callback.  
  293.         public void onFailure(AerospikeException e) {  
  294.             // Retry up to 2 more times.  
  295.             if (++failCount <= 2) {  
  296.             Throwable t = e.getCause();  
  297.               
  298.             // Check for common socket errors.  
  299.             if (t != null && (t instanceof ConnectException || t instanceof IOException)) {  
  300.                   //console.info("Retrying get: " + key.userKey);  
  301.                   try {  
  302.                       client.get(policy, this, key);  
  303.                       return;  
  304.                   }  
  305.                   catch (Exception ex) {  
  306.                     // Fall through to error case.  
  307.                       System.out.printf("[ERROR][ReadHandler]Failed to get: namespace=%s set=%s key=%s exception=%s\n",key.namespace, key.setName, key.userKey, e.getMessage());  
  308.                   }  
  309.             }  
  310.         }  
  311.             notifyCompleted();  
  312.         }  
  313.     }  
  314.   
  315.       
  316.       
  317.     private synchronized void waitTillComplete() {  
  318.         while (! completed) {  
  319.             try {  
  320.                 super.wait();  
  321.             }  
  322.             catch (InterruptedException ie) {  
  323.             }  
  324.         }  
  325.     }  
  326.   
  327.     private synchronized void notifyCompleted() {  
  328.         completed = true;  
  329.         super.notify();  
  330.     }  
  331. }  

2. 用maven将自定义的Sink打包成jar包,maven xml配置文件如下

[html]  view plain  copy
  1. <?xml version="1.0" encoding="UTF-8"?>  
  2.   
  3. <project xmlns="http://maven.apache.org/POM/4.0.0"  
  4.          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"  
  5.          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">  
  6.   <modelVersion>4.0.0</modelVersion>  
  7.     
  8.   <groupId>org.apache.flume.flume-ng-sinks</groupId>  
  9.   <artifactId>flume-ng-aerospike-master-sink</artifactId>  
  10.   <name>Flume Kafka Sink</name>  
  11.   <version>1.0.0</version>  
  12.   <build>  
  13.     <plugins>  
  14.       <plugin>  
  15.         <groupId>org.apache.maven.plugins</groupId>  
  16.         <artifactId>maven-jar-plugin</artifactId>  
  17.       </plugin>  
  18.     </plugins>  
  19.   </build>  
  20.   
  21.   <dependencies>  
  22.     <dependency>  
  23.       <groupId>org.apache.flume</groupId>  
  24.       <artifactId>flume-ng-sdk</artifactId>  
  25.       <version>1.5.2</version>  
  26.     </dependency>  
  27.   
  28.     <dependency>  
  29.       <groupId>org.apache.flume</groupId>  
  30.       <artifactId>flume-ng-core</artifactId>  
  31.       <version>1.5.2</version>  
  32.     </dependency>  
  33.   
  34.     <dependency>  
  35.       <groupId>org.apache.flume</groupId>  
  36.       <artifactId>flume-ng-configuration</artifactId>  
  37.       <version>1.5.2</version>  
  38.     </dependency>  
  39.   
  40.     <dependency>  
  41.       <groupId>org.slf4j</groupId>  
  42.       <artifactId>slf4j-api</artifactId>  
  43.       <version>1.6.1</version>  
  44.     </dependency>  
  45.   
  46.     <dependency>  
  47.       <groupId>junit</groupId>  
  48.       <artifactId>junit</artifactId>  
  49.       <version>4.10</version>  
  50.       <scope>test</scope>  
  51.     </dependency>  
  52.      
  53.      
  54.     <dependency>  
  55.       <groupId>com.aerospike</groupId>  
  56.       <artifactId>aerospike-client</artifactId>  
  57.       <version>[3.0.0,)</version>  
  58.     </dependency>  
  59.         
  60.   </dependencies>  
  61.   
  62. </project>  

3. 将打包好的jar 包放到flume lib目录下
4. 在flume 的conf目录的配置文件中加入自定义Sink配置
[plain]  view plain  copy
  1. a1.sinks.k1_1.type = kafka_sink.asd.Asdsink  
  2. a1.sinks.k1_1.asd_host1 = 127.0.0.1  
  3. a1.sinks.k1_1.asd_host2 = 192.168.0.1  
  4. a1.sinks.k1_1.asd_port = 3000  
  5. a1.sinks.k1_1.set_name = test_set_name  
  6. a1.sinks.k1_1.bin_name = test_bin_name  
  7. a1.sinks.k1_1.batchSize = 10000  


相关资料:
1. flume官网文档:http://flume.apache.org/FlumeDeveloperGuide.html
2. Apache Maven 入门篇(上):http://blog.csdn.net/yanshu2012/article/details/50722088
3. Apache Maven 入门篇(下):http://blog.csdn.net/yanshu2012/article/details/50722621

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值