Kafka --- Importing data from Kafka into HBase

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.StringDeserializer;


import java.io.IOException;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Properties;

/**
 * @Author shall潇
 * @Date 2021/5/31
 * @Description  Import data from a Kafka topic into an HBase table
 */
public class UserFriendToHB {
    static int num = 0;
    public static void main(String[] args) {
        // 1. Kafka consumer configuration
        Properties properties = new Properties();
        properties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG,"192.168.159.100:9092");
        properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        properties.put(ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG,"30000");

        // Offset commit mode: false = commit manually, true = auto-commit every auto.commit.interval.ms (only used when auto-commit is enabled)
        properties.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG,"false");
        properties.put(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG,"1000");
        properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG,"earliest");
        properties.put(ConsumerConfig.GROUP_ID_CONFIG,"user_friend_group");
        
        // Create the consumer and subscribe to the topic
        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(properties);
        consumer.subscribe(Collections.singleton("user_friends"));

        // 2. HBase configuration
        Configuration conf = HBaseConfiguration.create();
        conf.set(HConstants.HBASE_DIR,"hdfs://192.168.159.100:9000/hbase");
        conf.set("hbase.zookeeper.quorum","192.168.159.100");
        conf.set("hbase.zookeeper.property.clientPort","2181");
        Connection connection = null;
        try {
            connection = ConnectionFactory.createConnection(conf);
            Admin admin = connection.getAdmin();
            Table table = connection.getTable(TableName.valueOf("event_db:user_friend"));

            while (true){
                // 3. Poll records from Kafka and write them to the HBase table as Puts
                ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(100));
                List<Put> datas = new ArrayList<>();
                for (ConsumerRecord<String, String> record : records) {
                    System.out.println(record.value());
                    String[] split = record.value().split(",");
                    Put put = new Put(Bytes.toBytes((split[0] + split[1]).hashCode()));
                    put.addColumn("uf".getBytes(),"userid".getBytes(),split[0].getBytes());
                    put.addColumn("uf".getBytes(),"friendid".getBytes(),split[1].getBytes());
                    datas.add(put);
                }
                num += datas.size();
                System.out.println(num + " rows");
                table.put(datas);
                // auto-commit is disabled, so commit offsets manually after the batch is written
                consumer.commitAsync();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
```
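The code above assumes the namespace `event_db` and the table `user_friend` with column family `uf` already exist. If they do not, they can be created either in the HBase shell or programmatically. Below is a minimal sketch using the HBase Admin API (assuming an HBase 2.x client and the same ZooKeeper address as in the consumer code); it is illustrative, not part of the original program:

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;

public class CreateUserFriendTable {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.159.100");
        conf.set("hbase.zookeeper.property.clientPort", "2181");

        try (Connection connection = ConnectionFactory.createConnection(conf);
             Admin admin = connection.getAdmin()) {
            // create the namespace if it does not exist yet
            boolean nsExists = false;
            for (NamespaceDescriptor ns : admin.listNamespaceDescriptors()) {
                if (ns.getName().equals("event_db")) {
                    nsExists = true;
                    break;
                }
            }
            if (!nsExists) {
                admin.createNamespace(NamespaceDescriptor.create("event_db").build());
            }

            // create the table with a single column family "uf"
            TableName tableName = TableName.valueOf("event_db:user_friend");
            if (!admin.tableExists(tableName)) {
                TableDescriptor desc = TableDescriptorBuilder.newBuilder(tableName)
                        .setColumnFamily(ColumnFamilyDescriptorBuilder.of("uf"))
                        .build();
                admin.createTable(desc);
            }
        }
    }
}
```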
Flume is a distributed, reliable, and available system for efficiently collecting, aggregating, and moving large volumes of log data. Kafka is a high-throughput distributed messaging system. Combining Flume with Kafka makes it possible to collect and transport real-time data streams. The following steps write a real-time data stream collected by Flume into a specified Kafka topic.

### 1. Configure the Flume Agent

First, configure a Flume agent to collect data from the source and send it to Kafka. An example configuration file `flume.conf`:

```properties
# Define the agent's components
agent1.sources = source1
agent1.sinks = sink1
agent1.channels = channel1

# Source configuration
agent1.sources.source1.type = netcat
agent1.sources.source1.bind = localhost
agent1.sources.source1.port = 44444

# Kafka sink configuration
agent1.sinks.sink1.type = org.apache.flume.sink.kafka.KafkaSink
agent1.sinks.sink1.topic = mytopic
agent1.sinks.sink1.brokerList = localhost:9092
agent1.sinks.sink1.requiredAcks = 1
agent1.sinks.sink1.batchSize = 20

# Channel configuration
agent1.channels.channel1.type = memory
agent1.channels.channel1.capacity = 1000
agent1.channels.channel1.transactionCapacity = 100

# Bind the source and sink to the channel
agent1.sources.source1.channels = channel1
agent1.sinks.sink1.channel = channel1
```

### 2. Start the Flume Agent

Start the agent with:

```sh
flume-ng agent --conf /path/to/conf --conf-file /path/to/flume.conf --name agent1 -Dflume.root.logger=INFO,console
```

### 3. Start Kafka

Make sure the Kafka service is running and the target topic has been created. If it has not, create it with:

```sh
kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic mytopic
```

(On Kafka 2.2 and later, `--bootstrap-server localhost:9092` can be used in place of `--zookeeper localhost:2181`.)

### 4. Verify the data flow

Use the Kafka console consumer to confirm that data is reaching the topic:

```sh
kafka-console-consumer.sh --bootstrap-server localhost:9092 --topic mytopic --from-beginning
```

With these steps, the real-time data stream collected by Flume is written into the specified Kafka topic.
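The console consumer check can also be scripted. Below is a minimal sketch using the Kafka AdminClient to confirm the topic exists and inspect its partition count, assuming the broker address `localhost:9092` and the topic name `mytopic` from the configuration above; it is an illustrative verification helper, not part of the original setup:

```java
import org.apache.kafka.clients.admin.AdminClient;
import org.apache.kafka.clients.admin.AdminClientConfig;
import org.apache.kafka.clients.admin.TopicDescription;

import java.util.Collections;
import java.util.Map;
import java.util.Properties;

public class CheckTopic {
    public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        props.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");

        try (AdminClient admin = AdminClient.create(props)) {
            // describe the topic that the Flume Kafka sink writes to
            Map<String, TopicDescription> topics = admin
                    .describeTopics(Collections.singleton("mytopic"))
                    .all()
                    .get();
            TopicDescription desc = topics.get("mytopic");
            System.out.println("topic: " + desc.name()
                    + ", partitions: " + desc.partitions().size());
        }
    }
}
```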