Preface
0. Without further ado, straight to the code:
1. A custom Kafka producer sends simulated data to Kafka in real time;
2. Spark Streaming pulls the data from Kafka in Direct mode, processes it, and writes the results into HBase (a quick preview sketch follows this list).
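To see where we are headed, here is a minimal sketch of the Direct-mode read. The broker node01:9092 and topic MyTest match the producer shown below; the group id, batch interval, and the print() stand-in sink are placeholder assumptions, and the actual HBase write happens in the full Streaming job.

import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.kafka010.ConsumerStrategies.Subscribe
import org.apache.spark.streaming.kafka010.KafkaUtils
import org.apache.spark.streaming.kafka010.LocationStrategies.PreferConsistent

object DirectModeSketch {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("KafkaToHBase").setMaster("local[2]")
    // Pull a batch from Kafka every 5 seconds (placeholder interval)
    val ssc = new StreamingContext(conf, Seconds(5))
    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> "node01:9092",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> "streaming-hbase-demo", // placeholder group id
      "auto.offset.reset" -> "latest",
      "enable.auto.commit" -> (false: java.lang.Boolean)
    )
    // Direct mode: executors read their assigned Kafka partitions themselves, no receiver
    val stream = KafkaUtils.createDirectStream[String, String](
      ssc, PreferConsistent, Subscribe[String, String](Array("MyTest"), kafkaParams))
    // Stand-in sink; the real job writes each record into HBase instead
    stream.map(_.value()).print()
    ssc.start()
    ssc.awaitTermination()
  }
}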
I. Dependencies (make sure the HBase version matches your cluster)
<!-- Repository locations: the aliyun and cloudera repositories -->
<repositories>
    <repository>
        <id>aliyun</id>
        <url>http://maven.aliyun.com/nexus/content/groups/public/</url>
    </repository>
    <repository>
        <id>cloudera</id>
        <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
    </repository>
</repositories>
<properties>
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
    <encoding>UTF-8</encoding>
    <scala.version>2.11.8</scala.version>
    <scala.compat.version>2.11</scala.compat.version>
    <hadoop.version>2.7.4</hadoop.version>
    <spark.version>2.2.0</spark.version>
</properties>
<dependencies>
    <dependency>
        <groupId>org.scala-lang</groupId>
        <artifactId>scala-library</artifactId>
        <version>${scala.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-core_2.11</artifactId>
        <version>${spark.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>${hadoop.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-streaming_2.11</artifactId>
        <version>${spark.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-streaming-kafka-0-10_2.11</artifactId>
        <version>${spark.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hbase</groupId>
        <artifactId>hbase-client</artifactId>
        <version>1.2.0-cdh5.14.0</version>
    </dependency>
</dependencies>
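One note on the last dependency: 1.2.0-cdh5.14.0 is a Cloudera build, which is why the cloudera repository is declared above; pick whatever version matches the HBase on your cluster. As a quick sanity check that hbase-client resolves and can reach the cluster, here is a minimal sketch; the ZooKeeper quorum node01 and client port 2181 are assumptions based on the single node used throughout this post.

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.{Connection, ConnectionFactory}

object HBaseConnectionCheck {
  def main(args: Array[String]): Unit = {
    val conf = HBaseConfiguration.create()
    // Assumed ZooKeeper quorum and client port; adjust to your cluster
    conf.set("hbase.zookeeper.quorum", "node01")
    conf.set("hbase.zookeeper.property.clientPort", "2181")
    val connection: Connection = ConnectionFactory.createConnection(conf)
    println("HBase connection open: " + !connection.isClosed)
    connection.close()
  }
}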
II. Producer
package kafka_streaming_hbase
import java.util.Properties
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
/*
 * @Author Yang
 * @Date 13:56 2020/10/10
 * Sends simulated data to the specified Kafka topic
 */
object ProducerTest {
  def main(args: Array[String]): Unit = {
    // Kafka producer configuration
    val topic = "MyTest"
    val prop = new Properties()
    prop.put("bootstrap.servers", "node01:9092")
    prop.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    prop.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    // Create the Kafka producer
    val producer = new KafkaProducer[String, String](prop)
    val content: Array[String] = new Array[String](5)
    content(0) = "apache hadoop hive"