1、创建kafka的topic并且插入数据
[root@henry ~]# kafka-topics.sh --create --zookeeper 192.168.153.200:2181 --replication-factor 1 --partitions 1 --topic mmm
[root@henry ~]# kafka-console-producer.sh --broker-list 192.168.153.200:9092 --topic mmm
>aaa
>bbb
>ccc
>hello,world
2、使用sparkstreaming读取kafka中的数据
- 创建maven的quickstart工程,并且导入以下依赖
<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.11</version>
<scope>test</scope>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.kafka/kafka -->
<!-- 2.11是scala的版本 -->
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka_2.11</artifactId>
<version>2.0.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.kafka/kafka-clients -->
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka-clients</artifactId>
<version>2.0.0</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId>
<version>2.3.4</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.11</artifactId>
<version>2.3.4</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-graphx_2.11</artifactId>
<version>2.3.4</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
<version>2.6.6</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.spark/spark-streaming -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming_2.11</artifactId>
<version>2.3.4</version>
</dependency>
<dependency><!-- Spark Streaming Kafka -->
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming-kafka-0-10_2.11</artifactId>
<version>2.3.4</version>
</dependency>
<!-- https://mvnrepository.com/artifact/com.google.guava/guava -->
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>14.0.1</version>
</dependency>
</dependencies>
package com.njbdqn
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.streaming.kafka010.{CanCommitOffsets, ConsumerStrategies, HasOffsetRanges, KafkaUtils, LocationStrategies}
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Demo: consume a Kafka topic ("mmm") with the Spark Streaming
 * Kafka 0-10 direct stream and print each micro-batch to stdout.
 */
object MyDataHandler {

  def main(args: Array[String]): Unit = {
    // Local demo: use all cores; Kryo serialization is faster and more
    // compact than default Java serialization.
    val conf = new SparkConf()
      .setMaster("local[*]")
      .setAppName("ttt")
      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    val sc = new SparkContext(conf)
    // 5-second micro-batch interval.
    val ssc = new StreamingContext(sc, Seconds(5))
    // NOTE(review): hard-coded Windows path; checkpointing is only required
    // for stateful/window operations, kept here to preserve the demo setup.
    ssc.checkpoint("E:\\BigDataStudy\\SparkStreaming\\cks1")

    // Explicit Map[String, Object]: values mix String and Class, which would
    // otherwise infer an unhelpful common supertype.
    val kafkaParams = Map[String, Object](
      ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> "192.168.153.200:9092",
      ConsumerConfig.GROUP_ID_CONFIG -> "henry1",
      ConsumerConfig.MAX_POLL_RECORDS_CONFIG -> "1000",
      ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer],
      ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer],
      // Per the Spark Streaming + Kafka integration guide, the Kafka
      // consumer's auto-commit must be OFF: it would acknowledge offsets
      // before Spark has processed the batch (at-most-once, possible loss).
      // Spark commits the offsets itself after processing (see foreachRDD).
      ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG -> "false",
      // Start from the beginning of the topic when no committed offset exists.
      ConsumerConfig.AUTO_OFFSET_RESET_CONFIG -> "earliest"
    )

    val ds = KafkaUtils.createDirectStream[String, String](
      ssc,
      LocationStrategies.PreferConsistent,
      ConsumerStrategies.Subscribe[String, String](Set("mmm"), kafkaParams)
    )
    // Print the first records of every batch (the output shown below).
    ds.print()
    // Commit offsets back to Kafka only after each batch has been generated,
    // giving at-least-once semantics instead of auto-commit's at-most-once.
    ds.foreachRDD { rdd =>
      val offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
      ds.asInstanceOf[CanCommitOffsets].commitAsync(offsetRanges)
    }

    ssc.start()
    ssc.awaitTermination()
  }
}
-------------------------------------------
Time: 1607942710000 ms
-------------------------------------------
ConsumerRecord(topic = mmm, partition = 0, offset = 0, CreateTime = 1607936175351, serialized key size = -1, serialized value size = 3, headers = RecordHeaders(headers = [], isReadOnly = false), key = null, value = aaa)
ConsumerRecord(topic = mmm, partition = 0, offset = 1, CreateTime = 1607936176513, serialized key size = -1, serialized value size = 3, headers = RecordHeaders(headers = [], isReadOnly = false), key = null, value = bbb)
ConsumerRecord(topic = mmm, partition = 0, offset = 2, CreateTime = 1607936178581, serialized key size = -1, serialized value size = 3, headers = RecordHeaders(headers = [], isReadOnly = false), key = null, value = ccc)
ConsumerRecord(topic = mmm, partition = 0, offset = 3, CreateTime = 1607936182416, serialized key size = -1, serialized value size = 11, headers = RecordHeaders(headers = [], isReadOnly = false), key = null, value = hello,world)
-------------------------------------------
Time: 1607942715000 ms
-------------------------------------------
-------------------------------------------
Time: 1607942720000 ms
-------------------------------------------