1. Create a test topic
Kafka startup script (before starting Kafka, ZooKeeper and the cluster must already be running):
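If ZooKeeper is not already running, start it first. As an illustration, the script bundled with the Kafka distribution works for a single node (an external ZooKeeper ensemble works just as well):
bin/zookeeper-server-start.sh config/zookeeper.properties > /dev/null 2>&1 &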
bin/kafka-server-start.sh config/server.properties > /dev/null 2>&1 &
Create the topic (a single partition with replication factor 1 is enough for a local test):
bin/kafka-topics.sh --create --zookeeper hadoop100:2181 --replication-factor 1 --partitions 1 --topic structrued_streaming_1
Check that the topic was created successfully (the output should include structrued_streaming_1):
bin/kafka-topics.sh --list --zookeeper hadoop100:2181
Start a console producer and send test data:
bin/kafka-console-producer.sh --broker-list hadoop100:9092 --topic structrued_streaming_1
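Type a few records into the producer. The consumer below expects MovieLens-style lines delimited by ::; the first line comes from the comment in the code, and the second is an extra illustrative record:
1::Toy Story::Animation|Children's|Comedy
2::Jumanji::Adventure|Children's|Fantasy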
2. Write the Structured Streaming consumer code
package com.itcast.sql

import org.apache.spark.sql.SparkSession

object Structrued_Kafka {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[6]")
      .appName("structured kafka")
      .getOrCreate()
    spark.sparkContext.setLogLevel("ERROR")
    import spark.implicits._

    // Source: subscribe to the Kafka topic. The value column arrives as
    // binary, so cast it to STRING before working with it.
    val source = spark.readStream
      .format("kafka")
      .option("kafka.bootstrap.servers", "hadoop100:9092")
      .option("subscribe", "structrued_streaming_1")
      .option("startingOffsets", "earliest") // read the topic from the beginning
      .load()
      .selectExpr("CAST(value AS STRING) AS value")
      .as[String]

    // Each record is a MovieLens-style line, e.g.
    // 1::Toy Story::Animation|Children's|Comedy
    val resultDF = source.map(item => {
      val words = item.split("::")
      (words(0).toInt, words(1), words(2))
    }).toDF("id", "name", "category")

    // Sink: print each micro-batch to the console
    // (see the Parquet sink sketch below for writing to files instead)
    resultDF.writeStream
      .format("console")
      .start()
      .awaitTermination()
  }
}
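The commented-out lines in the original sink hinted at writing to Parquet files instead of the console. A minimal sketch of that variant, reusing the data/kafka/ and data/checkpoint/ paths from those comments (file sinks require a checkpoint location, and the Parquet sink only supports append mode):

// Replace the console sink above with a file sink; the two paths are
// taken from the original commented-out code and are local-test paths.
resultDF.writeStream
  .format("parquet")
  .option("path", "data/kafka/")
  .option("checkpointLocation", "data/checkpoint/") // required for file sinks
  .outputMode("append")                             // the only mode Parquet supports
  .start()
  .awaitTermination()

The resulting files can then be read back as a static DataFrame with spark.read.parquet("data/kafka/") to verify the pipeline end to end.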