目录
一、nc -lk 端口号连接 sparkStreaming
二、sparkStreaming：kafka 订阅主题
三、sparkStreaming：kafkaSource to kafkaSink
import org.apache.spark.SparkConf
import org.apache.spark.sql.catalyst.expressions.Second
import org.apache.spark.streaming.dstream.{DStream, ReceiverInputDStream}
import org.apache.spark.streaming.{Seconds, StreamingContext}
object SparkStreamDemo1 {

  /**
   * Word-count over a TCP text stream (e.g. one fed by `nc -lk <port>`).
   *
   * The socket host and port may optionally be supplied as the first two
   * program arguments; when absent (or unparsable, for the port), the
   * original hard-coded values are used, so existing invocations behave
   * exactly as before.
   *
   * @param args optional: args(0) = source host, args(1) = source port
   */
  def main(args: Array[String]): Unit = {
    // Socket source location: program arguments override the defaults.
    val host: String = args.lift(0).getOrElse("192.168.91.180")
    val port: Int =
      args.lift(1).flatMap(p => scala.util.Try(p.toInt).toOption).getOrElse(8888)

    // local[2]: one thread for the socket receiver, one for processing.
    val sparkConf: SparkConf = new SparkConf().setMaster("local[2]").setAppName("sparkstream1")
    // Streaming context with a 3-second batch (collection) interval.
    val streamingContext = new StreamingContext(sparkConf, Seconds(3))
    // Data source: text lines read from the given host and port.
    val socketLineStream: ReceiverInputDStream[String] =
      streamingContext.socketTextStream(host, port)

    // Business logic: split each line on runs of whitespace, pair every
    // word with 1, then sum the counts per word within each batch.
    val wordStream: DStream[String] = socketLineStream.flatMap(_.split("\\s+"))
    val mapStream: DStream[(String, Int)] = wordStream.map((_, 1))
    val wordcountStream: DStream[(String, Int)] = mapStream.reduceByKey(_ + _)

    // Print each batch's word counts to stdout.
    wordcountStream.print()

    // Start the receiver and block until the context is terminated.
    streamingContext.start()
    streamingContext.awaitTermination()
  }
}
[root@reagan180 ~]# nc -lk 8888
二、sparkStreaming : kafka订阅主题
import org.apache.