package com.spark.streaming
import org.apache.spark.streaming.dstream.{DStream, ReceiverInputDStream}
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Spark Streaming window example: counts how often each word occurs within a
 * sliding time window, reading text lines from a TCP socket.
 *
 * Usage: `SparkStreamingTCPWindow [host] [port]`. Both arguments are optional and
 * default to `star.com` and `9999`.
 */
object SparkStreamingTCPWindow {

  /** Socket host used when no host argument is supplied. */
  private val DefaultHost = "star.com"

  /** Socket port used when no port argument is supplied. */
  private val DefaultPort = 9999

  /**
   * Builds the streaming pipeline, starts it, and blocks until it terminates.
   *
   * @param args optional `host` (index 0) and `port` (index 1) of the text source
   * @throws IllegalArgumentException if the port argument is not an integer
   */
  def main(args: Array[String]): Unit = {
    val host = args.headOption.getOrElse(DefaultHost)
    val port = if (args.length > 1) parsePort(args(1)) else DefaultPort

    // Only fall back to a local master when none was provided (e.g. via spark-submit),
    // so the same jar can run on a cluster. The local fallback uses two threads:
    // the socket receiver occupies one, leaving one for batch processing.
    val sparkConf = new SparkConf()
      .setAppName("SparkStreamingTCPWindow")
      .setIfMissing("spark.master", "local[2]")

    val sc = new SparkContext(sparkConf)
    sc.setLogLevel("WARN")

    // Streaming context with a 5-second batch interval.
    val ssc = new StreamingContext(sc, Seconds(5))

    // Register the socket to listen on as the source of input lines.
    val lines = ssc.socketTextStream(host, port)

    // Split each line on single spaces; drop the empty tokens produced by
    // consecutive/leading spaces or blank lines so "" is never counted as a word.
    val words = lines.flatMap(_.split(" ")).filter(_.nonEmpty)

    // Pair every word with an initial count of 1.
    val wordAndOne = words.map(word => (word, 1))

    // reduceByKeyAndWindow parameters:
    //   windowDuration - length of the window; with 5-second batches a 10-second
    //                    window covers the two most recent batches
    //   slideDuration  - how often the windowed computation is evaluated
    val result = wordAndOne.reduceByKeyAndWindow(
      (a: Int, b: Int) => a + b,
      Seconds(10),
      Seconds(5)
    )

    result.print()

    ssc.start()
    ssc.awaitTermination()
  }

  /** Parses a port argument, reporting a descriptive error when it is not an integer. */
  private def parsePort(raw: String): Int =
    try raw.trim.toInt
    catch {
      case _: NumberFormatException =>
        throw new IllegalArgumentException(s"Invalid port '$raw': expected an integer")
    }
}
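// Installing and using nc (netcat) on Linux: https://blog.youkuaiyun.com/star5610/article/details/106579382
// Test command: nc -lk 9999
// Type some input into the nc session; the test succeeds when the word counts are printed to the console.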