import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.{DStream, ReceiverInputDStream}
import org.apache.spark.streaming.{Seconds, StreamingContext}

// Count only the current batch (no state carried between batches)
object wordandcount {
  def main(args: Array[String]): Unit = {
    val conf: SparkConf = new SparkConf().setMaster("local[*]").setAppName(this.getClass.getName)
    // Batch interval of 5 seconds: one micro-batch is produced every 5 seconds
    val ssc: StreamingContext = new StreamingContext(conf, Seconds(5))
    // Read lines of text from a socket on host linux01, port 8888
    val lines: ReceiverInputDStream[String] = ssc.socketTextStream("linux01", 8888)
    val words: DStream[String] = lines.flatMap(_.split(" "))
    val wordandcount: DStream[(String, Int)] = words.map((_, 1))
    // reduceByKey only aggregates within the current batch
    val reduced: DStream[(String, Int)] = wordandcount.reduceByKey(_ + _)
    reduced.print()
    ssc.start()
    ssc.awaitTermination()
  }
}
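To try this locally, something has to write text to the socket. Assuming netcat is available on the linux01 host, running nc -lk 8888 there and typing lines of space-separated words will drive the job: each 5-second batch prints the counts for the words received in that batch only, and the counts start over in the next batch.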
import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.{DStream, ReceiverInputDStream}
import org.apache.spark.streaming.{Seconds, StreamingContext}

// Accumulate across historical batches: merge the current batch's data with the historical data (state)
object wordandcount {
  def main(args: Array[String]): Unit = {
    val conf: SparkConf = new SparkConf().setMaster("local[*]").setAppName(this.getClass.getName)
    val ssc: StreamingContext = new StreamingContext(conf, Seconds(5))
    ssc.sparkContext.setLogLevel("ERROR")
    // updateStateByKey requires a checkpoint directory to persist the state between batches
    ssc.checkpoint("./ck")
    val lines: ReceiverInputDStream[String] = ssc.socketTextStream("linux01", 8889)
    val words: DStream[String] = lines.flatMap(_.split(" "))
    val wordandcount: DStream[(String, Int)] = words.map((_, 1))
    // Stateless version, for comparison:
    // val reduced: DStream[(String, Int)] = wordandcount.reduceByKey(_ + _)

    // seq: counts for this key in the current batch; op: the previously accumulated state (None the first time the key is seen)
    def updateFunc = (seq: Seq[Int], op: Option[Int]) => Some(seq.sum + op.getOrElse(0))
    val reduced: DStream[(String, Int)] = wordandcount.updateStateByKey(updateFunc)
    reduced.print()
    ssc.start()
    ssc.awaitTermination()
  }
}
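The update function is easiest to understand outside of Spark: for each key, Spark passes the counts seen in the current batch (seq) together with the previously stored state (op), and the Option returned becomes the new state. A minimal stand-alone sketch of that behavior in plain Scala (no cluster needed; the sample counts are made up purely for illustration):

object UpdateFuncDemo {
  def main(args: Array[String]): Unit = {
    // Same logic as updateFunc above: sum the new counts and add the previous state (0 if none).
    val updateFunc: (Seq[Int], Option[Int]) => Option[Int] =
      (seq, op) => Some(seq.sum + op.getOrElse(0))

    // Batch 1: the word appears 3 times, no previous state yet.
    val afterBatch1 = updateFunc(Seq(1, 1, 1), None)      // Some(3)
    // Batch 2: the word appears twice more; the previous state is the result of batch 1.
    val afterBatch2 = updateFunc(Seq(1, 1), afterBatch1)  // Some(5)

    println(s"after batch 1: $afterBatch1, after batch 2: $afterBatch2")
  }
}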