// No further explanation.
package org.apache.spark.examples.sql.streaming
import org.apache.spark.sql.functions._
import org.apache.spark.sql.SparkSession
/**
 * Structured Streaming example that keeps a running count of the words read
 * from a socket source and prints those counts to the console.
 *
 * Usage: StructuredNetworkWordCount <hostname> <port>
 *
 * `<hostname>` and `<port>` are passed to the "socket" source as its `host`
 * and `port` options.
 */
object StructuredNetworkWordCount {

  /**
   * Program entry point: validates the command line, builds the streaming
   * query and blocks until the query terminates.
   *
   * Exits with status 1 when fewer than two arguments are supplied or when
   * the port argument is not an integer.
   *
   * @param args `args(0)` is the host name, `args(1)` is the port number
   */
  def main(args: Array[String]): Unit = {
    if (args.length < 2) {
      System.err.println("Usage: StructuredNetworkWordCount <hostname> <port>")
      System.exit(1)
    }

    val host = args(0)
    // Report a malformed port the same way as a usage error instead of letting
    // a NumberFormatException escape with a stack trace.
    val port = scala.util.Try(args(1).toInt).getOrElse {
      System.err.println(s"Invalid port '${args(1)}': expected an integer")
      sys.exit(1)
    }

    // SparkSession is the Spark SQL entry point.
    val spark = SparkSession
      .builder
      .appName("StructuredNetworkWordCount")
      .getOrCreate()

    // Needed for the String encoder used by `.as[String]` and `flatMap` below.
    import spark.implicits._

    // Streaming Dataset of the lines received from host:port.
    val lines = spark.readStream
      .format("socket")
      .option("host", host)
      .option("port", port)
      .load().as[String]

    // Split each line into words on a single space.
    val words = lines.flatMap(_.split(" "))

    // Group identical words ("value" is the column the words are grouped on) and count them.
    val wordCounts = words.groupBy("value").count()

    // Start running the query that prints the running counts to the console.
    val query = wordCounts.writeStream
      .outputMode("complete")
      .format("console")
      .start()

    // Block the main thread until the streaming query stops.
    query.awaitTermination()
  }
}