// Subscribe to 1 Kafka topic and expose the message payload as a Dataset[String].
// TODO: read the starting offsets from the database (via the "startingOffsets"
// option) so the job can resume from where it left off after a restart.
val words = spark
  .readStream
  .format("kafka")
  .option("kafka.bootstrap.servers", "host1:port1,host2:port2")
  .option("subscribe", "topic1")
  .load()
  .selectExpr("CAST(value AS STRING)")
  // Single selected column -> Dataset[String]; the original ".as[(String)]"
  // used redundant one-element tuple parentheses.
  .as[String]
// Aggregate with the untyped DataFrame API.
// The Kafka source (after CAST(value AS STRING)) exposes exactly one column,
// named "value" — grouping by "name" would fail at runtime with an
// AnalysisException ("cannot resolve 'name'"), so group by "value" instead.
val wordCounts = words.groupBy("value").count()
// Start the continuous query: fire every 5 seconds, emit the full (complete)
// aggregation table, and write each micro-batch partition to a local file.
// NOTE(review): ProcessingTime(...) is the deprecated pre-2.2 API; prefer
// Trigger.ProcessingTime("5 seconds") once the import can be updated.
val query = wordCounts.writeStream
  .trigger(ProcessingTime(5.seconds))
  .outputMode("complete")
  .foreach(new ForeachWriter[Row] {
    // One FileWriter per partition, created in open() and released in close().
    var fileWriter: FileWriter = _

    override def open(partitionId: Long, version: Long): Boolean = {
      FileUtils.forceMkdir(new File(s"/tmp/example/${partitionId}"))
      fileWriter = new FileWriter(new File(s"/tmp/example/${partitionId}/temp"))
      true
    }

    override def process(value: Row): Unit = {
      // TODO: record the offset here (original intent, left unimplemented).
      // Write one CSV line per row. The original omitted the newline, which
      // concatenated every row of the partition onto a single line.
      fileWriter.append(value.toSeq.mkString(",") + "\n")
    }

    override def close(errorOrNull: Throwable): Unit = {
      // open() may have thrown before fileWriter was assigned (e.g. mkdir
      // failure); guard against the resulting NullPointerException.
      if (fileWriter != null) fileWriter.close()
    }
  })
  .start()
Structured Streaming — word count from Kafka (wordcounts_kafka)
最新推荐文章于 2024-01-13 18:03:53 发布