Spark Streaming wordcount（java程序发送请求）

最新推荐文章于 2025-05-13 23:20:32 发布

KerryZXC

最新推荐文章于 2025-05-13 23:20:32 发布

阅读量406

点赞数 1

分类专栏： Spark 文章标签： spark 大数据

本文链接：https://blog.youkuaiyun.com/qq_41495340/article/details/105937538

版权

Spark 专栏收录该内容

2 篇文章

订阅专栏

要求：将 nc -lk 9999 指令用 Java 的 ServerSocket 实现，即通过 Java 程序监听 9999 端口，作为 Spark Streaming 的 Socket 数据源；用户在控制台输入的每一行信息都会发送给 Spark Streaming 进行处理。

import java.io.*;
import java.net.ServerSocket;
import java.net.Socket;
/**
 * Minimal replacement for {@code nc -lk 9999}: listens on TCP port 9999,
 * waits for a single client (e.g. a Spark Streaming socket receiver) and
 * forwards every line typed on the console to that client.
 */
public class SparkStreamingTest {
    public static void main(String args[])  {
        sendTCP();
    }

    /**
     * Accepts one client on port 9999 and relays console lines to it until
     * the user types {@code end}, standard input reaches end-of-file, or the
     * client goes away. The {@code end} line itself is still forwarded before
     * the loop stops.
     *
     * <p>I/O failures are reported on the console; nothing is thrown to the
     * caller. The server socket and the client connection are always closed
     * before this method returns.
     */
    public static void sendTCP()  {
        int port = 9999;
        try (ServerSocket server = new ServerSocket(port)) {
            System.out.println("服务器正在监听");
            // accept() blocks until a client connects.
            try (Socket client = server.accept();
                 // Encode explicitly as UTF-8 instead of the platform default
                 // so non-ASCII input is not garbled on the receiving side.
                 // The second argument enables auto-flush on println.
                 PrintWriter pw = new PrintWriter(
                         new OutputStreamWriter(client.getOutputStream(),
                                 java.nio.charset.StandardCharsets.UTF_8), true)) {
                System.out.println(client.getInetAddress() + "已建立连接! ");

                // System.in is intentionally not closed here: it belongs to the
                // whole process, not to this method.
                BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
                System.out.println("Enter lines of text.");

                String str;
                // readLine() returns null at end-of-input; stop instead of
                // sending the text "null" and failing on str.equals(...).
                while ((str = br.readLine()) != null) {
                    pw.println(str);
                    // PrintWriter never throws on write failure; checkError()
                    // is the only way to notice that the client disconnected.
                    if (pw.checkError()) {
                        System.out.println("connection exit!");
                        break;
                    }
                    System.out.println(str);
                    if (str.equals("end")) {
                        break;
                    }
                }
            }
        } catch (IOException e) {
            System.out.println("connection exit!");
            System.out.println();
            // Keep the cause visible instead of silently discarding it.
            System.err.println(e);
        }
    }
}

import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.ReceiverInputDStream
import org.apache.spark.streaming.{Seconds, StreamingContext}

/**
 * Per-batch word count over a socket text stream, based on the official
 * quick example:
 * http://spark.apache.org/docs/latest/streaming-programming-guide.html#a-quick-example
 *
 * Connects to 127.0.0.1:9999, splits each received line into words and
 * prints the word counts of every 4-second batch.
 */
object NetworkWordCount {
  def main(args: Array[String]): Unit = {
    import org.apache.spark.streaming.StreamingContext._ // not necessary since Spark 1.3

    // 0. Entry point: a StreamingContext is created from a SparkConf.
    //    Data is collected in batches of the given interval (4 seconds).
    val conf = new SparkConf().setMaster("local[2]").setAppName("NetworkWordCount")
    val ssc = new StreamingContext(conf, Seconds(4))

    try {
      // 1. Input source: a DStream of text lines read from 127.0.0.1:9999.
      val lines: ReceiverInputDStream[String] = ssc.socketTextStream("127.0.0.1", 9999)

      // 2. Computation: split each line into words. Splitting on runs of
      //    whitespace and dropping empty tokens keeps blank lines and
      //    repeated spaces from being counted as an empty "word".
      val words = lines.flatMap(_.split("\\s+")).filter(_.nonEmpty)

      // Count each word within each batch.
      val pairs = words.map(word => (word, 1))
      val wordCounts = pairs.reduceByKey(_ + _)

      // Print the first ten elements of each RDD generated in this DStream.
      wordCounts.print()

      // 3. Start receiving and processing data.
      ssc.start()

      // 4. Block until processing is stopped (manually or due to an error).
      ssc.awaitTermination()
    } finally {
      // 5. Always release the streaming context and the underlying
      //    SparkContext, including when start/awaitTermination throws.
      ssc.stop(stopSparkContext = true)
    }
  }

}