数据流处理
DataStream
Java 代码
package com.example;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;
/**
 * Streaming word count on Apache Flink.
 *
 * <p>Reads text lines from a socket on {@code localhost:9999}, splits each line
 * into words, maps every word to a {@code (word, 1)} tuple, keys the stream by
 * the word and prints the running sum of the counts.
 */
public class Hello02FlinkStream {
    /**
     * Builds the word-count pipeline and submits it for execution.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if building or executing the job fails
     */
    public static void main(String[] args) throws Exception {
        // Streaming execution environment (local, created with the web UI variant).
        StreamExecutionEnvironment environment = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(new Configuration());
        // Bind the data source: text lines read from a socket.
        DataStreamSource<String> dataStream = environment.socketTextStream("localhost", 9999);
        SingleOutputStreamOperator<String> words = dataStream.flatMap(new FlatMapFunction<String, String>() {
            @Override
            public void flatMap(String line, Collector<String> collector) throws Exception {
                // Split on runs of whitespace and drop empty tokens so that blank
                // lines or repeated/leading spaces are not counted as a word "".
                for (String token : line.split("\\s+")) {
                    if (!token.isEmpty()) {
                        collector.collect(token);
                    }
                }
            }
        });
        SingleOutputStreamOperator<Tuple2<String, Integer>> pairs = words.map(new MapFunction<String, Tuple2<String, Integer>>() {
            @Override
            public Tuple2<String, Integer> map(String word) throws Exception {
                return Tuple2.of(word, 1);
            }
        });
        // Key by the word (tuple field f0) through a KeySelector; the positional
        // keyBy(int) overload is deprecated. Field 1 holds the count to sum.
        pairs.keyBy(pair -> pair.f0).sum(1).print();
        // Start the job.
        environment.execute("Hello02FlinkStream" + System.currentTimeMillis());
    }
}
Scala 代码
package com.example
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
/** Streaming word count on Apache Flink (Scala API).
  *
  * Reads text lines from a socket on `localhost:9999`, splits each line into
  * words, maps every word to a `(word, 1)` tuple, keys the stream by the word
  * and prints the running sum of the counts.
  */
object Hello04FlinkStream {
  /** Builds the word-count pipeline and submits it for execution.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    val streamExecutionEnvironment = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(new Configuration())
    val lines = streamExecutionEnvironment.socketTextStream("localhost", 9999)
    // Wildcard import of the Flink Scala API package, kept from the original;
    // presumably it supplies the implicits the operators below rely on.
    import org.apache.flink.api.scala._
    lines
      // Split on runs of whitespace and drop empty tokens so that blank lines
      // or repeated/leading spaces are not counted as a word "".
      .flatMap(line => line.split("\\s+").filter(_.nonEmpty))
      .map((_, 1))
      // Key by the word through a key-selector function; the positional
      // keyBy(Int) overload is deprecated. Field 1 holds the count to sum.
      .keyBy(_._1)
      .sum(1)
      .print()
    streamExecutionEnvironment.execute(s"Hello04FlinkStream${System.currentTimeMillis()}")
  }
}
启动数据源(需在运行程序之前先执行,用于在 9999 端口监听并输入数据):
nc -lp 9999
实时结果
3> (aiy,1)
8> (qwe,1)
5> (aaa,1)
3> (aiy,2)
2> (xxz,1)
本文介绍使用 Apache Flink 进行实时数据流处理的方法。通过 Java 和 Scala 两种语言实现从 localhost 的 9999 端口接收数据,并对数据进行分割和平铺处理,接着映射为元组并按关键词进行分组计数,最后打印实时处理结果。
1287

被折叠的 条评论
为什么被折叠?



