目录
一.使用maven 命令构建flink工程(或者通过idea构建也可以)
一.使用maven 命令构建flink工程(或者通过idea构建也可以)
1.flink工程构建命令
mvn archetype:generate -DarchetypeGroupId=org.apache.flink -DarchetypeArtifactId=flink-quickstart-java -DarchetypeVersion=1.9.0
2.项目工程目录结构如下
二.编写wordcount的代码
1.项目结构介绍
BatchJob为批处理
StreamingJob为流处理
我们这里使用流处理
2.流处理步骤介绍
//1.设置执行环境
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
//2.定义数据源(可以监听端口或者定义kafka数据源等等)
//win下打开cmd 执行nc -lp 9000打开端口,输入数据即可
DataStream<String> text = env.socketTextStream("127.0.0.1",9000,"\n");
//3.数据的转换、处理
//4.执行flink程序
env.execute("Flink Streaming Java API Skeleton");
3.具体代码
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.tangbb.cold.flink;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.util.Collector;
/**
* Skeleton for a Flink Streaming Job.
*
* <p>For a tutorial how to write a Flink streaming application, check the
* tutorials and examples on the <a href="http://flink.apache.org/docs/stable/">Flink Website</a>.
*
* <p>To package your application into a JAR file for execution, run
* 'mvn clean package' on the command line.
*
* <p>If you change the name of the main class (with the public static void main(String[] args))
* method, change the respective entry in the POM.xml file (simply search for 'mainClass').
*/
public class StreamingJob {

    /**
     * Entry point: reads text lines from a local socket (127.0.0.1:9000),
     * tokenizes each line into words, and prints per-word counts aggregated
     * over 5-second processing-time windows.
     *
     * <p>On Windows, open cmd and run {@code nc -lp 9000}, then type input
     * lines; counts appear on the IDE console.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the Flink job fails to start or execute
     */
    public static void main(String[] args) throws Exception {
        // 1. Set up the streaming execution environment.
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // 2. Define the data source (a raw socket here; Kafka etc. would also work).
        //    "\n" is the line delimiter of the incoming text stream.
        DataStream<String> text = env.socketTextStream("127.0.0.1", 9000, "\n");

        // 3. Transform: split lines into words, key by word, count within 5s windows.
        DataStream<WordWithCount> wordWithCountDataStream = text
                .flatMap(new FlatMapFunction<String, WordWithCount>() {
                    @Override
                    public void flatMap(String s, Collector<WordWithCount> collector) throws Exception {
                        // Split on runs of whitespace ("\\s+") instead of single
                        // whitespace chars ("\\s"): the latter emits empty tokens
                        // for consecutive spaces, which would be counted as the
                        // word "". Also guard against a residual empty first
                        // token when the line starts with whitespace.
                        for (String word : s.split("\\s+")) {
                            if (!word.isEmpty()) {
                                collector.collect(new WordWithCount(word, 1));
                            }
                        }
                    }
                })
                .keyBy("word")
                .timeWindow(Time.seconds(5))
                .reduce(new ReduceFunction<WordWithCount>() {
                    @Override
                    public WordWithCount reduce(WordWithCount wordWithCount, WordWithCount t1) throws Exception {
                        // Both records share the same key, so keep the word and
                        // add the partial counts.
                        return new WordWithCount(wordWithCount.word, wordWithCount.count + t1.count);
                    }
                });

        // 4. Print the windowed counts; parallelism 1 keeps the console output
        //    from being interleaved across subtasks.
        wordWithCountDataStream.print().setParallelism(1);

        // 5. Trigger execution of the Flink program.
        env.execute("Flink Streaming Java API Skeleton");
    }

    /**
     * POJO holding a word and its running count.
     *
     * <p>The public no-arg constructor and public fields/getters are required
     * so Flink can recognize this class as a POJO and key by the {@code "word"}
     * field name in {@code keyBy("word")}.
     */
    public static class WordWithCount {
        public String word;
        public long count;

        public WordWithCount() {}

        public WordWithCount(String word, long count) {
            this.word = word;
            this.count = count;
        }

        public String getWord() {
            return word;
        }

        public void setWord(String word) {
            this.word = word;
        }

        public long getCount() {
            return count;
        }

        public void setCount(long count) {
            this.count = count;
        }

        @Override
        public String toString() {
            return "WordWithCount{" +
                    "word='" + word + '\'' +
                    ", count=" + count +
                    '}';
        }
    }
}
4.测试
win下安装nc命令,参考链接如下
在Windows操作系统中怎样使用nc命令_百思不得小赵的博客-CSDN博客_windows nc
1.cmd打开9000端口发送数据测试
2.启动idea单词统计程序
idea控制台展示统计结果