Requirements for writing Flink code
1. A Flink program follows the pattern: source -> transformations -> sink.
There are two ways to create an execution environment:
Batch: ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
Streaming: StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
2. In batch processing, the data abstraction Flink operates on is the DataSet;
in stream processing, it is the DataStream.
Transformation operators are lazy. In the DataSet API, print(), count(), and collect() trigger execution; in the DataStream API, you must call env.execute() at the end, as the sketch below shows.
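A minimal streaming sketch of this lazy behavior (the socket source on localhost:9999 is an assumed example input, not part of the word-count program further below):

import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class LazyExecutionDemo {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Assumed example source: lines read from a local socket (e.g. started with: nc -lk 9999)
        DataStream<String> lines = env.socketTextStream("localhost", 9999);
        // This transformation is only declared here; nothing runs yet
        DataStream<String> upper = lines.map(String::toUpperCase);
        // In the DataStream API, print() only adds a sink; it does not trigger execution
        upper.print();
        // Execution starts only here
        env.execute("lazy-execution-demo");
    }
}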
I use Maven. The pom file:
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.myflink</groupId>
    <artifactId>MyFlinkCode</artifactId>
    <version>1.0-SNAPSHOT</version>
    <packaging>jar</packaging>
    <name>MyFlinkCode</name>
    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
        <exec.mainClass>com.myflink.Launcher</exec.mainClass>
        <flink.version>1.7.1</flink.version>
    </properties>
    <dependencies>
        <!-- Flink dependencies -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-java</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_2.11</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_2.11</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-wikiedits_2.11</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <!-- Flink Kafka connector dependency -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka-0.11_2.11</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.11</version>
            <scope>test</scope>
        </dependency>
        <!-- Flink Scala API (Scala 2.11) -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-scala_2.11</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-library</artifactId>
            <version>2.11.12</version>
        </dependency>
        <!-- log4j and slf4j bindings; comment out the dependency below if you do not want log output in the console -->
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
            <version>1.7.25</version>
            <scope>test</scope>
        </dependency>
    </dependencies>
    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>3.0.0</version>
                <configuration>
                    <filters>
                        <filter>
                            <artifact>*:*</artifact>
                            <excludes>
                                <exclude>META-INF/*.SF</exclude>
                                <exclude>META-INF/*.DSA</exclude>
                                <exclude>META-INF/*.RSA</exclude>
                            </excludes>
                        </filter>
                    </filters>
                </configuration>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <transformers>
                                <transformer
                                        implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                    <mainClass>${exec.mainClass}</mainClass>
                                </transformer>
                            </transformers>
                            <minimizeJar>true</minimizeJar>
                            <finalName>MyFlinkCode</finalName>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
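With this pom, mvn clean package builds a fat jar at target/MyFlinkCode.jar: the shade plugin strips signature files (META-INF/*.SF and the like), minimizes the jar, and writes ${exec.mainClass} (com.myflink.Launcher) into the manifest as the entry point.
Below is a batch WordCount example using the DataSet API: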
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.AggregateOperator;
import org.apache.flink.api.java.operators.DataSource;
import org.apache.flink.api.java.operators.FlatMapOperator;
import org.apache.flink.api.java.operators.MapOperator;
import org.apache.flink.api.java.operators.UnsortedGrouping;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;

public class WordCount {
    public static void main(String[] args) throws Exception {
        // Batch execution environment
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        // Source: read the input file line by line
        DataSource<String> dataSource = env.readTextFile("./data/words.txt");
        // Split each line on spaces and emit the individual words
        FlatMapOperator<String, String> words = dataSource.flatMap(new FlatMapFunction<String, String>() {
            @Override
            public void flatMap(String line, Collector<String> collector) throws Exception {
                String[] split = line.split(" ");
                for (String word : split) {
                    collector.collect(word);
                }
            }
        });
        // Map each word to a (word, 1) pair
        MapOperator<String, Tuple2<String, Integer>> pairWords = words.map(new MapFunction<String, Tuple2<String, Integer>>() {
            @Override
            public Tuple2<String, Integer> map(String word) throws Exception {
                return new Tuple2<>(word, 1);
            }
        });
        // Group by the word (tuple field 0) and sum the counts (tuple field 1)
        UnsortedGrouping<Tuple2<String, Integer>> groupBy = pairWords.groupBy(0);
        AggregateOperator<Tuple2<String, Integer>> result = groupBy.sum(1);
        // In the DataSet API, print() triggers execution -- no env.execute() needed
        result.print();
    }
}
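Assuming, for illustration, that ./data/words.txt contains the lines "hello flink" and "hello world", print() outputs one tuple per distinct word, e.g. (hello,2), (flink,1), (world,1); the order of the output is not guaranteed.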