5 Flink Stream Processing API

  • Environment
    package env;
    
    import org.apache.flink.api.java.ExecutionEnvironment;
    import org.apache.flink.streaming.api.environment.LocalStreamEnvironment;
    import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
    
    /**
     * @author wangkai
     */
    public class Env {
        public static void main(String[] args) {
            /**
             * Streaming execution environment.
             * Creates an execution environment that represents the context in which the
             * current program executes. If the program is invoked standalone, this method
             * returns a local execution environment; if the program is submitted to a
             * cluster from the command-line client, it returns that cluster's execution
             * environment. In other words, getExecutionEnvironment decides which
             * environment to return based on how the program is run, which makes it the
             * most common way to create an execution environment.
             */
            StreamExecutionEnvironment streamEnvironment = StreamExecutionEnvironment.getExecutionEnvironment();
    
            /**
             * Batch execution environment.
             */
            ExecutionEnvironment batchEnvironment = ExecutionEnvironment.getExecutionEnvironment();
    
            /**
             * Create a local execution environment.
             */
            LocalStreamEnvironment localEnvironment = StreamExecutionEnvironment.createLocalEnvironment();
    
    
            /**
             * Create a remote execution environment (JobManager host, port, and the jar to ship).
             */
            StreamExecutionEnvironment remoteEnvironment = StreamExecutionEnvironment.createRemoteEnvironment("xxx", 1234, "xxx.jar");
    
        }
    }
    
    
  • Source
    package source;
    
    import org.apache.flink.api.common.serialization.SimpleStringSchema;
    import org.apache.flink.streaming.api.datastream.DataStreamSource;
    import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
    import org.apache.flink.streaming.api.functions.source.SourceFunction;
    import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011;
    
    import java.util.Arrays;
    import java.util.Properties;
    
    /**
     * @author wangkai 
     */
    public class Source {
        public static void main(String[] args) throws Exception {
            StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    
            /**
             * 1. Read from a collection
             */
            DataStreamSource<String> source = env.fromCollection(Arrays.asList("zs", "ls", "wmz"));
    
            /**
             * 2. Read from a file
             */
            DataStreamSource<String> source1 = env.readTextFile("path");
    
            /**
             * 3. Read from Kafka
             */
            Properties properties = new Properties();
            properties.setProperty("bootstrap.servers","xxx");
            properties.setProperty("group.id","xxx");
    
            DataStreamSource<String> source2 = env.addSource(new FlinkKafkaConsumer011<String>("topic", new SimpleStringSchema(), properties));
            
            /**
             * 4. Custom source
             */
            DataStreamSource<String> source3 = env.addSource(new MySourceFunction());

            source3.print();
            env.execute("source");
        }
    
        public static class MySourceFunction implements SourceFunction<String> {

            // volatile: cancel() is called from a different thread than run()
            private volatile boolean isRunning = true;

            @Override
            public void run(SourceContext<String> ctx) throws Exception {
                while (isRunning) {
                    ctx.collect("custom source record");
                    Thread.sleep(1000); // throttle emission instead of busy-looping
                }
            }

            @Override
            public void cancel() {
                isRunning = false;
            }
        }
    }
    
    
  • Transform
    • Transformation operators

      package transform;
      
      import com.alibaba.fastjson.JSONObject;
      import org.apache.flink.api.common.functions.FilterFunction;
      import org.apache.flink.api.common.functions.FlatMapFunction;
      import org.apache.flink.api.common.functions.MapFunction;
      import org.apache.flink.api.java.functions.KeySelector;
      import org.apache.flink.api.java.tuple.Tuple2;
      import org.apache.flink.streaming.api.datastream.DataStreamSource;
      import org.apache.flink.streaming.api.datastream.KeyedStream;
      import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
      import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
      import org.apache.flink.util.Collector;
      
      /**
       * @author wangkai 
       */
      public class TransForm {
          public static void main(String[] args) throws Exception {
              StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
      
              DataStreamSource<String> source = env.socketTextStream("", 7777);
      
      
              source.map(new MapFunction<String, String>() {
                  /**
                   * map operator: takes one element and produces one element.
                   */
                  public String map(String value) throws Exception {
                      Object parse = JSONObject.parse(value);
                      return parse.toString();
                  }
              });
      
      
              SingleOutputStreamOperator<Tuple2<String, Integer>> flatMapSource = source.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
                  /**
                   * flatMap operator: takes one element and produces zero, one, or more elements.
                   */
                  public void flatMap(String value, Collector<Tuple2<String, Integer>> out) throws Exception {
                      String[] s = value.split(" ");
                      for (int i = 0; i < s.length; i++) {
                          out.collect(new Tuple2<String, Integer>(s[i], 1));
                      }
      
                  }
              });
      
              source.filter(new FilterFunction<String>() {
                  /**
                   * filter operator: evaluates a boolean predicate for each element and
                   * keeps the elements for which the predicate returns true.
                   */
                  public boolean filter(String value) throws Exception {
                      // use equals(), not ==, to compare string contents
                      return "filter".equals(value);
                  }
              });
      
      
              KeyedStream<Tuple2<String, Integer>, String> keyedStream = flatMapSource.keyBy(new KeySelector<Tuple2<String, Integer>, String>() {
                  public String getKey(Tuple2<String, Integer> value) throws Exception {
                      return value.f0;
                  }
              });
      
              keyedStream.print();
              env.execute("transform");
          }
      
      }
      
    • Rolling aggregation operators

      • sum

      • max

      • min

      • minBy

      • maxBy

        package transform;
        
        
        import org.apache.flink.api.common.functions.FlatMapFunction;
        import org.apache.flink.api.java.tuple.Tuple3;
        import org.apache.flink.streaming.api.datastream.DataStreamSource;
        import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
        import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
        import org.apache.flink.util.Collector;
        
        /**
         * @author wangkai
         */
        public class RollingAggregation {
            public static void main(String[] args) throws Exception{
                StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
                env.setParallelism(1);
        
                /**
                 * Rolling aggregation operators: sum, max, min, maxBy, minBy.
                 * Note: max/min only update the aggregated field (the remaining fields keep
                 * the values of the first record seen for that key), while maxBy/minBy
                 * return the entire record containing the maximum/minimum value.
                 */

                /**
                 * Input file contents:
                 *
                 * sensor1 123456789 35
                 * sensor2 234567890 36
                 * sensor3 456962456 24
                 * sensor1 123456789 20
                 */
        
                DataStreamSource<String> source = env.readTextFile("D:\\git\\csdn-flink\\csdn-flink-1\\src\\main\\resources\\sensor");
        
                SingleOutputStreamOperator<Tuple3<String, String, Long>> stream = source.flatMap(new FlatMapFunction<String, Tuple3<String, String, Long>>() {
                    public void flatMap(String value, Collector<Tuple3<String, String, Long>> out) throws Exception {
                        String[] s = value.split(" ");
                        out.collect(new Tuple3<String, String, Long>(s[0], s[1], Long.parseLong(s[2])));
                    }
                });
        
                /**
                 * sum
                 * */
        
                SingleOutputStreamOperator<Tuple3<String, String, Long>> sum = stream
                        .keyBy(0)
                        .sum(2);
        
                sum.print("sum");
        
                /**
                 * max
                 * */
                SingleOutputStreamOperator<Tuple3<String, String, Long>> max = stream
                        .keyBy(0)
                        .max(2);
                max.print("max");
        
                /**
                 * min
                 * */
                SingleOutputStreamOperator<Tuple3<String, String, Long>> min = stream
                        .keyBy(0)
                        .min(2);
                min.print("min");
        
                /**
                 * maxBy
                 * */
                SingleOutputStreamOperator<Tuple3<String, String, Long>> maxBy = stream
                        .keyBy(0)
                        .maxBy(2);
                maxBy.print("maxby");
        
        
                /**
                 * minBy
                 * */
                SingleOutputStreamOperator<Tuple3<String, String, Long>> minBy = stream
                        .keyBy(0)
                        .minBy(2);
                minBy.print("minby");
        
        
                env.execute("rolling aggregate");
                
                /**
                 * max> (sensor1,123456789,35)
                 * min> (sensor1,123456789,35)
                 * maxby> (sensor1,123456789,35)
                 * minby> (sensor1,123456789,35)
                 * sum> (sensor1,123456789,35)
                 * maxby> (sensor2,234567890,36)
                 * min> (sensor2,234567890,36)
                 * max> (sensor2,234567890,36)
                 * maxby> (sensor3,456962456,24)
                 * sum> (sensor2,234567890,36)
                 * min> (sensor3,456962456,24)
                 * minby> (sensor2,234567890,36)
                 * sum> (sensor3,456962456,24)
                 * min> (sensor1,123456789,20)
                 * maxby> (sensor1,123456789,35)
                 * max> (sensor3,456962456,24)
                 * sum> (sensor1,123456789,55)
                 * max> (sensor1,123456789,35)
                 * minby> (sensor3,456962456,24)
                 * minby> (sensor1,123456789,20)
                 * */
            }
        
        
        }
        
        
    • Reduce

      package transform;
      
      import org.apache.flink.api.common.functions.FlatMapFunction;
      import org.apache.flink.api.common.functions.ReduceFunction;
      import org.apache.flink.api.java.tuple.Tuple3;
      import org.apache.flink.streaming.api.datastream.DataStreamSource;
      import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
      import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
      import org.apache.flink.util.Collector;
      
      /**
       * @author wangkai
       */
      public class Reduce {
          public static void main(String[] args) throws Exception{
              StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
              env.setParallelism(1);
              /**
               * KeyedStream → DataStream: an aggregation over a keyed stream that combines
               * the current element with the previous aggregation result and emits the new
               * value. The resulting stream contains the result of every aggregation step,
               * not just the final result.
               *
               * Input file contents:
               * sensor1 123456789 35
               * sensor2 234567890 36
               * sensor3 456962456 24
               * sensor1 123456789 20
               */
              DataStreamSource<String> source = env.readTextFile("D:\\git\\csdn-flink\\csdn-flink-1\\src\\main\\resources\\sensor");
      
              SingleOutputStreamOperator<Tuple3<String, String, Long>> stream = source.flatMap(new FlatMapFunction<String, Tuple3<String, String, Long>>() {
                  public void flatMap(String value, Collector<Tuple3<String, String, Long>> out) throws Exception {
                      String[] s = value.split(" ");
                      out.collect(new Tuple3<String, String, Long>(s[0], s[1], Long.parseLong(s[2])));
                  }
              });
      
      
              SingleOutputStreamOperator<Tuple3<String, String, Long>> reduce = stream
                      .keyBy(0)
                      .reduce(new ReduceFunction<Tuple3<String, String, Long>>() {
                          public Tuple3<String, String, Long> reduce(Tuple3<String, String, Long> value1, Tuple3<String, String, Long> value2) throws Exception {
                              return new Tuple3<String, String, Long>(value1.f0, value1.f1, Math.max(value1.f2, value2.f2));
                          }
                      });
      
              
              reduce.print("reduce");
      
              env.execute("reduce");
              
              
              /**
               * reduce> (sensor1,123456789,35)
               * reduce> (sensor2,234567890,36)
               * reduce> (sensor3,456962456,24)
               * reduce> (sensor1,123456789,35)
               * */
          }
      }
      
      
    • Split and select

      package transform;
      
      import org.apache.flink.api.common.functions.FlatMapFunction;
      import org.apache.flink.api.java.tuple.Tuple3;
      import org.apache.flink.streaming.api.collector.selector.OutputSelector;
      import org.apache.flink.streaming.api.datastream.DataStream;
      import org.apache.flink.streaming.api.datastream.DataStreamSource;
      import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
      import org.apache.flink.streaming.api.datastream.SplitStream;
      import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
      import org.apache.flink.util.Collector;
      
      import java.util.Collections;
      
      /**
       * @author 
       */
      public class SplitAndSelect {
          public static void main(String[] args) throws Exception{
              StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
              env.setParallelism(1);
              /**
               * split: DataStream → SplitStream: splits one DataStream into two or more
               * DataStreams according to some criteria.
               * select: SplitStream → DataStream: retrieves one or more DataStreams from
               * a SplitStream.
               *
               * Input file contents:
               * sensor1 123456789 35
               * sensor2 234567890 36
               * sensor3 456962456 24
               * sensor1 123456789 20
               */
              DataStreamSource<String> source = env.readTextFile("D:\\git\\csdn-flink\\csdn-flink-1\\src\\main\\resources\\sensor");
      
              SingleOutputStreamOperator<Tuple3<String, String, Long>> stream = source.flatMap(new FlatMapFunction<String, Tuple3<String, String, Long>>() {
                  public void flatMap(String value, Collector<Tuple3<String, String, Long>> out) throws Exception {
                      String[] s = value.split(" ");
                      out.collect(new Tuple3<String, String, Long>(s[0], s[1], Long.parseLong(s[2])));
                  }
              });
      
      
              SplitStream<Tuple3<String, String, Long>> split = stream.split(new OutputSelector<Tuple3<String, String, Long>>() {
                  public Iterable<String> select(Tuple3<String, String, Long> value) {
                      return value.f2 > 30 ? Collections.singletonList("high") : Collections.singletonList("low");
                  }
              });
      
              split.print("split");
      
              DataStream<Tuple3<String, String, Long>> high = split.select("high");
              DataStream<Tuple3<String, String, Long>> low = split.select("low");
      
              high.print("high");
              low.print("low");
      
      
              env.execute("split and select");
              
              
              /**
               * 
               * split> (sensor1,123456789,35)
               * high> (sensor1,123456789,35)
               * split> (sensor2,234567890,36)
               * high> (sensor2,234567890,36)
               * split> (sensor3,456962456,24)
               * low> (sensor3,456962456,24)
               * split> (sensor1,123456789,20)
               * low> (sensor1,123456789,20)
               * 
               * */
      
          }
      }
      
      
    • Connect and CoMap

      DataStream<Tuple3<String, String, Long>> high = split.select("high");
      DataStream<Tuple3<String, String, Long>> low = split.select("low");

      high.print("high");
      low.print("low");

      ConnectedStreams<Tuple3<String, String, Long>, Tuple3<String, String, Long>> connect = high.connect(low);
      connect.flatMap(new CoFlatMapFunction<Tuple3<String, String, Long>, Tuple3<String, String, Long>, Object>() {
          public void flatMap1(Tuple3<String, String, Long> value, Collector<Object> out) throws Exception {
              out.collect(value);
          }

          public void flatMap2(Tuple3<String, String, Long> value, Collector<Object> out) throws Exception {
              out.collect(value);
          }
      });
      
    • union

      DataStream<Tuple3<String, String, Long>> union = high.union(low);

        Differences between Connect and Union:
         1. Union requires the two streams to have the same type; Connect allows different types, which can then be unified in the subsequent CoMap.
         2. Connect can only operate on two streams, while Union can operate on more than two.
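
      A minimal sketch of point 1, connecting two streams of different types and unifying them in the CoMapFunction (the Integer-typed stream `other` is hypothetical; `high` is the Tuple3 stream from above):

      ConnectedStreams<Tuple3<String, String, Long>, Integer> connected = high.connect(other);
      DataStream<String> unified = connected.map(new CoMapFunction<Tuple3<String, String, Long>, Integer, String>() {
          public String map1(Tuple3<String, String, Long> value) {
              // first input: extract the sensor id
              return value.f0;
          }

          public String map2(Integer value) {
              // second input: render the number as a String
              return String.valueOf(value);
          }
      });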
          
      
  • Supported data types

    Flink streaming applications process streams of events represented as data objects, so Flink must be able to handle these objects internally: they need to be serialized and deserialized in order to ship them over the network, and to read them from state backends, checkpoints, and savepoints. To do this efficiently, Flink needs to know exactly which data types an application processes. Flink uses the concept of type information to represent data types and generates a specific serializer, deserializer, and comparator for every data type. Flink also has a type-extraction system that analyzes the input and return types of functions to obtain type information automatically, and with it the serializers and deserializers. In some cases, however, such as with lambda functions or generic types, type information must be provided explicitly for the application to work correctly or to improve its performance.
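
    A minimal sketch of providing type information explicitly for a lambda via returns() and the org.apache.flink.api.common.typeinfo.Types helper (`source` is assumed to be a DataStream<String>):

    // Java erases the lambda's generic parameters, so Flink cannot infer
    // Tuple2<String, Integer> on its own; returns(...) supplies it explicitly.
    DataStream<Tuple2<String, Integer>> counts = source
            .map(value -> Tuple2.of(value, 1))
            .returns(Types.TUPLE(Types.STRING, Types.INT));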

  • Custom UDF functions
    • Function classes
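
      Flink exposes all of its UDF interfaces (MapFunction, FilterFunction, FlatMapFunction, etc.) as types that user code can implement as named classes. A minimal sketch (the predicate is an assumed example):

      public static class SensorFilter implements FilterFunction<String> {
          @Override
          public boolean filter(String value) {
              // keep only lines that mention "sensor" (assumed predicate)
              return value.contains("sensor");
          }
      }
      // usage: stream.filter(new SensorFilter());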

    • Anonymous functions
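
      Anonymous functions can be written inline, either as anonymous classes (as in the Transform examples above) or as lambda expressions. A minimal sketch:

      stream.filter(value -> value.contains("sensor"));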

    • Rich functions

      A "rich function" is a function-class interface provided by the DataStream API; every Flink function class has a Rich version. It differs from a regular function in that it can access the context of its runtime environment and has lifecycle methods, which makes more complex functionality possible. Typical lifecycle methods are: open(), the initialization method of a rich function, which is called before an operator such as map or filter is invoked; close(), the last method called in the lifecycle, used for cleanup work; and getRuntimeContext(), which provides information about the function's RuntimeContext, such as the parallelism the function executes with, the name of the task, and the current state.
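
      A minimal sketch of a rich function using these lifecycle methods (the prefixing logic is an assumed example):

      public static class MyRichMapper extends RichMapFunction<String, String> {
          @Override
          public void open(Configuration parameters) throws Exception {
              // called once per parallel instance before the first map() call
              System.out.println("open, subtask " + getRuntimeContext().getIndexOfThisSubtask());
          }

          @Override
          public String map(String value) throws Exception {
              // the runtime context is also available in the processing method
              return getRuntimeContext().getTaskName() + ": " + value;
          }

          @Override
          public void close() throws Exception {
              // called once at the end of the lifecycle; release resources here
              System.out.println("close");
          }
      }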

  • Sink
    • Custom sink function

      package sink;
      
      import org.apache.flink.configuration.Configuration;
      import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
      
      import java.sql.Connection;
      import java.sql.DriverManager;
      import java.sql.PreparedStatement;
      
      public class MyJdbcSink extends RichSinkFunction<String> {
          Connection conn = null;
          PreparedStatement insertStmt = null;
      
      
          @Override
          public void open(Configuration parameters) throws Exception {
              conn = DriverManager.getConnection("url", "username", "password");
              insertStmt = conn.prepareStatement("insert into xxx (a) values (?)");
          }
      
          /**
           * Called once per record: bind the incoming value and execute the SQL.
           */
          @Override
          public void invoke(String value, Context context) throws Exception {
              insertStmt.setString(1, value);
              insertStmt.execute();
          }
      
          @Override
          public void close() throws Exception {
              insertStmt.close();
              conn.close();
          }
      }
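
      A minimal usage sketch (`stream` is assumed to be a DataStream<String>):

      stream.addSink(new MyJdbcSink());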
      
      