Flink DateSet定制API详解(Scala版) -002
flatMap
以element为粒度,对每个element进行1:n的转换。
执行程序:
package code.book.batch.dataset.advance.api
import org.apache.flink.api.common.functions.FlatMapFunction
import org.apache.flink.api.scala.{ExecutionEnvironment, _}
import org.apache.flink.util.Collector
object FlatMapFunction001scala {
  def main(args: Array[String]): Unit = {
    // Batch execution environment shared by both examples below.
    val env = ExecutionEnvironment.getExecutionEnvironment
    val sentences = env.fromElements("flink vs spark", "buffer vs shuffer")

    // Example 1: element-wise transformation via the FlatMapFunction
    // interface — upper-case each element and append a marker suffix.
    val tagged = sentences.flatMap(new FlatMapFunction[String, String]() {
      override def flatMap(in: String, out: Collector[String]): Unit = {
        val decorated = in.toUpperCase() + "--##bigdata##"
        out.collect(decorated)
      }
    })
    tagged.print()

    // Example 2: emit each element as an Array of its upper-cased tokens.
    val tokenized = sentences.flatMap {
      new FlatMapFunction[String, Array[String]] {
        override def flatMap(in: String, out: Collector[Array[String]]): Unit = {
          out.collect(in.toUpperCase().split("\\s+"))
        }
      }
    }
    // Two equivalent ways of printing every token of every array:
    // nested placeholder lambdas, then an explicit for-comprehension.
    tokenized.collect().foreach(_.foreach(println(_)))
    for {
      tokens <- tokenized.collect()
      token  <- tokens
    } println(token)
  }
}
执行结果:
text2.print()
FLINK VS SPARK--##bigdata##
BUFFER VS SHUFFER--##bigdata##
text3.collect().foreach(_.foreach(println(_)))
FLINK
VS
SPARK
BUFFER
VS
SHUFFER
filter
以element 为粒度,对element 进行过滤操作。将满足过滤条件的element 组成新的DataSet
执行程序:
package code.book.batch.dataset.advance.api
import org.apache.flink.api.common.functions.FilterFunction
import org.apache.flink.api.scala.{ExecutionEnvironment, _}
object FilterFunction001scala {
  def main(args: Array[String]): Unit = {
    // Batch execution environment for the filter demonstrations.
    val env = ExecutionEnvironment.getExecutionEnvironment
    val numbers = env.fromElements(2, 4, 7, 8, 9, 6)

    // Keep only the even elements.
    val evens = numbers.filter(new FilterFunction[Int] {
      override def filter(value: Int): Boolean = value % 2 == 0
    })
    evens.print()

    // Keep only the elements strictly greater than 5.
    val greaterThanFive = numbers.filter(new FilterFunction[Int] {
      override def filter(value: Int): Boolean = value > 5
    })
    greaterThanFive.print()
  }
}
执行结果:
text2.print()
2
4
8
6
text3.print()
7
8
9
6