flatMap source code (from Spark's RDD implementation, Scala):
/**
 * Return a new RDD by first applying a function to all elements of this
 * RDD, and then flattening the results.
 */
def flatMap[U: ClassTag](f: T => TraversableOnce[U]): RDD[U] = withScope {
  // Clean the closure so it is serializable before shipping it to executors.
  val cleanedF = sc.clean(f)
  // The task context and partition id are unused here; only the element
  // iterator matters — flatten each element's results lazily.
  new MapPartitionsRDD[U, T](this, (_, _, iter) => iter.flatMap(cleanedF))
}
Demo (Java, using the anonymous-class style of the Spark 1.x Java API):
/**
 * Demonstrates Spark's flatMap operator: similar to map, but each input
 * item can be mapped to 0 or more output items (so the function returns a
 * Seq/Iterable rather than a single item). Here each line is split into
 * its individual words.
 */
public class FlatMapOperator {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("FlatMapOperator").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);
        try {
            List<String> lineList = Arrays.asList("hello hadoop", "hello hdfs", "hello mapreduce", "hello spark");
            JavaRDD<String> lines = sc.parallelize(lineList);
            // Split each line on spaces; flatMap flattens the per-line word
            // lists into a single RDD of words.
            JavaRDD<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
                private static final long serialVersionUID = 1L;
                @Override
                public Iterable<String> call(String line) throws Exception {
                    return Arrays.asList(line.split(" "));
                }
            });
            // foreach runs on the executors; with master "local" the output
            // appears on this process's console.
            words.foreach(new VoidFunction<String>() {
                private static final long serialVersionUID = 1L;
                @Override
                public void call(String word) throws Exception {
                    System.out.println(word);
                }
            });
        } finally {
            // Bug fix: close() was previously unconditional but skipped if a
            // job threw; try/finally guarantees the context is released.
            sc.close();
        }
    }
}
Output:
hello
hadoop
hello
hdfs
hello
mapreduce
hello
spark