The HDFS path of the file to read (/flink/data/wc/submit.sh) is shown in the figure below.
A Spark example that reads the file from HDFS and word-counts it:
package demo
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
object _00wcHDFS {
  def main(args: Array[String]): Unit = {
    val conf: SparkConf = new SparkConf()
    conf.setMaster("local").setAppName("wc")
    val context: SparkContext = new SparkContext(conf)
    context.setLogLevel("ERROR")
    // Note the path format: the hdfs:// scheme plus a third slash, i.e. hdfs:///flink/...
    val sh: RDD[String] = context.textFile("hdfs:///flink/data/wc/submit.sh")
    // Split each line into words, count each word, and keep only words appearing more than once
    val res: RDD[(String, Int)] = sh.flatMap(_.split(" ")).map((_, 1)).reduceByKey(_ + _).filter(_._2 > 1)
    res.foreach(println)
    context.stop()
  }
}
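If the word counts should be persisted back to HDFS rather than only printed, RDD.saveAsTextFile accepts the same hdfs:/// path convention. A minimal sketch, reusing res from above; the output directory /flink/data/wc/out is a hypothetical example:

    // Hypothetical output directory; saveAsTextFile fails if the directory already exists
    res.saveAsTextFile("hdfs:///flink/data/wc/out")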
A Flink example that reads the same file from HDFS:
package demo
import org.apache.flink.streaming.api.scala._
object _01readHDFS {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    // Note the path format: the hdfs:// scheme plus a third slash, i.e. hdfs:///flink/...
    val textStream = env.readTextFile("hdfs:///flink/data/wc/submit.sh")
    // Split each line into words, key by the word, and keep a running count per word
    textStream.flatMap(_.split(" ")).map((_, 1)).keyBy(0).sum(1).print()
    // readTextFile reads the file once, so the job stops after the file is consumed
    env.execute()
  }
}
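Because the stream here is finite (the job ends once the file is read), the same word count can also be expressed with Flink's batch DataSet API. A minimal sketch under the same path assumption; the object name _02readHDFSBatch is hypothetical:

package demo
import org.apache.flink.api.scala._
object _02readHDFSBatch {
  def main(args: Array[String]): Unit = {
    val env = ExecutionEnvironment.getExecutionEnvironment
    // Same hdfs:/// path convention as the streaming example
    val text = env.readTextFile("hdfs:///flink/data/wc/submit.sh")
    // groupBy(0).sum(1) aggregates per word; print() triggers execution, so no env.execute() is needed
    text.flatMap(_.split(" ")).map((_, 1)).groupBy(0).sum(1).print()
  }
}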