// Spark configuration: the application is named after this class and runs against the "yarn" master.
val sconf = new SparkConf().setAppName(this.getClass.getName).setMaster("yarn")
// The context is built once from the configuration above; neither reference is reassigned in
// this snippet, so both are declared as immutable `val`s.
val sc = new SparkContext(sconf)
// Build an RDD[String] over the README stored on HDFS (the returned RDD is not used here).
sc.textFile("hdfs://m2:9820/README.md")
查看textFile方法内容如下:
/**
 * Read a text file from HDFS, a local file system (available on all nodes), or any
 * Hadoop-supported file system URI, and expose its contents as an RDD of Strings.
 *
 * The file is loaded through `hadoopFile` using `TextInputFormat`, which yields
 * (`LongWritable`, `Text`) records; the key is dropped and only the `Text` value is kept,
 * converted to a `String`. The resulting RDD is named after `path`.
 *
 * @param path          location of the file to read
 * @param minPartitions forwarded unchanged to `hadoopFile`; defaults to `defaultMinPartitions`
 * @return an RDD containing the string form of every record value
 */
def textFile(
    path: String,
    minPartitions: Int = defaultMinPartitions): RDD[String] = withScope {
  assertNotStopped()
  // Raw (key, value) records as produced by the Hadoop input format.
  val records = hadoopFile(
    path,
    classOf[TextInputFormat],
    classOf[LongWritable],
    classOf[Text],
    minPartitions)
  // Only the Text value is of interest; the LongWritable key is discarded.
  val lines = records.map { case (_, text) => text.toString }
  lines.setName(path)
}
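可以看到,textFile 底层采用的是 MapReduce(MR)读取 HDFS 文件的方式:通过 hadoopFile 并使用 Hadoop 的 TextInputFormat 读取数据,再取每条记录的 value(Text)转换为 String。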