- 启动Hadoop HDFS
hadoop@dhjvirtualmachine:/usr/local/hadoop/sbin$ ./start-dfs.sh
- 创建数据文件
hadoop@dhjvirtualmachine:/usr/local/spark/mycode$ mkdir wordcount
hadoop@dhjvirtualmachine:/usr/local/spark/mycode$ cd wordcount
hadoop@dhjvirtualmachine:/usr/local/spark/mycode/wordcount$ vim word.txt
- 启动spark-shell 1.6.2
hadoop@dhjvirtualmachine:/usr/local/spark$ ./bin/spark-shell
- 加载本地文件
scala> val textFile = sc.textFile("file:///usr/local/spark/mycode/wordcount/word.txt")
打印
textFile: org.apache.spark.rdd.RDD[String] = file:///usr/local/spark/mycode/wordcount/word.txt MapPartitionsRDD[1] at textFile at <console>:27
打印第一行
scala> textFile.first()
打印
res1: String = this is new fille
文件变量写回文件
scala> textFile.saveAsTextFile("file:///usr/local/spark/mycode/wordcount/writeback")