yarn界面: http://192.168.80.139:8088
spark页面: http://192.168.80.139:8080
HDFS界面: http://192.168.80.139:9870
nodemanager界面: http://192.168.80.139:8042
以kmeans为例
本地模式:(使用sc.master查看模式)
直接输入spark-shell
:paste进入粘贴模式（粘贴完代码后按 Ctrl+D 退出粘贴模式并执行）
import org.apache.spark.mllib.clustering._
import org.apache.spark.mllib.linalg.Vectors

// Load the space-separated numeric dataset from the local filesystem.
val data = sc.textFile("file:///test/kmeans_data.txt")

// Parse each line into a dense vector; cache since KMeans iterates over the RDD.
val parsedData = data
  .map(line => Vectors.dense(line.split(' ').map(_.toDouble)))
  .cache()

// Train K-Means with 2 clusters and at most 20 iterations, then print the centers.
val model = KMeans.train(parsedData, 2, 20)
model.clusterCenters.foreach(println)
yarn模式:
spark-shell --master yarn --deploy-mode client
:paste进入粘贴模式（粘贴完代码后按 Ctrl+D 退出粘贴模式并执行）
import org.apache.spark.mllib.clustering._
import org.apache.spark.mllib.linalg.Vectors

// Load the space-separated numeric dataset from HDFS (namenode hadoop00:9000).
val data = sc.textFile("hdfs://hadoop00:9000/test/kmeans_data.txt")

// Parse each line into a dense vector; cache since KMeans iterates over the RDD.
val parsedData = data
  .map(line => Vectors.dense(line.split(' ').map(_.toDouble)))
  .cache()

// Train K-Means with 2 clusters and at most 20 iterations, then print the centers.
val model = KMeans.train(parsedData, 2, 20)
model.clusterCenters.foreach(println)