I. The deployment steps are omitted here; the official website has a detailed tutorial, just follow it step by step.
II. First, the flow of running the WordCount example on Spark:
1. Write the code
package com.sjb.example
import org.apache.log4j.Logger
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SparkSession

// com.sjb.example.WordCount
object WordCount {
  val LOGGER: Logger = Logger.getLogger(WordCount.getClass)

  def main(args: Array[String]): Unit = {
    // Input paths tried while testing; the DolphinScheduler resource path is the live one:
    // val wordFile = "file:\\C:\\Users\\Administrator\\Desktop\\test.txt"
    // val wordFile = "file:/wyyt/software/flink-1.11.2/test.txt"
    // val wordFile = "/spark/test/data/test.txt"
    val wordFile = "/dolphinscheduler/test/resources/spark_jar/word.txt"
    // Read HDFS as the "hive" user so the job has permission to open the input file
    System.setProperty("HADOOP_USER_NAME", "hive")
    // System.setProperty("HADOOP_USER_NAME", "test")
    // System.setProperty("HADOOP_USER_NAME", "dolphinscheduler")

    // Standard word-count body (a sketch completing the truncated original listing);
    // the master URL is supplied by spark-submit rather than hard-coded here:
    val conf = new SparkConf().setAppName("WordCount")
    val sc = new SparkContext(conf)
    val counts: RDD[(String, Int)] = sc.textFile(wordFile)
      .flatMap(_.split("\\s+"))
      .map(word => (word, 1))
      .reduceByKey(_ + _)
    counts.collect().foreach(pair => LOGGER.info(pair))
    sc.stop()
  }
}