#!/usr/bin/env bash

if [ -z "${SPARK_HOME}" ]; then
  source "$(dirname "$0")"/find-spark-home
fi

# Load the Spark environment needed to run this script.
. "${SPARK_HOME}"/bin/load-spark-env.sh

# Find the java binary.
if [ -n "${JAVA_HOME}" ]; then
  # Initialize the java command.
  RUNNER="${JAVA_HOME}/bin/java"
else
  if [ "$(command -v java)" ]; then
    RUNNER="java"
  else
    echo "JAVA_HOME is not set" >&2
    exit 1
  fi
fi

# Find Spark jars.
if [ -d "${SPARK_HOME}/jars" ]; then
  SPARK_JARS_DIR="${SPARK_HOME}/jars"
else
  SPARK_JARS_DIR="${SPARK_HOME}/assembly/target/scala-$SPARK_SCALA_VERSION/jars"
fi

if [ ! -d "$SPARK_JARS_DIR" ] && [ -z "$SPARK_TESTING$SPARK_SQL_TESTING" ]; then
  echo "Failed to find Spark jars directory ($SPARK_JARS_DIR)." 1>&2
  echo "You need to build Spark with the target \"package\" before running this program." 1>&2
  exit 1
else
  LAUNCH_CLASSPATH="$SPARK_JARS_DIR/*"
fi

# Add the launcher build dir to the classpath if requested.
if [ -n "$SPARK_PREPEND_CLASSES" ]; then
  LAUNCH_CLASSPATH="${SPARK_HOME}/launcher/target/scala-$SPARK_SCALA_VERSION/classes:$LAUNCH_CLASSPATH"
fi

# For tests
if [[ -n "$SPARK_TESTING" ]]; then
  unset YARN_CONF_DIR
  unset HADOOP_CONF_DIR
fi

# The launcher library will print arguments separated by a NULL character, to allow arguments with
# characters that would be otherwise interpreted by the shell. Read that in a while loop, populating
# an array that will be used to exec the final command.
#
# The exit code of the launcher is appended to the output, so the parent shell removes it from the
# command array and checks the value to see if the launcher succeeded.
build_command() {
  "$RUNNER" -Xmx128m $SPARK_LAUNCHER_OPTS -cp "$LAUNCH_CLASSPATH" org.apache.spark.launcher.Main "$@"
  printf "%d\0" $?
}

# Turn off posix mode since it does not allow process substitution.
set +o posix
CMD=()
DELIM=$'\n'
CMD_START_FLAG="false"
while IFS= read -d "$DELIM" -r ARG; do
  if [ "$CMD_START_FLAG" == "true" ]; then
    CMD+=("$ARG")
  else
    if [ "$ARG" == $'\0' ]; then
      # After NULL character is consumed, change the delimiter and consume command string.
      DELIM=''
      CMD_START_FLAG="true"
    elif [ "$ARG" != "" ]; then
      echo "$ARG"
    fi
  fi
done < <(build_command "$@")

COUNT=${#CMD[@]}
LAST=$((COUNT - 1))
LAUNCHER_EXIT_CODE=${CMD[$LAST]}

# Certain JVM failures result in errors being printed to stdout (instead of stderr), which causes
# the code that parses the output of the launcher to get confused. In those cases, check if the
# exit code is an integer, and if it's not, handle it as a special error case.
if ! [[ $LAUNCHER_EXIT_CODE =~ ^[0-9]+$ ]]; then
  echo "${CMD[@]}" | head -n-1 1>&2
  exit 1
fi

if [ $LAUNCHER_EXIT_CODE != 0 ]; then
  exit $LAUNCHER_EXIT_CODE
fi

CMD=("${CMD[@]:0:$LAST}")
exec "${CMD[@]}"
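For context, bin/spark-submit itself is only a thin wrapper around this script: it resolves SPARK_HOME and then execs spark-class, passing org.apache.spark.deploy.SparkSubmit as the class whose java command the launcher should build. Below is a minimal sketch of that hand-off, simplified for illustration rather than the exact shipped script:

#!/usr/bin/env bash
# Sketch of bin/spark-submit: resolve SPARK_HOME, then delegate everything to spark-class,
# which runs the launcher shown above and finally execs the java command it built.
if [ -z "${SPARK_HOME}" ]; then
  source "$(dirname "$0")"/find-spark-home
fi
exec "${SPARK_HOME}"/bin/spark-class org.apache.spark.deploy.SparkSubmit "$@"

When debugging, exporting SPARK_PRINT_LAUNCH_COMMAND=1 should make the launcher echo the fully built java command to stderr before spark-class execs it, which is an easy way to see the result of the NULL-separated protocol described above.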
def doSubmit(args: Array[String]): Unit = {
  // Initialize logging if it hasn't been done yet. Keep track of whether logging needs to
  // be reset before the application starts.
  val uninitLog = initializeLogIfNecessary(true, silent = true)

  val appArgs = parseArguments(args)
  if (appArgs.verbose) {
    logInfo(appArgs.toString)
  }
  /**
   * Decide which method to execute according to the spark-submit command ($CLASS argument).
   */
  appArgs.action match {
    // Submit the application for execution
    case SparkSubmitAction.SUBMIT => submit(appArgs, uninitLog)
    // Kill a submitted application
    case SparkSubmitAction.KILL => kill(appArgs)
    // Request the status of a submission
    case SparkSubmitAction.REQUEST_STATUS => requestStatus(appArgs)
    // Print the version number
    case SparkSubmitAction.PRINT_VERSION => printVersion()
  }
}
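To make the four actions concrete, here are illustrative spark-submit invocations that map onto them (host, port, and driver IDs are placeholders, not values taken from the source; --kill and --status apply to standalone/Mesos cluster deployments):

# SUBMIT: the normal path, submit an application to run
spark-submit --master spark://master-host:7077 --class com.example.Main app.jar
# KILL: kill a previously submitted driver
spark-submit --master spark://master-host:6066 --kill driver-20200101000000-0000
# REQUEST_STATUS: query the status of a submitted driver
spark-submit --master spark://master-host:6066 --status driver-20200101000000-0000
# PRINT_VERSION: print the Spark version and exit
spark-submit --version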
Since we are tracing the job-submission flow, the match falls into SparkSubmitAction.SUBMIT, so we follow the call into submit(appArgs, uninitLog).
/**
 * Submit the application using the provided parameters, ensuring to first wrap
 * in a doAs when --proxy-user is specified.
 */
@tailrec
private def submit(args: SparkSubmitArguments, uninitLog: Boolean): Unit = {

  def doRunMain(): Unit = {
    if (args.proxyUser != null) {
      val proxyUser = UserGroupInformation.createProxyUser(args.proxyUser,
        UserGroupInformation.getCurrentUser())
      try {
        proxyUser.doAs(new PrivilegedExceptionAction[Unit]() {
          override def run(): Unit = {
            runMain(args, uninitLog)
          }
        })
      } catch {
        case e: Exception =>
          // Hadoop's AuthorizationException suppresses the exception's stack trace, which
          // makes the message printed to the output by the JVM not very helpful. Instead,
          // detect exceptions with empty stack traces here, and treat them differently.
          if (e.getStackTrace().length == 0) {
            error(s"ERROR: ${e.getClass().getName()}: ${e.getMessage()}")
          } else {
            throw e
          }
      }
    } else {
      runMain(args, uninitLog)
    }
  }

  // In standalone cluster mode, there are two submission gateways:
  //  (1) The traditional RPC gateway using o.a.s.deploy.Client as a wrapper
  //  (2) The new REST-based gateway introduced in Spark 1.3
  // The latter is the default behavior as of Spark 1.3, but Spark submit will fail over
  // to use the legacy gateway if the master endpoint turns out to be not a REST server.
  if (args.isStandaloneCluster && args.useRest) {
    try {
      logInfo("Running Spark using the REST application submission protocol.")
      doRunMain()