1. Spark源码解析之启动脚本解析_spark start-slave.sh-CSDN博客

本文链接：https://blog.csdn.net/lingeio/article/details/96379627

本文解析了Spark集群启动过程中的关键脚本，包括start-all.sh、start-master.sh、start-slaves.sh等，阐述了各脚本的功能及相互间的调用关系，深入介绍了Spark集群的启动流程。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

从零开始解读Spark源码。前期记录详细点。

Spark启动方式主要有两种：start-all.sh一键启动，start-master.sh和start-slave.sh单独启动master和slave。

运行Spark的方式也是两种：spark-shell和spark-submit。

这里解析所有相关启动脚本。

Spark集群启动脚本
start-all.sh
start-master.sh
start-slave.sh
spark-shell
spark-submit

Spark集群启动脚本

start-all.sh

主要是启动spark-config.sh、start-master.sh、start-slaves.sh。

也就是负责：加载conf目录，启动master节点、启动worker节点。

由于启动方式不同，入口脚本也不同，所以其他启动脚本也都会各自导入spark目录、conf目录这两个环境变量。

# Resolve the Spark root directory when the caller has not set SPARK_HOME.
#   "$0"                    - the path this script was invoked as
#   $(dirname "$0")         - the directory that contains this script
#   $(cd <that dir>/..; pwd) - absolute path of its parent, i.e. the Spark root
[ -n "${SPARK_HOME}" ] || export SPARK_HOME="$(cd "$(dirname "$0")"/..; pwd)"

# Load the Spark configuration by sourcing it into the current shell.
. "${SPARK_HOME}/sbin/spark-config.sh"

# Start the master first, then the workers, each through its own script.
"${SPARK_HOME}/sbin/start-master.sh"
"${SPARK_HOME}/sbin/start-slaves.sh"

spark-config.sh

主要是：导入SPARK_CONF_DIR、添加PySpark到Python路径。

这个脚本就是负责：导入conf 目录为环境变量、导入PySpark环境变量。

# Symlinks and absolute paths are resolved relative to SPARK_HOME, so default
# it to the parent of this script's directory when it is not already set.
if [ -z "${SPARK_HOME}" ]; then
  SPARK_HOME="$(cd "$(dirname "$0")"/..; pwd)"
  export SPARK_HOME
fi

# Export the configuration directory; an unset or empty SPARK_CONF_DIR falls
# back to ${SPARK_HOME}/conf.
: "${SPARK_CONF_DIR:=${SPARK_HOME}/conf}"
export SPARK_CONF_DIR

# Put the PySpark sources and the bundled py4j archive at the front of
# PYTHONPATH. PYSPARK_PYTHONPATH_SET marks this as done so the entries are not
# prepended a second time.
if [ -z "${PYSPARK_PYTHONPATH_SET}" ]; then
  PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.7-src.zip:${SPARK_HOME}/python:${PYTHONPATH}"
  export PYTHONPATH PYSPARK_PYTHONPATH_SET=1
fi

start-master.sh

主要是：设置master类、设置master节点host、port、启动load-spark-env.sh、spark-daemon.sh。

# Starts the master on the machine this script is executed on.

# Default SPARK_HOME to the parent of this script's directory.
if [ -z "${SPARK_HOME}" ]; then
  export SPARK_HOME="$(cd "$(dirname "$0")"/..; pwd)"
fi

# NOTE: This exact class name is matched downstream by SparkSubmit.
# Any changes need to be reflected there.
CLASS="org.apache.spark.deploy.master.Master"

# Print usage and exit with status 1 when the argument list ends in --help or
# -h (for example "start-master.sh --help").
# "$*" joins all arguments into one string so it can be glob-matched.
if [[ "$*" = *--help ]] || [[ "$*" = *-h ]]; then
  echo "Usage: ./sbin/start-master.sh [options]"
  # Output lines matching any of these alternatives are dropped below;
  # "\|" is alternation in grep's basic regular expressions.
  pattern="Usage:"
  pattern+="\|Using Spark's default log4j profile:"
  pattern+="\|Registered signal handlers for"

  # Runs: spark-class org.apache.spark.deploy.master.Master --help
  # stderr is merged into stdout, lines matching $pattern are filtered out,
  # and whatever remains is written to stderr.
  "${SPARK_HOME}"/bin/spark-class "$CLASS" --help 2>&1 | grep -v "$pattern" 1>&2
  exit 1
fi

# Remember the arguments this script was called with as one space-joined
# string; it is appended to the daemon command line at the end.
ORIGINAL_ARGS="$*"

# Source the shared configuration script.
. "${SPARK_HOME}/sbin/spark-config.sh"

# Source the script that loads the spark-env settings.
. "${SPARK_HOME}/bin/load-spark-env.sh"

# The master port defaults to 7077.
if [ "$SPARK_MASTER_PORT" = "" ]; then
  SPARK_MASTER_PORT=7077
fi

# When no master host is configured, use this machine's host name. `uname`
# reports the operating system name, which selects how the name is looked up.
if [ "$SPARK_MASTER_HOST" = "" ]; then
  case "$(uname)" in
    SunOS)
      # On SunOS take the last field of the check-hostname output.
      SPARK_MASTER_HOST="$(/usr/sbin/check-hostname | awk '{print $NF}')"
      ;;
    *)
      # Everywhere else use the output of `hostname -f`.
      SPARK_MASTER_HOST="$(hostname -f)"
      ;;
  esac
fi

# The web UI port defaults to 8080.
if [ "$SPARK_MASTER_WEBUI_PORT" = "" ]; then
  SPARK_MASTER_WEBUI_PORT=8080
fi

# Hand over to spark-daemon.sh. With the defaults this runs:
#   sbin/spark-daemon.sh start org.apache.spark.deploy.master.Master 1 \
#     --host <hostname> --port 7077 --webui-port 8080 [original args]
# Each continuation backslash must be the very last character on its line;
# trailing whitespace after it ends the command early.
# $ORIGINAL_ARGS is deliberately unquoted so that it splits back into words.
# shellcheck disable=SC2086
"${SPARK_HOME}/sbin"/spark-daemon.sh start "$CLASS" 1 \
  --host "$SPARK_MASTER_HOST" --port "$SPARK_MASTER_PORT" --webui-port "$SPARK_MASTER_WEBUI_PORT" \
  $ORIGINAL_ARGS

spark-class

主要加载java目录、spark-jars目录、调用org.apache.spark.launcher.Main解析后返回参数并执行参数中的类。其实最后就是执行各种类。

几乎所有的spark服务最终都会调用spark-class来执行类。

# Locate SPARK_HOME by sourcing the find-spark-home script that sits next to
# this one, unless the caller already provided it. Sourcing runs the script
# in the current shell, so the variables it sets stay visible here.
if [ -z "${SPARK_HOME}" ]; then
  . "$(dirname "$0")"/find-spark-home
fi

# Pull in the settings from spark-env.
. "${SPARK_HOME}"/bin/load-spark-env.sh

# Find the java binary and store it in RUNNER: prefer a non-empty JAVA_HOME,
# otherwise fall back to a `java` found on PATH, otherwise give up.
if [ -n "${JAVA_HOME}" ]; then
  RUNNER="${JAVA_HOME}/bin/java"
elif [ "$(command -v java)" ]; then
  RUNNER="java"
else
  echo "JAVA_HOME is not set" >&2
  exit 1
fi

# Find Spark jars: use ${SPARK_HOME}/jars when that directory exists,
# otherwise the assembly build output for the current Scala version.
SPARK_JARS_DIR="${SPARK_HOME}/jars"
if [ ! -d "${SPARK_JARS_DIR}" ]; then
  SPARK_JARS_DIR="${SPARK_HOME}/assembly/target/scala-$SPARK_SCALA_VERSION/jars"
fi

# A missing jars directory is fatal unless one of the testing variables
# (SPARK_TESTING / SPARK_SQL_TESTING) is non-empty.
if [ ! -d "$SPARK_JARS_DIR" ] && [ -z "$SPARK_TESTING$SPARK_SQL_TESTING" ]; then
  echo "Failed to find Spark jars directory ($SPARK_JARS_DIR)." >&2
  echo "You need to build Spark with the target \"package\" before running this program." >&2
  exit 1
fi

# Classpath used to start the launcher: every entry under the jars directory.
LAUNCH_CLASSPATH="$SPARK_JARS_DIR/*"

# Add the launcher build dir to the front of the classpath if requested.
if [ -n "$SPARK_PREPEND_CLASSES" ]; then
  LAUNCH_CLASSPATH="${SPARK_HOME}/launcher/target/scala-$SPARK_SCALA_VERSION/classes:$LAUNCH_CLASSPATH"
fi

# For tests: drop the YARN and Hadoop configuration directories from the
# environment.
if [[ -n "$SPARK_TESTING" ]]; then
  unset YARN_CONF_DIR HADOOP_CONF_DIR
fi

# The launcher library will print arguments separated by a NULL character, to allow arguments with
# characters that would be otherwise interpreted by the shell. Read that in a while loop, populating
# an array that will be used to exec the final command.
#
# The exit code of the launcher is appended to the output, so the parent shell removes it from the
# command array and checks the value to see if the launcher succeeded.

# 调用执行文件目录下的org.apache.spark.launcher.Main方法
# 传入执行类及参数,解析后返回参数列表
build_command() {
  "$RUNNER" -Xmx128m -cp "$LAUNCH_CLASSPATH" org.apache.spark.launcher.Mai