spark-core_19:ActorSystem的初始化源码分析

最新推荐文章于 2022-12-11 17:25:34 发布

原创最新推荐文章于 2022-12-11 17:25:34 发布 · 406 阅读

0 ·

CC 4.0 BY-SA版权

spark 同时被 2 个专栏收录

38 篇文章

订阅专栏

core

29 篇文章

订阅专栏

本文详细介绍了Apache Spark中ActorSystem的创建过程，包括在SparkEnv中初始化ActorSystem的方法，AkkaUtils类中的createActorSystem函数实现，以及最终启动ActorSystem的具体步骤。

1，SparkEnv在创建时会调用create()，在里面初始化ActorSystem

注：ActorSystem在后面版本会被RpcEnv替换掉

private def create(
    conf: SparkConf,
    executorId: String,
    hostname: String,
    port: Int,
    isDriver: Boolean,
    isLocal: Boolean,
    numUsableCores: Int,
    listenerBus: LiveListenerBus = null,
    mockOutputCommitCoordinator:Option[OutputCommitCoordinator] = None): SparkEnv = {

。。。。 // Create the ActorSystem for Akka and get the port it binds to.
// 创建ActorSystem及返回对应actorSystem的port
// driverActorSystemName = "sparkDriver" ,executorActorSystemName = "sparkExecutor"
val actorSystemName= if (isDriver) driverActorSystemName else executorActorSystemName
//创建RpcEnv，从1.6开始已使用NettyRpcEnv，并且也不再使用ActorSystem
//如果port是0的话，会给RpcEnv.address.port动态分配一个非0的端口
val rpcEnv= RpcEnv.create(actorSystemName, hostname, port, conf, securityManager,
    clientMode = !isDriver)
val actorSystem:ActorSystem =
    if (rpcEnv.isInstanceOf[AkkaRpcEnv]){
     rpcEnv.asInstanceOf[AkkaRpcEnv].actorSystem
    } else {
      val actorSystemPort=
        if (port== 0 || rpcEnv.address == null) {
          port
        } else {
          rpcEnv.address.port + 1
        }
      // Create an ActorSystem for legacy code
      //该方法返回tuple(ActorSystem, ActorSystem的port)，这里通过._1只取出其中的ActorSystem，赋给当前变量actorSystem
      AkkaUtils.createActorSystem(
        actorSystemName + "ActorSystem",
        hostname,
        actorSystemPort,
        conf,
        securityManager
      )._1
    }

2，进入AkkaUtils.createActorSystem()

private[spark] object AkkaUtils extends Logging {

/**
 * Creates an ActorSystem ready for remoting and returns it together with the port it is
 * bound to (the port is hard to obtain from Akka directly).
 *
 * The actual construction is done by `doCreateActorSystem`; this method only wraps that call
 * in a function of the port to try and hands it to `Utils.startServiceOnPort`, starting from
 * the requested `port`.
 *
 * Note: `name` matters. It identifies the actor system, and even if a client sends a message
 * to the right host and port, Akka will drop the message when the system name is incorrect.
 *
 * NOTE(review): earlier versions of this comment described an `indestructible` flag; this
 * signature has no such parameter, so that remark no longer applies here.
 *
 * @param name            name of the actor system, also passed on as the service name
 * @param host            host name or IP address the actor system is configured with
 * @param port            port to start from
 * @param conf            Spark configuration forwarded to `doCreateActorSystem`
 * @param securityManager security settings forwarded to `doCreateActorSystem`
 * @return the started ActorSystem and the port it is bound to
 */
def createActorSystem(
    name: String,
    host: String,
    port: Int,
    conf: SparkConf,
    securityManager: SecurityManager): (ActorSystem, Int) = {
  // Start attempt for a single candidate port; invoked by Utils.startServiceOnPort.
  def launchOn(candidatePort: Int): (ActorSystem, Int) =
    doCreateActorSystem(name, host, candidatePort, conf, securityManager)

  // Example of the log output seen on a successful start:
  //   18/04/17 19:12:47 INFO Remoting: Remoting started; listening on addresses
  //     :[akka.tcp://sparkDriverActorSystem@192.168.1.152:35868]
  //   18/04/17 19:13:23 INFO util.Utils: Successfully started service
  //     'sparkDriverActorSystem' on port 35868.
  Utils.startServiceOnPort(port, launchOn _, conf, name)
}

3，Utils.startServiceOnPort()会调用传入的startService函数，startService再调用doCreateActorSystem()

/**
 * Builds the Akka configuration from `conf` and `securityManager`, starts an ActorSystem
 * called `name`, and returns it together with the port reported by its provider.
 *
 * @param name            name of the ActorSystem to start
 * @param host            value written to `akka.remote.netty.tcp.hostname`
 * @param port            value written to `akka.remote.netty.tcp.port`
 * @param conf            Spark configuration the Akka settings are read from
 * @param securityManager supplies the secret key, the authentication flag and the SSL options
 * @return the started ActorSystem and the port of its default address
 * @throws Exception if authentication is enabled but the secret key is null
 * @throws NoSuchElementException if the provider's default address carries no port
 */
private def doCreateActorSystem(
    name: String,
    host: String,
    port: Int,
    conf: SparkConf,
    securityManager: SecurityManager): (ActorSystem, Int) = {
  // Akka tuning settings, each read from SparkConf with a default.
  val akkaThreads = conf.getInt("spark.akka.threads", 4)
  val akkaBatchSize = conf.getInt("spark.akka.batchSize", 15)
  // Falls back to spark.network.timeout (itself defaulting to 120s) when not set.
  val akkaTimeoutS = conf.getTimeAsSeconds("spark.akka.timeout",
    conf.get("spark.network.timeout", "120s"))
  // Configured maximum frame size for Akka messages, in bytes.
  // NOTE(review): the original author notes the default is 128 MB; not visible here, confirm.
  val akkaFrameSize = maxFrameSizeBytes(conf)
  val akkaLogLifecycleEvents = conf.getBoolean("spark.akka.logLifecycleEvents", false)
  val lifecycleEvents = if (akkaLogLifecycleEvents) "on" else "off"
  if (!akkaLogLifecycleEvents) {
    // As a workaround for Akka issue #3787, we coerce the "EndpointWriter" log to be silent.
    // See: https://www.assembla.com/spaces/akka/tickets/3787#/
    // foreach rather than map: the call is made purely for its side effect.
    Option(Logger.getLogger("akka.remote.EndpointWriter")).foreach(_.setLevel(Level.FATAL))
  }

  val logAkkaConfig = if (conf.getBoolean("spark.akka.logAkkaConfig", false)) "on" else "off"

  // getTimeAsSeconds reads a time setting in seconds and uses the given default when the key
  // is unset. Values may carry a unit suffix such as 50s, 100ms or 250us.
  // NOTE(review): presumably a value without a suffix is taken as seconds; confirm.
  val akkaHeartBeatPausesS = conf.getTimeAsSeconds("spark.akka.heartbeat.pauses", "6000s")
  val akkaHeartBeatIntervalS = conf.getTimeAsSeconds("spark.akka.heartbeat.interval", "1000s")

  // NOTE(review): per the original author, the key is null and authentication is off when
  // nothing has been configured; verify against SecurityManager.
  val secretKey = securityManager.getSecretKey()
  val isAuthOn = securityManager.isAuthenticationEnabled()
  if (isAuthOn && secretKey == null) {
    throw new Exception("Secret key is null with authentication on")
  }
  val requireCookie = if (isAuthOn) "on" else "off"
  val secureCookie = if (isAuthOn) secretKey else ""
  logDebug(s"In createActorSystem, requireCookie is: $requireCookie")

  // Falls back to an empty Config when no Akka SSL configuration is produced.
  val akkaSslConfig = securityManager.akkaSSLOptions.createAkkaConfig
    .getOrElse(ConfigFactory.empty())

  // Configuration is layered with withFallback: a key already defined by an earlier layer is
  // NOT overridden by a later one. Precedence is therefore
  //   1. conf.getAkkaConf (converted to a java.util.Map through JavaConverters' asJava),
  //   2. the SSL configuration,
  //   3. the defaults spelled out below.
  // NOTE(review): per the original author, getAkkaConf only picks up SparkConf keys that
  // start with "akka"; verify against SparkConf.
  // akka.remote.netty.tcp.hostname is the address other actor systems use to reach this one;
  // NOTE(review): a port of 0 presumably makes Akka choose a random free port; confirm.
  val akkaConf = ConfigFactory.parseMap(conf.getAkkaConf.toMap.asJava)
    .withFallback(akkaSslConfig)
    .withFallback(ConfigFactory.parseString(
    s"""
      |akka.daemonic = on
      |akka.loggers =[""akka.event.slf4j.Slf4jLogger""]
      |akka.stdout-loglevel ="ERROR"
      |akka.jvm-exit-on-fatal-error = off
      |akka.remote.require-cookie = "$requireCookie"
      |akka.remote.secure-cookie = "$secureCookie"
      |akka.remote.transport-failure-detector.heartbeat-interval= $akkaHeartBeatIntervalS s
      |akka.remote.transport-failure-detector.acceptable-heartbeat-pause = $akkaHeartBeatPausesS s
      |akka.actor.provider ="akka.remote.RemoteActorRefProvider"
      |akka.remote.netty.tcp.transport-class= "akka.remote.transport.netty.NettyTransport"
      |akka.remote.netty.tcp.hostname ="$host"
      |akka.remote.netty.tcp.port = $port
      |akka.remote.netty.tcp.tcp-nodelay =on
      |akka.remote.netty.tcp.connection-timeout = $akkaTimeoutS s
      |akka.remote.netty.tcp.maximum-frame-size = ${akkaFrameSize}B
      |akka.remote.netty.tcp.execution-pool-size = $akkaThreads
      |akka.actor.default-dispatcher.throughput = $akkaBatchSize
      |akka.log-config-on-start = $logAkkaConfig
      |akka.remote.log-remote-lifecycle-events= $lifecycleEvents
      |akka.log-dead-letters = $lifecycleEvents
      |akka.log-dead-letters-during-shutdown = $lifecycleEvents
    """.stripMargin))

  // Start the ActorSystem with the assembled configuration. (A Config can equally be obtained
  // from ConfigFactory.load, ConfigFactory.parseString or ConfigFactory.parseMap.)
  val actorSystem = ActorSystem(name, akkaConf)
  // The provider is the one selected by akka.actor.provider above; its default address
  // carries the port that is returned to the caller.
  val provider = actorSystem.asInstanceOf[ExtendedActorSystem].provider
  // Same exception type as a bare Option.get, but with a message that names the system.
  val boundPort = provider.getDefaultAddress.port.getOrElse(
    throw new NoSuchElementException(
      s"ActorSystem '$name' has no port in its default address"))
  (actorSystem, boundPort)
}