Akka is a library that implements the Actor programming model for highly concurrent applications. Spark uses an Akka ActorSystem for distributed message passing and concurrent programming. Below we trace the source code to see how Spark creates and starts its ActorSystem.
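To make the later code easier to follow, here is a minimal, self-contained sketch of the classic Akka API that Spark 1.x builds on. The actor, system and object names below are made up for illustration; this is not Spark code.

import akka.actor.{Actor, ActorSystem, Props}

// A trivial actor that prints every message it receives.
class EchoActor extends Actor {
  def receive = {
    case msg => println(s"received: $msg")
  }
}

object AkkaDemo extends App {
  // The ActorSystem hosts the actors and the dispatcher threads that run them.
  val system = ActorSystem("demoSystem")
  val echo = system.actorOf(Props[EchoActor], "echo")
  echo ! "hello"      // asynchronous, fire-and-forget message send
  system.shutdown()   // Akka 2.3-era API; later Akka versions use terminate()
}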
First, in SparkEnv.scala:
// Create a ActorSystem for legacy codes: that is, the ActorSystem is created via AkkaUtils.createActorSystem
AkkaUtils.createActorSystem(
  actorSystemName + "ActorSystem",
  hostname,
  actorSystemPort,
  conf,
  securityManager
)._1
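The trailing ._1 keeps only the first element of the returned (ActorSystem, Int) pair. A hedged sketch of the equivalent destructuring form (not the actual SparkEnv code) makes the tuple explicit:

// boundPort is the port Akka actually bound to, which can differ from the
// requested actorSystemPort when the request was 0 ("pick any free port").
val (actorSystem, boundPort) = AkkaUtils.createActorSystem(
  actorSystemName + "ActorSystem", hostname, actorSystemPort, conf, securityManager)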
The source of createActorSystem is in AkkaUtils.scala:
def createActorSystem(
    name: String,
    host: String,
    port: Int,
    conf: SparkConf,
    securityManager: SecurityManager): (ActorSystem, Int) = {
  // Wrap doCreateActorSystem in a function value that takes only the port and
  // closes over the remaining arguments; Akka itself is started inside doCreateActorSystem.
  val startService: Int => (ActorSystem, Int) = { actualPort =>
    doCreateActorSystem(name, host, actualPort, conf, securityManager)
  }
  // Returns (T, Int), here (ActorSystem, Int): the system and its bound port.
  Utils.startServiceOnPort(port, startService, conf, name)
}
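The startService value is an ordinary Scala function literal that closes over name, host, conf and securityManager, leaving only the port to be supplied later. A REPL-style sketch of the same pattern, with made-up names:

// A made-up "service starter": everything except the port is known in advance.
def doStart(name: String, host: String, port: Int): (String, Int) =
  (s"$name listening on $host", port)

// Close over name and host; the resulting function value takes only the port,
// which is exactly the shape startServiceOnPort expects (Int => (T, Int)).
val startService: Int => (String, Int) = { actualPort =>
  doStart("demoService", "localhost", actualPort)
}

println(startService(7077))   // (demoService listening on localhost,7077)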
In Utils.scala we can see that startServiceOnPort calls the startService function passed to it; in this example that function is the closure around doCreateActorSystem, so this is where Akka gets started:
def startServiceOnPort[T](
    startPort: Int,
    startService: Int => (T, Int),
    conf: SparkConf,
    serviceName: String = ""): (T, Int) = {
  require(startPort == 0 || (1024 <= startPort && startPort < 65536),
    "startPort should be between 1024 and 65535 (inclusive), or 0 for a random free port.")
  val serviceString = if (serviceName.isEmpty) "" else s" '$serviceName'"
  val maxRetries = portMaxRetries(conf)
  for (offset <- 0 to maxRetries) {
    // Do not increment port if startPort is 0, which is treated as a special port
    val tryPort = if (startPort == 0) {
      startPort
    } else {
      // If the new port wraps around, do not try a privilege port
      ((startPort + offset - 1024) % (65536 - 1024)) + 1024
    }
    try {
      // startService is the closure built in createActorSystem, so this call
      // invokes doCreateActorSystem and actually starts Akka.
      val (service, port) = startService(tryPort)
      logInfo(s"Successfully started service$serviceString on port $port.")
      return (service, port)
    } catch {
      case e: Exception if isBindCollision(e) =>
        if (offset >= maxRetries) {
          val exceptionMessage =
            s"${e.getMessage}: Service$serviceString failed after " +
            s"$maxRetries retries! Consider explicitly setting the appropriate port for the " +
            s"service$serviceString (for example spark.ui.port for SparkUI) to an available " +
            "port or increasing spark.port.maxRetries."
          val exception = new BindException(exceptionMessage)
          // restore original stack trace
          exception.setStackTrace(e.getStackTrace)
          throw exception
        }
        logWarning(s"Service$serviceString could not bind on port $tryPort. " +
          s"Attempting port ${tryPort + 1}.")
    }
  }
  // Should never happen
  throw new SparkException(s"Failed to start service$serviceString on port $startPort")
}
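The retry logic above is a generic "try successive ports until one binds" loop. Here is a small, self-contained sketch of the same idea using java.net.ServerSocket in place of Akka; the names and retry count are illustrative, not Spark's:

import java.net.{BindException, ServerSocket}

object PortRetryDemo extends App {
  // Try successive ports until one binds, mirroring Spark's retry loop above.
  def startOnPort[T](startPort: Int, maxRetries: Int)(start: Int => T): (T, Int) = {
    for (offset <- 0 to maxRetries) {
      // Wrap around within the non-privileged port range, as Spark does.
      val tryPort = ((startPort + offset - 1024) % (65536 - 1024)) + 1024
      try {
        return (start(tryPort), tryPort)
      } catch {
        case e: BindException if offset < maxRetries =>
          println(s"Port $tryPort in use, trying ${tryPort + 1}")
      }
    }
    throw new RuntimeException(s"Failed to bind starting from port $startPort")
  }

  // ServerSocket stands in for "a service that binds a port" (Akka in Spark's case).
  val (socket, boundPort) = startOnPort(8080, 3)(p => new ServerSocket(p))
  println(s"Bound on port $boundPort")
  socket.close()
}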
We can now analyze doCreateActorSystem, the function that actually starts Akka; it is also defined in AkkaUtils.scala:
private def doCreateActorSystem(
name: String,
host: String,
port: Int,
conf: SparkConf,
securityManager: SecurityManager): (ActorSystem, Int) = {
// Read the Akka-related settings from the SparkConf
val akkaThreads = conf.getInt("spark.akka.threads", 4)
val akkaBatchSize = conf.getInt("spark.akka.batchSize", 15)
val akkaTimeoutS = conf.getTimeAsSeconds("spark.akka.timeout",
conf.get("spark.network.timeout", "120s"))
val akkaFrameSize = maxFrameSizeBytes(conf)
val akkaLogLifecycleEvents = conf.getBoolean("spark.akka.logLifecycleEvents", false)
val lifecycleEvents = if (akkaLogLifecycleEvents) "on" else "off"
if (!akkaLogLifecycleEvents) {
// As a workaround for Akka issue #3787, we coerce the "EndpointWriter" log to be silent.
// See: https://www.assembla.com/spaces/akka/tickets/3787#/
Option(Logger.getLogger("akka.remote.EndpointWriter")).map(l => l.setLevel(Level.FATAL))
}
val logAkkaConfig = if (conf.getBoolean("spark.akka.logAkkaConfig", false)) "on" else "off"
val akkaHeartBeatPausesS = conf.getTimeAsSeconds("spark.akka.heartbeat.pauses", "6000s")
val akkaHeartBeatIntervalS = conf.getTimeAsSeconds("spark.akka.heartbeat.interval", "1000s")
val secretKey = securityManager.getSecretKey()
val isAuthOn = securityManager.isAuthenticationEnabled()
if (isAuthOn && secretKey == null) {
throw new Exception("Secret key is null with authentication on")
}
val requireCookie = if (isAuthOn) "on" else "off"
val secureCookie = if (isAuthOn) secretKey else ""
logDebug(s"In createActorSystem, requireCookie is: $requireCookie")
val akkaSslConfig = securityManager.akkaSSLOptions.createAkkaConfig
.getOrElse(ConfigFactory.empty())
val akkaConf = ConfigFactory.parseMap(conf.getAkkaConf.toMap.asJava)
.withFallback(akkaSslConfig).withFallback(ConfigFactory.parseString(
s"""
|akka.daemonic = on
|akka.loggers = [""akka.event.slf4j.Slf4jLogger""]
|akka.stdout-loglevel = "ERROR"
|akka.jvm-exit-on-fatal-error = off
|akka.remote.require-cookie = "$requireCookie"
|akka.remote.secure-cookie = "$secureCookie"
|akka.remote.transport-failure-detector.heartbeat-interval = $akkaHeartBeatIntervalS s
|akka.remote.transport-failure-detector.acceptable-heartbeat-pause = $akkaHeartBeatPausesS s
|akka.actor.provider = "akka.remote.RemoteActorRefProvider"
|akka.remote.netty.tcp.transport-class = "akka.remote.transport.netty.NettyTransport"
|akka.remote.netty.tcp.hostname = "$host"
|akka.remote.netty.tcp.port = $port
|akka.remote.netty.tcp.tcp-nodelay = on
|akka.remote.netty.tcp.connection-timeout = $akkaTimeoutS s
|akka.remote.netty.tcp.maximum-frame-size = ${akkaFrameSize}B
|akka.remote.netty.tcp.execution-pool-size = $akkaThreads
|akka.actor.default-dispatcher.throughput = $akkaBatchSize
|akka.log-config-on-start = $logAkkaConfig
|akka.remote.log-remote-lifecycle-events = $lifecycleEvents
|akka.log-dead-letters = $lifecycleEvents
|akka.log-dead-letters-during-shutdown = $lifecycleEvents
""".stripMargin))
// Construct the ActorSystem with the assembled Akka configuration
val actorSystem = ActorSystem(name, akkaConf)
val provider = actorSystem.asInstanceOf[ExtendedActorSystem].provider
val boundPort = provider.getDefaultAddress.port.get
// Return the (ActorSystem, Int) pair: the system and the port it actually bound to
(actorSystem, boundPort)
}
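The configuration assembly above follows Typesafe Config's withFallback pattern: the user's spark.akka.* settings take precedence, then the SSL config, then the inline defaults. A minimal sketch of the same pattern (the keys and values here are illustrative, not Spark's defaults):

import akka.actor.ActorSystem
import com.typesafe.config.ConfigFactory

// Explicit overrides win over the baseline defaults supplied as a string.
val overrides = ConfigFactory.parseString("akka.remote.netty.tcp.port = 9999")
val baseline = ConfigFactory.parseString(
  """
  |akka.actor.provider = "akka.remote.RemoteActorRefProvider"
  |akka.remote.netty.tcp.hostname = "127.0.0.1"
  |akka.remote.netty.tcp.port = 0
  """.stripMargin)

val merged = overrides.withFallback(baseline)
println(merged.getInt("akka.remote.netty.tcp.port"))  // 9999: the override wins

// Requires akka-remote on the classpath to actually start a remoting-enabled system.
val system = ActorSystem("configDemo", merged)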
With that, we have traced through the source code the entire path by which Spark creates the ActorSystem object: from SparkEnv through AkkaUtils.createActorSystem and Utils.startServiceOnPort down to the ActorSystem(name, akkaConf) factory call in doCreateActorSystem.