In this article, I will show how to use Akka HTTP together with Spark to build a REST service, with Cassandra acting as the data store. We have already seen what Spark can do on its own; combined correctly with Cassandra, it lets us build even more powerful systems. Let's start by creating a build.sbt file with the following content:
name := "cassandra-spark-akka-http-starter-kit" version := "1.0" scalaVersion := "2.11.8" organization := "com.iteblog" val akkaV = "2.4.5" libraryDependencies ++ = Seq( "org.apache.spark" % "spark-core_2.11" % "2.0.0" , "org.apache.spark" % "spark-sql_2.11" % "2.0.0" , "com.typesafe.akka" %% "akka-http-core" % akkaV, "com.typesafe.akka" %% "akka-http-experimental" % akkaV, "com.typesafe.akka" %% "akka-http-testkit" % akkaV % "test" , "com.typesafe.akka" %% "akka-http-spray-json-experimental" % akkaV, "org.scalatest" %% "scalatest" % "2.2.6" % "test" , "com.datastax.spark" % "spark-cassandra-connector_2.11" % "2.0.0-M3" , "net.liftweb" % "lift-json_2.11" % "2.6.2" ) assembleArtifact in assemblyPackageScala := false assemblyMergeStrategy in assembly := { case m if m.toLowerCase.endsWith( "manifest.mf" ) = > MergeStrategy.discard case m if m.toLowerCase.matches( "meta-inf.*\\.sf$" ) = > MergeStrategy.discard case "reference.conf" = > MergeStrategy.concat case _ = > MergeStrategy.first } ivyScala := ivyScala.value map { _ .copy(overrideScalaVersion = true ) } fork in run := true |
Above we set assembleArtifact in assemblyPackageScala to false: Spark already ships with the Scala library, so there is no need to bundle it again in the assembly jar.
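Note that the assembly-related keys above (assembleArtifact, assemblyPackageScala, assemblyMergeStrategy) come from the sbt-assembly plugin, which must be added to the build. A minimal project/plugins.sbt sketch follows; the plugin version shown is an assumption, use whatever matches your sbt release:

// project/plugins.sbt -- sbt-assembly provides the assembly task and the
// assemblyPackageScala / assemblyMergeStrategy keys used in build.sbt above.
// The version below is an assumption; pick one compatible with your sbt.
addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.3")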
Defining the User case class

The User class holds only an id, a name, and an email; it is defined as follows:
package com.iteblog.domain

case class User(id: String, name: String, email: String)
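Before the service can persist a User, the target keyspace and table must exist in Cassandra. Here is a minimal CQL sketch, assuming the keyspace is named iteblog and the table users; both names are assumptions and must match the values in your configuration:

-- Illustrative schema only; the keyspace and table names must match
-- cassandra.keyspace and cassandra.tableName in application.conf.
CREATE KEYSPACE IF NOT EXISTS iteblog
  WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1};

CREATE TABLE IF NOT EXISTS iteblog.users (
  id    text PRIMARY KEY,
  name  text,
  email text
);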
The data access layer

The following snippet implements the data access layer:
package com.iteblog.factories

import com.iteblog.domain.User
import com.typesafe.config.ConfigFactory
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession
import com.datastax.spark.connector._

import scala.util.Try

trait DatabaseAccess {

  import Context._

  def create(user: User): Boolean =
    Try(sc.parallelize(Seq(user)).saveToCassandra(keyspace, tableName)).toOption.isDefined

  def retrieve(id: String): Option[Array[User]] =
    Try(sc.cassandraTable[User](keyspace, tableName).where(s"id='$id'").collect()).toOption
}

object DatabaseAccess extends DatabaseAccess

object Context {
  val config = ConfigFactory.load()
  val url = config.getString("cassandra.url")
  val sparkConf: SparkConf = new SparkConf()
    .setAppName("Spark-cassandra-akka-rest-example")
    .setMaster("local[4]")
    .set("spark.cassandra.connection.host", url)
  val spark = SparkSession.builder().config(sparkConf).getOrCreate()
  val sc = spark.sparkContext
  val keyspace = config.getString("cassandra.keyspace")
  val tableName = config.getString("cassandra.tableName")
}
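The Context object above (and the server bootstrap later on) reads its settings via Typesafe Config. Below is a sketch of the matching src/main/resources/application.conf; every value shown is an illustrative assumption to be adjusted for your environment:

# Illustrative values only; adjust to your environment.
cassandra {
  url = "127.0.0.1"        # used as spark.cassandra.connection.host
  keyspace = "iteblog"     # must match the CQL keyspace above
  tableName = "users"      # must match the CQL table above
}

http {
  interface = "0.0.0.0"    # read by the server bootstrap below
  port = 8080
}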
The service layer

Here is the implementation of the route definitions:
package com.iteblog.routes

import java.util.{Date, UUID}

import akka.actor.ActorSystem
import akka.event.Logging
import akka.http.scaladsl.model._
import akka.http.scaladsl.server.Directives._
import akka.http.scaladsl.server.{ExceptionHandler, Route}
import akka.stream.ActorMaterializer
import com.iteblog.domain.User
import com.iteblog.factories.DatabaseAccess
import net.liftweb.json._
import net.liftweb.json.Extraction._

trait SparkService extends DatabaseAccess {

  implicit val system: ActorSystem
  implicit val materializer: ActorMaterializer
  val logger = Logging(system, getClass)

  implicit def myExceptionHandler = ExceptionHandler {
    case e: ArithmeticException =>
      extractUri { uri =>
        complete(HttpResponse(StatusCodes.InternalServerError,
          entity = s"Data is not persisted and something went wrong"))
      }
  }

  implicit val formats: Formats = new DefaultFormats {
    outer =>
    override val typeHintFieldName = "type"
    override val typeHints = ShortTypeHints(List(classOf[String], classOf[Date]))
  }

  val sparkRoutes: Route = {
    get {
      path("create" / "name" / Segment / "email" / Segment) { (name: String, email: String) =>
        complete {
          val documentId = "user::" + UUID.randomUUID().toString
          try {
            val user = User(documentId, name, email)
            val isPersisted = create(user)
            if (isPersisted) {
              HttpResponse(StatusCodes.Created,
                entity = s"Data is successfully persisted with id $documentId")
            } else {
              HttpResponse(StatusCodes.InternalServerError,
                entity = s"Error found for id : $documentId")
            }
          } catch {
            case ex: Throwable =>
              logger.error(ex, ex.getMessage)
              HttpResponse(StatusCodes.InternalServerError,
                entity = s"Error found for id : $documentId")
          }
        }
      }
    } ~
    path("retrieve" / "id" / Segment) { (listOfIds: String) =>
      get {
        complete {
          try {
            val idAsRDD: Option[Array[User]] = retrieve(listOfIds)
            idAsRDD match {
              case Some(data) =>
                HttpResponse(StatusCodes.OK,
                  entity = data.headOption.fold("")(x => compact(render(decompose(x)))))
              case None =>
                HttpResponse(StatusCodes.InternalServerError,
                  entity = s"Data is not fetched and something went wrong")
            }
          } catch {
            case ex: Throwable =>
              logger.error(ex, ex.getMessage)
              HttpResponse(StatusCodes.InternalServerError,
                entity = s"Error found for ids : $listOfIds")
          }
        }
      }
    }
  }
}
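As a side note, the retrieve branch serializes the first matching User back to JSON through lift-json's decompose/render/compact pipeline. The following self-contained sketch, using made-up user data, shows what the response body looks like:

import com.iteblog.domain.User
import net.liftweb.json._
import net.liftweb.json.Extraction._

object JsonRoundTrip extends App {
  implicit val formats: Formats = DefaultFormats

  val user = User("user::1234", "Alice", "alice@example.com") // illustrative data

  // decompose builds a JValue AST from the case class; render/compact
  // then produce the JSON string returned in the HTTP response entity.
  println(compact(render(decompose(user))))
  // prints: {"id":"user::1234","name":"Alice","email":"alice@example.com"}
}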
Starting the service

Now we need a class to bootstrap the service. Its main job is to start an HTTP server that clients can call, as follows:
package com.iteblog

import akka.actor.ActorSystem
import akka.http.scaladsl.Http
import akka.stream.ActorMaterializer
import com.iteblog.routes.SparkService
import com.iteblog.factories.Context

class StartSparkServer(implicit val system: ActorSystem,
                       implicit val materializer: ActorMaterializer) extends SparkService {

  def startServer(address: String, port: Int) = {
    Http().bindAndHandle(sparkRoutes, address, port)
  }
}

object StartApplication extends App {
  StartApp
}

object StartApp {
  implicit val system: ActorSystem = ActorSystem("Spark-Cassandra-Service")
  implicit val executor = system.dispatcher
  implicit val materializer = ActorMaterializer()

  val server = new StartSparkServer()

  val config = Context.config
  val serverUrl = config.getString("http.interface")
  val port = config.getInt("http.port")
  server.startServer(serverUrl, port)
}
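Once everything is running, the two endpoints can be exercised from the command line. Assuming the host and port sketched in application.conf above (the generated id will of course differ):

# Create a user; the service responds with the generated id.
curl "http://127.0.0.1:8080/create/name/Alice/email/alice@example.com"
# Data is successfully persisted with id user::<uuid>

# Fetch it back as JSON, using the id from the previous response.
curl "http://127.0.0.1:8080/retrieve/id/user::<uuid>"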