Spark如何读写SequoiaDB?最近被客户问得多了,在这里记录一下。
Spark读Sequoiadb数据:
package marketing

import com.sequoiadb.hadoop.io.BSONWritable
import com.sequoiadb.hadoop.mapreduce.SequoiadbInputFormat
import org.apache.hadoop.conf.Configuration
import org.apache.spark.{SparkContext, SparkConf}

/**
 * Created by joy on 2015/12/15.
 *
 * Sample job that loads a SequoiaDB collection into Spark through
 * `SequoiadbInputFormat` and prints every BSON document on the driver.
 */
object Read {

  /**
   * Entry point.
   *
   * An explicit `main` is used instead of `extends App`: Spark's documentation
   * warns that subclasses of `scala.App` may not work correctly.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // BSONWritable is registered with Kryo so Spark can serialize the records.
    val conf = new SparkConf()
      .setAppName("cgbdata")
      .setMaster("local")
      .registerKryoClasses(Array(classOf[BSONWritable]))
    val sc = new SparkContext(conf)

    try {
      // Connector settings: where to connect and which collection to read.
      // NOTE(review): the host list presumably names SequoiaDB coordinator
      // nodes - adjust to your cluster.
      val hadoopConfig = new Configuration()
      hadoopConfig.set("sequoiadb.input.url", "master:11810,slave1:11810,slave2:11810")
      hadoopConfig.set("sequoiadb.in.collectionspace", "default")
      hadoopConfig.set("sequoiadb.in.collection", "bar")

      val sdbRDD = sc.newAPIHadoopRDD[Object, BSONWritable, SequoiadbInputFormat](
        hadoopConfig,
        classOf[SequoiadbInputFormat],
        classOf[Object],
        classOf[BSONWritable])

      // Keep only the BSON payload of each (key, value) pair, bring the
      // results to the driver, and print them. `foreach` is used because the
      // printing is a pure side effect; `map` would build an unused Array[Unit].
      sdbRDD.map(_._2.getBson).collect().foreach(println)
    } finally {
      // Always release the SparkContext, even when the job fails.
      sc.stop()
    }
  }
}
Spark写Sequoiadb数据:
package marketing
import com.sequoiadb.hadoop.io.BSONWritable
import com.sequoiadb.hadoop.mapreduce.SequoiadbOutputFormat
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.io.{NullWritable, IntWritable}
import org.apache.spark.{SparkConf, SparkContext}
import org.bson.BasicBSONObject
import org.bson.types.ObjectId
/**
 * Sample job that writes one BSON document (`{"name": "gaoxing"}`) to a
 * SequoiaDB collection through `SequoiadbOutputFormat`.
 */
object Save {

  /**
   * Entry point.
   *
   * An explicit `main` is used instead of `extends App`: Spark's documentation
   * warns that subclasses of `scala.App` may not work correctly.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // BSONWritable is registered with Kryo so Spark can serialize the records.
    val sparkconf = new SparkConf()
      .setMaster("local[2]")
      .setAppName("save")
      .registerKryoClasses(Array(classOf[BSONWritable]))
    val sc = new SparkContext(sparkconf)

    try {
      // A single (key, value) record; the key is the NullWritable singleton.
      val data = sc.parallelize(
        List((NullWritable.get(), new BSONWritable(new BasicBSONObject("name", "gaoxing")))))

      // Connector settings: where to connect and which collection to write.
      val config = new Configuration()
      config.set("sequoiadb.output.url", "master:11810")
      config.set("sequoiadb.out.collectionspace", "foo")
      config.set("sequoiadb.out.collection", "bar")

      // NOTE(review): the path argument is left empty; presumably the output
      // format takes its target from the settings above rather than from a
      // file path - confirm against the connector.
      data.saveAsNewAPIHadoopFile(
        "",
        classOf[NullWritable],
        classOf[BSONWritable],
        classOf[SequoiadbOutputFormat],
        config)
    } finally {
      // Always release the SparkContext, even when the save fails.
      sc.stop()
    }
  }
}