Straight to the code — a Spark Streaming skeleton that consumes from Kafka with the direct (kafka-0-10) API and writes each micro-batch into Hive:
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.hive.HiveContext
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.kafka010._
import org.apache.spark.streaming.kafka010.ConsumerStrategies.Subscribe
import org.apache.spark.streaming.kafka010.LocationStrategies.PreferConsistent

object KafkaToHive {
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setAppName("KafkaToHive")
    val sc = new SparkContext(sparkConf)
    // 60-second batch interval
    val ssc = new StreamingContext(sc, Seconds(60))
    // Kafka consumer parameters
    val kafkaParams = Map[String, Object](
      // The ip:port pairs are the addresses of the Kafka brokers
      "bootstrap.servers" -> "ip1:port1,ip2:port2,ip3:port3",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> "KafkaToHive_group1", // custom consumer group name
      "auto.offset.reset" -> "earliest",
      "enable.auto.commit" -> (false: java.lang.Boolean))
    val topics = Array("test1")
    val stream = KafkaUtils.createDirectStream[String, String](
      ssc, PreferConsistent,
      Subscribe[String, String](topics, kafkaParams))
    stream.foreachRDD(rdd => {
      if (!rdd.isEmpty()) {
        // Grab this batch's offset ranges up front so they can be committed
        // back to Kafka once the write succeeds
        val offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
        // TODO: actual processing logic goes here.
        // `value` stands for the result set returned by that TODO step;
        // an empty Seq[String] is used as a stand-in so the skeleton compiles.
        val value = Seq.empty[String]
        val subRdd = rdd.sparkContext.parallelize(value)
        val sqlContext: SQLContext = new HiveContext(rdd.sparkContext)