需求
Kafka + SparkStreaming + SparkSQL + HBase
输出TOP5的排名结果
排名作为Rowkey,word和count作为Column
实现
创建 Kafka 生产者,随机发送预设的句子来模拟数据源
/**
 * Simulated data source: publishes a randomly chosen sample sentence to the
 * Kafka topic `words` every 200 ms and echoes each sentence to stdout.
 *
 * Uses the legacy Scala producer API (`ProducerConfig` / `Producer` /
 * `KeyedMessage`) configured through `metadata.broker.list`.
 */
object producer {

  /**
   * Entry point. Runs until the process is killed or sending fails.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val topic = "words"
    val brokers = "master:9092,slave1:9092,slave2:9092"

    val prop = new Properties()
    prop.put("metadata.broker.list", brokers)
    prop.put("serializer.class", "kafka.serializer.StringEncoder")

    val kafkaConfig = new ProducerConfig(prop)
    val producer = new Producer[String, String](kafkaConfig)

    // Fixed pool of sample sentences; one is picked at random per message.
    val content: Array[String] = Array(
      "kafka kafka produce",
      "kafka produce message",
      "hello world hello",
      "wordcount topK topK",
      "hbase spark kafka"
    )

    try {
      while (true) {
        // Bound the index by the pool size so adding/removing samples stays safe.
        val line = content(scala.util.Random.nextInt(content.length))
        producer.send(new KeyedMessage[String, String](topic, line))
        println(line)
        Thread.sleep(200)
      }
    } finally {
      // Release the producer's connections if send fails or the thread is interrupted.
      producer.close()
    }
  }
}
创建 Spark Streaming 上下文(StreamingContext),批处理间隔为 1 秒
// Spark configuration: application name "Networkcount", local mode with two threads.
val conf: SparkConf =
  new SparkConf()
    .setAppName("Networkcount")
    .setMaster("local[2]")
// Core Spark context built from the configuration above.
val sc: SparkContext = new SparkContext(conf)
// Streaming context layered on `sc`, using a batch interval of one second.
val ssc: StreamingContext = new StreamingContext(sc, Seconds(1))
配置 Kafka 参数,通过 KafkaUtils.createDirectStream 读取 Kafka 中的数据