poll方式:
package test01
import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.ReceiverInputDStream
import org.apache.spark.streaming.flume.{FlumeUtils, SparkFlumeEvent}
import org.apache.spark.streaming.{Seconds, StreamingContext}
/**
 * Streaming word count over Flume events using the pull-based ("poll") approach.
 *
 * Spark polls the Flume agent's SparkSink at 192.168.106.107:555 and, for every
 * 5-second batch, prints the number of occurrences of each space-separated word.
 * The host and port must match the SparkSink `hostname`/`port` in the Flume agent
 * configuration.
 */
object SparkFlumePollDemo {
  import java.nio.charset.StandardCharsets

  // An explicit main() is used instead of `extends App`: the Spark documentation
  // warns that subclasses of scala.App may not work correctly.
  def main(args: Array[String]): Unit = {
    // local[2]: a receiver-based stream needs one thread for the receiver and at
    // least one more for processing the batches.
    val conf: SparkConf = new SparkConf().setAppName("flume02").setMaster("local[2]")
    val ssc = new StreamingContext(conf, Seconds(5))

    // Poll approach: Spark pulls the events buffered by the Flume SparkSink.
    val flumeStream = FlumeUtils.createPollingStream(ssc, "192.168.106.107", 555)

    flumeStream
      // Decode the event body explicitly as UTF-8 rather than relying on the
      // platform default charset, which may differ between machines.
      .map(x => new String(x.event.getBody.array(), StandardCharsets.UTF_8).trim)
      .flatMap(_.split(" "))
      .map((_, 1))
      .reduceByKey(_ + _)
      .print()

    ssc.start()
    ssc.awaitTermination()
  }
}
push方式
package test01
import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.ReceiverInputDStream
import org.apache.spark.streaming.flume.{FlumeUtils, SparkFlumeEvent}
import org.apache.spark.streaming.{Seconds, StreamingContext}
/**
 * Streaming word count over Flume events using the push-based approach.
 *
 * Spark starts an Avro receiver on 192.168.106.107:55555; the Flume agent's Avro
 * sink pushes events to it. For every 5-second batch the job prints the number of
 * occurrences of each space-separated word.
 */
object SparkFlumePushDemo {
  import java.nio.charset.StandardCharsets

  // An explicit main() is used instead of `extends App`: the Spark documentation
  // warns that subclasses of scala.App may not work correctly.
  def main(args: Array[String]): Unit = {
    // local[2]: a receiver-based stream needs one thread for the receiver and at
    // least one more for processing the batches.
    val conf: SparkConf = new SparkConf().setAppName("flume01").setMaster("local[2]")
    val ssc = new StreamingContext(conf, Seconds(5))

    // The receiver port must equal the Avro sink port in stream-flume.conf
    // (a1.sinks.k1.port = 55555); the previous value 555 did not match it, so the
    // sink could never deliver events to this receiver.
    val flumeStream: ReceiverInputDStream[SparkFlumeEvent] =
      FlumeUtils.createStream(ssc, "192.168.106.107", 55555)

    flumeStream
      // Decode the event body explicitly as UTF-8 rather than relying on the
      // platform default charset, which may differ between machines.
      .map(x => new String(x.event.getBody.array(), StandardCharsets.UTF_8).trim)
      .flatMap(_.split(" "))
      .map((_, 1))
      .reduceByKey(_ + _)
      .print()

    ssc.start()
    ssc.awaitTermination()
  }
}
按照此博客的配置 打胖包
flume的conf文件
stream-flume.conf
a1.sources = s1
a1.channels = c1
a1.sinks = k1
a1.sources.s1.type = netcat
a1.sources.s1.bind = 192.168.106.107
a1.sources.s1.port = 44444
a1.sources.s1.channels = c1
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
# AvroSink向Spark(55555)推送数据
# 使用push createStream
a1.sinks.k1.type = avro
a1.sinks.k1.hostname = 192.168.106.107
a1.sinks.k1.port = 55555
a1.sinks.k1.channel = c1
启动flume
flume-ng agent --name a1 --conf conf/ --conf-file /路径/stream-flume.conf -Dflume.root.logger=INFO,console
运行上传的spark jar包
spark-submit --class test01.SparkFlumePushDemo /路径/spark-1.0-SNAPSHOT.jar
启动telnet
telnet 192.168.106.107 44444
二、
将以下3个jar包(spark-streaming-flume-sink_2.11、scala-library、commons-lang3)导入到flume的lib目录下,并删除lib目录中对应的低版本jar包
conf文件
agent.sources = s1
agent.channels = c1
agent.sinks = sk1
#设置Source的类型为netcat,使用的channel为c1
agent.sources.s1.type = netcat
agent.sources.s1.bind = 192.168.106.107
agent.sources.s1.port = 444
agent.sources.s1.channels = c1
#SparkSink,要求flume lib目录存在spark-streaming-flume-sink_2.11-x.x.x.jar
agent.sinks.sk1.type=org.apache.spark.streaming.flume.sink.SparkSink
agent.sinks.sk1.hostname=192.168.106.107
agent.sinks.sk1.port=555
agent.sinks.sk1.channel = c1
#设置channel信息
#内存模式
agent.channels.c1.type = memory
agent.channels.c1.capacity = 1000
步骤同上:先启动flume,再用spark-submit运行SparkFlumePollDemo,最后启动telnet(telnet连接的地址和端口需与本配置中source的bind/port一致)