Start the environment
zkServer.sh start
kafka-server-start.sh /opt/soft/kafka211/config/server.properties
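If either service fails to come up, a quick sanity check is to list the running JVMs; after both commands succeed you should see a QuorumPeerMain process for ZooKeeper and a Kafka process for the broker:
jps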
Configure the Flume-to-Kafka properties file
cd /opt/flumeconf
vi conf_08011_kafka.properties
# Agent a11: spooldir source -> memory channel -> Kafka sink
a11.sources=s11
a11.channels=c11
a11.sinks=k11

# Source: watch the retail_db CSV export directory
a11.sources.s11.type=spooldir
a11.sources.s11.spoolDir=/opt/retail_db-csv
# Drop header lines (any event matching ^user.*) before they reach the channel
a11.sources.s11.interceptors=head_filter
a11.sources.s11.interceptors.head_filter.type=regex_filter
a11.sources.s11.interceptors.head_filter.regex=^user.*
a11.sources.s11.interceptors.head_filter.excludeEvents=true
a11.sources.s11.deserializer.maxLineLength=60000

# Sink: publish events to the userfriedns Kafka topic
a11.sinks.k11.type=org.apache.flume.sink.kafka.KafkaSink
a11.sinks.k11.kafka.bootstrap.servers=192.168.56.120:9092
a11.sinks.k11.kafka.topic=userfriedns

# Channel: in-memory buffer between source and sink
a11.channels.c11.type=memory
a11.channels.c11.capacity=60000
a11.channels.c11.transactionCapacity=60000

# Wire the source and sink to the channel
a11.sources.s11.channels=c11
a11.sinks.k11.channel=c11
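A note on this configuration: with excludeEvents=true, the regex_filter interceptor discards any line matching ^user.* (the CSV header row), so only data rows are forwarded to Kafka. The spooldir source also renames each file it has fully ingested with a .COMPLETED suffix, which makes the directory a simple progress indicator. Before starting the agent, confirm the export files are actually in place:
ls /opt/retail_db-csv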
Create the Kafka topic
kafka-topics.sh --create --zookeeper 192.168.56.129:2181 --topic userfriedns --replication-factor 1 --partitions 1
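To confirm the topic exists and check its partition and replica assignment, describe it against the same ZooKeeper address used above:
kafka-topics.sh --describe --zookeeper 192.168.56.129:2181 --topic userfriedns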
Import the data into Kafka with Flume
flume-ng agent -n a11 -c conf -f /opt/flumeconf/conf_08011_kafka.properties
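Once the agent is running and the spooled files have been marked .COMPLETED, you can check that events actually arrived in the topic with the console consumer, pointing at the same broker the sink writes to:
kafka-console-consumer.sh --bootstrap-server 192.168.56.120:9092 --topic userfriedns --from-beginning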
1. Spark Streaming reads data from Kafka
package com.njbdqn.myspark.kafka_sparksteaming.demo1

import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.{SparkConf, SparkContext}

object MyReadKafkaHandler {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[*]").setAppName("name")
    val sc = new SparkContext(conf)
    // Micro-batch interval of 10 seconds
    val ssc = new StreamingContext(sc, Seconds(10))
    // Kafka consumer configuration
    val kafkaParam = Map(
      ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> "192.168.56.120:9092",
      ConsumerConfig.GROUP_ID_CONFIG -> "mykafka14",
      ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG -> "true",
      ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG -> "20000",
      ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer],
      ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer],
      ConsumerConfig.AUTO_OFFSET_RESET_CONFIG -> "earliest"
    )
    // Create a direct stream subscribed to the userfriedns topic
    val streams = KafkaUtils.createDirectStream(ssc, LocationStrategies.PreferConsistent,
      ConsumerStrategies.Subscribe[String, String](Set("userfriedns"), kafkaParam))
    // Keep only records that actually carry a friend list, then split each line
    val value = streams.map(_.value).filter(_.split(",").size > 1).flatMap(line => {
      val ids = line.split(",")
      ids(1)