1. Log4j logs ==> Flume ==> Kafka
flume_kafka.conf
agent1.sources = avro-source
agent1.channels = logger-channel
agent1.sinks = kafka-sink
# define source
agent1.sources.avro-source.type = avro
agent1.sources.avro-source.bind = 0.0.0.0
agent1.sources.avro-source.port = 41414
# define channel
agent1.channels.logger-channel.type = memory
# define sink
agent1.sinks.kafka-sink.type = org.apache.flume.sink.kafka.KafkaSink
agent1.sinks.kafka-sink.kafka.topic = kafka_streaming_topic
agent1.sinks.kafka-sink.kafka.bootstrap.servers = hadoop01:9092
agent1.sinks.kafka-sink.kafka.flumeBatchSize = 20
agent1.sinks.kafka-sink.kafka.producer.acks = 1
# bind source and sink to the channel
agent1.sources.avro-source.channels = logger-channel
agent1.sinks.kafka-sink.channel = logger-channel
Official documentation: https://flume.apache.org/releases/content/1.9.0/FlumeUserGuide.html#kafka-sink
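The Kafka sink writes to kafka_streaming_topic; depending on broker settings the topic may be auto-created on first use, but it is cleaner to create it up front. A minimal sketch, assuming a single-broker setup on hadoop01 and a ZooKeeper-based kafka-topics.sh (on newer Kafka releases replace --zookeeper hadoop01:2181 with --bootstrap-server hadoop01:9092):
kafka-topics.sh --create \
  --zookeeper hadoop01:2181 \
  --replication-factor 1 \
  --partitions 1 \
  --topic kafka_streaming_topic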
2. log4j.properties
log4j.rootLogger=INFO,stdout,flume
# console appender
log4j.appender.stdout = org.apache.log4j.ConsoleAppender
log4j.appender.stdout.target = System.out
log4j.appender.stdout.layout = org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern = %d{yyyy-MM-dd HH:mm:ss,SSS} [%t] [%c] [%p] - %m%n
# flume appender, pointing at the agent's avro source
log4j.appender.flume = org.apache.flume.clients.log4jappender.Log4jAppender
log4j.appender.flume.Hostname = hadoop01
log4j.appender.flume.Port = 41414
log4j.appender.flume.UnsafeMode = true
Official documentation: https://flume.apache.org/releases/content/1.9.0/FlumeUserGuide.html#load-balancing-log4j-appender
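Note that org.apache.flume.clients.log4jappender.Log4jAppender does not ship with log4j itself; the application needs the flume-ng-log4jappender jar (plus log4j) on its classpath. A minimal Maven sketch, assuming Flume 1.9.0 and log4j 1.2.17 (the versions are assumptions, match them to your environment):
<dependency>
    <groupId>log4j</groupId>
    <artifactId>log4j</artifactId>
    <version>1.2.17</version>
</dependency>
<dependency>
    <groupId>org.apache.flume.flume-ng-clients</groupId>
    <artifactId>flume-ng-log4jappender</artifactId>
    <version>1.9.0</version>
</dependency>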
3. Write a simple program that prints log4j messages
import org.apache.log4j.Logger;

public class test {

    private static final Logger logger = Logger.getLogger(test.class);

    public static void main(String[] args) throws Exception {
        int i = 0;
        // emit one log message per second; each message goes to the console
        // and, via the flume appender, to the Flume avro source
        while (true) {
            Thread.sleep(1000);
            logger.info(i++);
        }
    }
}
- Start ZooKeeper
zkServer.sh start
- Start Kafka (-daemon already runs the broker in the background, so no trailing & is needed)
kafka-server-start.sh -daemon $KAFKA_HOME/config/server.properties
- Start the Flume agent
flume-ng agent \
--name agent1 \
--conf $FLUME_HOME/conf \
--conf-file $FLUME_HOME/conf/flume_kafka.conf \
-Dflume.root.logger=INFO,console
- Start a console consumer
kafka-console-consumer.sh \
--zookeeper hadoop01:2181 \
--topic kafka_streaming_topic
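Note: --zookeeper only works with older console consumers; it was removed in Kafka 2.0, where the consumer connects to the broker directly:
kafka-console-consumer.sh \
  --bootstrap-server hadoop01:9092 \
  --topic kafka_streaming_topic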
- Write and run the Spark Streaming consumer
import org.apache.log4j.{Level, Logger}
import org.apache.spark.SparkConf
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.streaming.{Seconds, StreamingContext}

object kafka_streaming {

  Logger.getLogger("org").setLevel(Level.ERROR)

  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf()
      .setAppName("kafka_streaming")
      .setMaster("local[*]")
      .set("spark.port.maxRetries", "100")
    val ssc = new StreamingContext(sparkConf, Seconds(5))

    // receiver-based stream: (ZooKeeper quorum, consumer group, topic -> receiver thread count)
    val messages = KafkaUtils.createStream(ssc, "hadoop01:2181", "test", Map("kafka_streaming_topic" -> 1))

    // each record is a (key, value) pair; count the values received in every 5s batch
    messages.map(_._2).count().print()

    ssc.start()
    ssc.awaitTermination()
  }
}
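KafkaUtils.createStream comes from the receiver-based 0.8 integration, so the job needs the spark-streaming-kafka-0-8 artifact on its classpath. One way to run it outside the IDE, sketched here assuming Spark 2.4 built against Scala 2.11 and an application jar named kafka_streaming.jar (the names and versions are assumptions, adjust them to your build):
spark-submit \
  --class kafka_streaming \
  --master local[*] \
  --packages org.apache.spark:spark-streaming-kafka-0-8_2.11:2.4.0 \
  kafka_streaming.jar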