Flume 从 spooldir 到 HTTP 的数据传输过程:读取文件 --> String line = readLine() --> 将 line 转换为字节数组放入 Event 的 body 中,再批量将 Event 放入 channel --> HTTP sink 从 channel 中取出 Event 并获取其字节数组 --> 通过 HttpURLConnection 发送到接口。
1、源码下载地址:http://www.apache.org/dyn/closer.lua/flume/1.8.0/apache-flume-1.8.0-src.tar.gz
2、tar包下载地址:http://www.apache.org/dyn/closer.lua/flume/1.8.0/apache-flume-1.8.0-bin.tar.gz
3、配置文件
# Flume agent "a1": spooling-directory source -> memory channel -> custom HTTP sink.
# Name the components on this agent
a1.sources = r1
a1.sinks = k1
a1.channels = c1
# Describe/configure the source
a1.sources.r1.type = spooldir
a1.sources.r1.spoolDir = /opt/datasyn/logdata
a1.sources.r1.fileHeader = true
# Charset used to read the spooled files (see section 7 on garbled Chinese text).
a1.sources.r1.inputCharset= GB2312
a1.sources.r1.maxBackoff=4000
#filter
a1.sources.r1.interceptors= i1
a1.sources.r1.interceptors.i1.type= regex_filter
# Select the data that contains the string WB10
a1.sources.r1.interceptors.i1.regex= .*WB10.*
# Describe the sink
a1.sinks.k1.type = http
a1.sinks.k1.channel=c1
a1.sinks.k1.endpoint=http://localhost:8888/httpConnection
# NOTE(review): the modified HttpSink in section 5 reads these two header values
# but does not use them when it builds the request.
a1.sinks.k1.contentTypeHeader=application/json
a1.sinks.k1.acceptHeader=application/json
# Defaults applied when no per-status override below matches.
a1.sinks.k1.defaultBackoff = true
a1.sinks.k1.defaultRollback = true
a1.sinks.k1.defaultIncrementMetrics = false
# Per-status overrides: the sink looks up the exact status code first, then the
# "<first digit>XX" group (e.g. 4XX), and only then falls back to the default.
a1.sinks.k1.backoff.4XX = false
a1.sinks.k1.rollback.4XX = false
a1.sinks.k1.incrementMetrics.4XX = true
a1.sinks.k1.backoff.200 = false
a1.sinks.k1.rollback.200 = false
a1.sinks.k1.incrementMetrics.200 = true
# Custom properties read by the modified HttpSink; they are sent to the endpoint
# as the "topic" and "key" request parameters.
a1.sinks.k1.topic = topic
a1.sinks.k1.key = key
# Custom keyword filter for the modified HttpSink (left disabled here).
#a1.sinks.k1.keyword =
# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
# Bind the source and sink to the channel
a1.sources.r1.channels = c1
# NOTE(review): repeats the sink channel binding already set in the sink section above.
a1.sinks.k1.channel = c1
4、当spoolDir中的文件编码不规整(例如同一文件中混有两种编码),导致运行无法通过时:
将flume-ng-core-1.8.0.jar------>org.apache.flume.source包------->SpoolDirectorySourceConfigurationConstants类中的
DEFAULT_DECODE_ERROR_POLICY修改为DecodeErrorPolicy.IGNORE.name();
5、自定义http sink 以及自定义过滤内容:
将flume-http-sink-1.8.0.jar中的HttpSink类(通过HttpURLConnection发送数据的代码)修改为:
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.flume.sink.http;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.UnmodifiableIterator;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.flume.Channel;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.EventDeliveryException;
import org.apache.flume.Sink;
import org.apache.flume.Sink.Status;
import org.apache.flume.Transaction;
import org.apache.flume.conf.Configurable;
import org.apache.flume.instrumentation.SinkCounter;
import org.apache.flume.sink.AbstractSink;
import org.apache.log4j.Logger;
public class HttpSink extends AbstractSink implements Configurable {
private static final Logger LOG = Logger.getLogger(HttpSink.class);
private static final int HTTP_STATUS_CONTINUE = 100;
private static final int DEFAULT_CONNECT_TIMEOUT = 5000;
private static final int DEFAULT_REQUEST_TIMEOUT = 5000;
private static final String DEFAULT_CONTENT_TYPE = "text/plain";
private static final String DEFAULT_ACCEPT_HEADER = "text/plain";
private URL endpointUrl;
private SinkCounter sinkCounter;
private int connectTimeout;
private int requestTimeout;
private String contentTypeHeader;
private String acceptHeader;
//自定义参数
private String topic;
private String key;
private String keyword;
private boolean defaultBackoff;
private boolean defaultRollback;
private boolean defaultIncrementMetrics;
private HashMap<String, Boolean> backoffOverrides;
private HashMap<String, Boolean> rollbackOverrides;
private HashMap<String, Boolean> incrementMetricsOverrides;
private ConnectionBuilder connectionBuilder;
public HttpSink() {
this.connectTimeout = 5000;
this.requestTimeout = 5000;
this.contentTypeHeader = "text/plain";
this.acceptHeader = "text/plain";
this.backoffOverrides = new HashMap();
this.rollbackOverrides = new HashMap();
this.incrementMetricsOverrides = new HashMap();
}
public final void configure(Context context) {
String configuredEndpoint = context.getString("endpoint", "");
LOG.info("Read endpoint URL from configuration : " + configuredEndpoint);
try {
this.endpointUrl = new URL(configuredEndpoint);
} catch (MalformedURLException e) {
throw new IllegalArgumentException("Endpoint URL invalid", e);
}
this.connectTimeout = context.getInteger("connectTimeout", Integer.valueOf(5000)).intValue();
if (this.connectTimeout <= 0) {
throw new IllegalArgumentException("Connect timeout must be a non-zero and positive");
}
LOG.info("Using connect timeout : " + this.connectTimeout);
this.requestTimeout = context.getInteger("requestTimeout", Integer.valueOf(5000)).intValue();
if (this.requestTimeout <= 0) {
throw new IllegalArgumentException("Request timeout must be a non-zero and positive");
}
LOG.info("Using request timeout : " + this.requestTimeout);
this.acceptHeader = context.getString("acceptHeader", "text/plain");
LOG.info("Using Accept header value : " + this.acceptHeader);
//参数赋值
//topic
this.topic = context.getString("topic", "web_user_gbase");
LOG.info("Using Accept topic value : " + this.topic);
//key
this.key = context.getString("key", "web_user_gbase");
LOG.info("Using Accept key value : " + this.key);
//keyword
//this.keyword = context.getString("keyword", "");
//LOG.info("Using Accept keyword value : " + this.keyword);
this.contentTypeHeader = context.getString("contentTypeHeader", "text/plain");
LOG.info("Using Content-Type header value : " + this.contentTypeHeader);
this.defaultBackoff = context.getBoolean("defaultBackoff", Boolean.valueOf(true)).booleanValue();
LOG.info("Channel backoff by default is " + this.defaultBackoff);
this.defaultRollback = context.getBoolean("defaultRollback", Boolean.valueOf(true)).booleanValue();
LOG.info("Transaction rollback by default is " + this.defaultRollback);
this.defaultIncrementMetrics = context.getBoolean("defaultIncrementMetrics", Boolean.valueOf(false))
.booleanValue();
LOG.info("Incrementing metrics by default is " + this.defaultIncrementMetrics);
parseConfigOverrides("backoff", context, this.backoffOverrides);
parseConfigOverrides("rollback", context, this.rollbackOverrides);
parseConfigOverrides("incrementMetrics", context, this.incrementMetricsOverrides);
if (this.sinkCounter == null) {
this.sinkCounter = new SinkCounter(getName());
}
this.connectionBuilder = new ConnectionBuilder();
}
public final void start() {
LOG.info("Starting HttpSink");
this.sinkCounter.start();
}
public final void stop() {
LOG.info("Stopping HttpSink");
this.sinkCounter.stop();
}
public final Sink.Status process() throws EventDeliveryException {
Sink.Status status = null;
PrintWriter outputStream = null;
Channel ch = getChannel();
Transaction txn = ch.getTransaction();
txn.begin();
try {
Event event = ch.take();
byte[] eventBody = null;
if (event != null) {
eventBody = event.getBody();
}
if ((eventBody != null) && (eventBody.length > 0) && (new
String(eventBody).indexOf(keyword)>=0)) {//自定义过滤
this.sinkCounter.incrementEventDrainAttemptCount();
LOG.debug("Sending request : " + new String(event.getBody()));
try {
HttpURLConnection connection =this.connectionBuilder.getConnection();
outputStream = new PrintWriter(connection.getOutputStream());
//给接口传递参数
outputStream.write("topic="+topic+"&key="+key+"&value=" + new
String(eventBody));
outputStream.flush();
outputStream.close();
int httpStatusCode = connection.getResponseCode();
LOG.debug("Got status code : " + httpStatusCode);
if (httpStatusCode < 400) {
connection.getInputStream().close();
} else {
LOG.debug("bad request");
connection.getErrorStream().close();
}
LOG.debug("Response processed and closed");
if (httpStatusCode >= 100) {
String httpStatusString = String.valueOf(httpStatusCode);
boolean shouldRollback = findOverrideValue(httpStatusString, this.rollbackOverrides,
this.defaultRollback);
if (shouldRollback)
txn.rollback();
else {
txn.commit();
}
boolean shouldBackoff = findOverrideValue(httpStatusString, this.backoffOverrides,
this.defaultBackoff);
if (shouldBackoff) {
status = Sink.Status.BACKOFF;
} else {
status = Sink.Status.READY;
}
boolean shouldIncrementMetrics = findOverrideValue(httpStatusString,
this.incrementMetricsOverrides, this.defaultIncrementMetrics);
if (shouldIncrementMetrics) {
this.sinkCounter.incrementEventDrainSuccessCount();
}
if (shouldRollback) {
if (shouldBackoff) {
LOG.info(String.format(
"Got status code %d from HTTP server. Rolled back event and backed off.",
new Object[] { Integer.valueOf(httpStatusCode) }));
}
} else {
LOG.info(String.format("Got status code %d from HTTP server. Rolled back event for retry.",
new Object[] { Integer.valueOf(httpStatusCode) }));
}
} else {
txn.rollback();
status = Sink.Status.BACKOFF;
LOG.warn("Malformed response returned from server, retrying");
}
} catch (IOException e) {
txn.rollback();
status = Sink.Status.BACKOFF;
LOG.error("Error opening connection, or request timed out", e);
}
} else {
txn.commit();
status = Sink.Status.BACKOFF;
LOG.warn("Processed empty event");
}
} catch (Throwable t) {
txn.rollback();
status = Sink.Status.BACKOFF;
LOG.error("Error sending HTTP request, retrying", t);
if (t instanceof Error)
throw ((Error) t);
} finally {
txn.close();
if (outputStream != null)
try {
outputStream.close();
} catch (Exception localIOException1) {
}
}
return status;
}
private void parseConfigOverrides(String propertyName, Context context, Map<String, Boolean> override) {
ImmutableMap config = context.getSubProperties(propertyName + ".");
if (config != null)
for (UnmodifiableIterator localUnmodifiableIterator = config.entrySet()
.iterator(); localUnmodifiableIterator.hasNext();) {
Map.Entry value = (Map.Entry) localUnmodifiableIterator.next();
LOG.info(String.format("Read %s value for status code %s as %s",
new Object[] { propertyName, value.getKey(), value.getValue() }));
if (override.containsKey(value.getKey()))
LOG.warn(String.format("Ignoring duplicate config value for %s.%s",
new Object[] { propertyName, value.getKey() }));
else
override.put((String) value.getKey(), Boolean.valueOf((String) value.getValue()));
}
}
private boolean findOverrideValue(String statusCode, HashMap<String, Boolean> overrides, boolean defaultValue) {
Boolean overrideValue = (Boolean) overrides.get(statusCode);
if (overrideValue == null) {
overrideValue = (Boolean) overrides.get(statusCode.substring(0, 1) + "XX");
if (overrideValue == null)
overrideValue = Boolean.valueOf(defaultValue);
}
return overrideValue.booleanValue();
}
final void setConnectionBuilder(ConnectionBuilder builder) {
this.connectionBuilder = builder;
}
final void setSinkCounter(SinkCounter newSinkCounter) {
this.sinkCounter = newSinkCounter;
}
class ConnectionBuilder {
public HttpURLConnection getConnection() throws IOException {
/*
* HttpURLConnection connection =
* (HttpURLConnection)HttpSink.access$000(this.this$0)
* .openConnection();
*/
HttpURLConnection connection = (HttpURLConnection) endpointUrl.openConnection();
connection.setRequestMethod("POST");
//connection.setRequestProperty("Content-Type", contentTypeHeader);
connection.setRequestProperty("Accept", "application/json; charset=UTF-8");
connection.setConnectTimeout(connectTimeout);
connection.setReadTimeout(connectTimeout);
connection.setDoOutput(true);
connection.setDoInput(true);
connection.connect();
return connection;
}
}
}
6、http接口:
/**
 * Demo endpoint that receives the three request parameters sent by the HTTP sink
 * and prints them to standard output on one line, separated by "++++".
 *
 * @param topic value of the "topic" request parameter
 * @param key value of the "key" request parameter
 * @param value value of the "value" request parameter
 */
@RequestMapping("/httpConnection")
@ResponseBody
public void httpUrlConnectionClient(
    @RequestParam String topic,
    @RequestParam String key,
    @RequestParam String value) throws UnsupportedEncodingException {
  final String separator = "++++";
  StringBuilder line = new StringBuilder();
  line.append(topic).append(separator).append(key).append(separator).append(value);
  System.out.println(line.toString());
}
7、汉字乱码:源码中的编码格式不必改变,通过配置文件中的inputCharset指定数据源文件的编码即可。排查时可先把sink改为logger,将数据打印到控制台验证:
# Describe the sink
a1.sinks.k1.type = logger
启动命令(先到flume的安装目录):
bin/flume-ng agent -c ./conf -f ./conf/spool-logger.conf -n a1 -Dflume.root.logger=INFO,console
查找乱码的时候注意查看Linux系统的编码:命令:locale
修改编码:vi .bashrc 添加如下代码
# Locale settings to add to .bashrc: set LANG and each listed LC_* category
# to en_US.UTF-8. Check the result with the `locale` command.
export LANG=en_US.UTF-8
export LC_CTYPE="en_US.UTF-8"
export LC_NUMERIC="en_US.UTF-8"
export LC_TIME="en_US.UTF-8"
export LC_COLLATE="en_US.UTF-8"
export LC_MONETARY="en_US.UTF-8"
export LC_MESSAGES="en_US.UTF-8"
export LC_PAPER="en_US.UTF-8"
export LC_NAME="en_US.UTF-8"
export LC_ADDRESS="en_US.UTF-8"
export LC_TELEPHONE="en_US.UTF-8"
export LC_MEASUREMENT="en_US.UTF-8"
export LC_IDENTIFICATION="en_US.UTF-8"