flume使用总结

         flume 从spooldir到http数据传输过程:读取文件-->String line = readLine()-->将line转化为字节数组放到Event的body中,再批量将Event放到channel中--->http sink获取channel中的Event获取字节数组--->通过httpurlconnection传到接口中。

1、源码下载地址:http://www.apache.org/dyn/closer.lua/flume/1.8.0/apache-flume-1.8.0-src.tar.gz

2、tar包下载地址:http://www.apache.org/dyn/closer.lua/flume/1.8.0/apache-flume-1.8.0-bin.tar.gz

3、配置文件

# Name the components on this agent
a1.sources = r1
a1.sinks = k1
a1.channels = c1

# Describe/configure the source
a1.sources.r1.type = spooldir
a1.sources.r1.spoolDir = /opt/datasyn/logdata
a1.sources.r1.fileHeader = true
a1.sources.r1.inputCharset= GB2312
a1.sources.r1.maxBackoff=4000

#filter
a1.sources.r1.interceptors= i1
a1.sources.r1.interceptors.i1.type= regex_filter
#获取含有WB10字符的数据
a1.sources.r1.interceptors.i1.regex= .*WB10.*

# Describe the sink
a1.sinks.k1.type = http
a1.sinks.k1.channel=c1
a1.sinks.k1.endpoint=http://localhost:8888/httpConnection
a1.sinks.k1.contentTypeHeader=application/json
a1.sinks.k1.acceptHeader=application/json
a1.sinks.k1.defaultBackoff = true
a1.sinks.k1.defaultRollback = true
a1.sinks.k1.defaultIncrementMetrics = false
a1.sinks.k1.backoff.4XX = false
a1.sinks.k1.rollback.4XX = false
a1.sinks.k1.incrementMetrics.4XX = true
a1.sinks.k1.backoff.200 = false
a1.sinks.k1.rollback.200 = false
a1.sinks.k1.incrementMetrics.200 = true
a1.sinks.k1.topic = topic
a1.sinks.k1.key = key
#a1.sinks.k1.keyword =

# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100

# Bind the source and sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1

4、spoolDir文件的编码不规整(例如同一文件中含有两种编码),导致运行无法通过时:

    将flume-ng-core-1.8.0.jar------>org.apache.flume.source包------->SpoolDirectorySourceConfigurationConstants类中的

    DEFAULT_DECODE_ERROR_POLICY修改为DecodeErrorPolicy.IGNORE.name();

 5、自定义http sink 以及自定义过滤内容:

      将flume-http-sink-1.8.0.jar中的HttpSink类代码修改为:

   

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.flume.sink.http;

import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.UnmodifiableIterator;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.Charset;
import java.util.Base64;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.flume.Channel;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.EventDeliveryException;
import org.apache.flume.Sink;
import org.apache.flume.Sink.Status;
import org.apache.flume.Transaction;
import org.apache.flume.conf.Configurable;
import org.apache.flume.instrumentation.SinkCounter;
import org.apache.flume.sink.AbstractSink;
import org.apache.log4j.Logger;

public class HttpSink extends AbstractSink implements Configurable {
	private static final Logger LOG = Logger.getLogger(HttpSink.class);
	private static final int HTTP_STATUS_CONTINUE = 100;
	private static final int DEFAULT_CONNECT_TIMEOUT = 5000;
	private static final int DEFAULT_REQUEST_TIMEOUT = 5000;
	private static final String DEFAULT_CONTENT_TYPE = "text/plain";
	private static final String DEFAULT_ACCEPT_HEADER = "text/plain";
	private URL endpointUrl;
	private SinkCounter sinkCounter;
	private int connectTimeout;
	private int requestTimeout;
	private String contentTypeHeader;
	private String acceptHeader;
    //自定义参数
	private String topic;
	private String key;
	private String keyword;
	private boolean defaultBackoff;
	private boolean defaultRollback;
	private boolean defaultIncrementMetrics;
	private HashMap<String, Boolean> backoffOverrides;
	private HashMap<String, Boolean> rollbackOverrides;
	private HashMap<String, Boolean> incrementMetricsOverrides;
	private ConnectionBuilder connectionBuilder;

	public HttpSink() {
		this.connectTimeout = 5000;

		this.requestTimeout = 5000;

		this.contentTypeHeader = "text/plain";

		this.acceptHeader = "text/plain";

		this.backoffOverrides = new HashMap();

		this.rollbackOverrides = new HashMap();

		this.incrementMetricsOverrides = new HashMap();
	}

	public final void configure(Context context) {
		String configuredEndpoint = context.getString("endpoint", "");
		LOG.info("Read endpoint URL from configuration : " + configuredEndpoint);
		try {
			this.endpointUrl = new URL(configuredEndpoint);
		} catch (MalformedURLException e) {
			throw new IllegalArgumentException("Endpoint URL invalid", e);
		}

		this.connectTimeout = context.getInteger("connectTimeout", Integer.valueOf(5000)).intValue();

		if (this.connectTimeout <= 0) {
			throw new IllegalArgumentException("Connect timeout must be a non-zero and positive");
		}

		LOG.info("Using connect timeout : " + this.connectTimeout);

		this.requestTimeout = context.getInteger("requestTimeout", Integer.valueOf(5000)).intValue();

		if (this.requestTimeout <= 0) {
			throw new IllegalArgumentException("Request timeout must be a non-zero and positive");
		}

		LOG.info("Using request timeout : " + this.requestTimeout);

		this.acceptHeader = context.getString("acceptHeader", "text/plain");
		LOG.info("Using Accept header value : " + this.acceptHeader);
        //参数赋值
		//topic
		this.topic = context.getString("topic", "web_user_gbase");
		LOG.info("Using Accept topic value : " + this.topic);
		//key
		this.key = context.getString("key", "web_user_gbase");
		LOG.info("Using Accept key value : " + this.key);
		//keyword
		//this.keyword = context.getString("keyword", "");
		//LOG.info("Using Accept keyword value : " + this.keyword);
		
		this.contentTypeHeader = context.getString("contentTypeHeader", "text/plain");

		LOG.info("Using Content-Type header value : " + this.contentTypeHeader);

		this.defaultBackoff = context.getBoolean("defaultBackoff", Boolean.valueOf(true)).booleanValue();
		LOG.info("Channel backoff by default is " + this.defaultBackoff);

		this.defaultRollback = context.getBoolean("defaultRollback", Boolean.valueOf(true)).booleanValue();
		LOG.info("Transaction rollback by default is " + this.defaultRollback);

		this.defaultIncrementMetrics = context.getBoolean("defaultIncrementMetrics", Boolean.valueOf(false))
				.booleanValue();

		LOG.info("Incrementing metrics by default is " + this.defaultIncrementMetrics);

		parseConfigOverrides("backoff", context, this.backoffOverrides);
		parseConfigOverrides("rollback", context, this.rollbackOverrides);
		parseConfigOverrides("incrementMetrics", context, this.incrementMetricsOverrides);

		if (this.sinkCounter == null) {
			this.sinkCounter = new SinkCounter(getName());
		}

		this.connectionBuilder = new ConnectionBuilder();
	}

	public final void start() {
		LOG.info("Starting HttpSink");
		this.sinkCounter.start();
	}

	public final void stop() {
		LOG.info("Stopping HttpSink");
		this.sinkCounter.stop();
	}

	public final Sink.Status process() throws EventDeliveryException {
		Sink.Status status = null;
		PrintWriter outputStream = null;

		Channel ch = getChannel();
		Transaction txn = ch.getTransaction();
		txn.begin();
		try {
			Event event = ch.take();

			byte[] eventBody = null;
			if (event != null) {
				eventBody = event.getBody();
			}

			if ((eventBody != null) && (eventBody.length > 0) && (new              
                              String(eventBody).indexOf(keyword)>=0)) {//自定义过滤
				this.sinkCounter.incrementEventDrainAttemptCount();
				LOG.debug("Sending request : " + new String(event.getBody()));
				try {
					HttpURLConnection connection =this.connectionBuilder.getConnection();
					outputStream = new PrintWriter(connection.getOutputStream());
                    //给接口传递参数
					outputStream.write("topic="+topic+"&key="+key+"&value=" + new 
                                              String(eventBody));
					outputStream.flush();
					outputStream.close();

					int httpStatusCode = connection.getResponseCode();
					LOG.debug("Got status code : " + httpStatusCode);

					if (httpStatusCode < 400) {
						connection.getInputStream().close();
					} else {
						LOG.debug("bad request");
						connection.getErrorStream().close();
					}
					LOG.debug("Response processed and closed");

					if (httpStatusCode >= 100) {
						String httpStatusString = String.valueOf(httpStatusCode);

						boolean shouldRollback = findOverrideValue(httpStatusString, this.rollbackOverrides,
								this.defaultRollback);

						if (shouldRollback)
							txn.rollback();
						else {
							txn.commit();
						}

						boolean shouldBackoff = findOverrideValue(httpStatusString, this.backoffOverrides,
								this.defaultBackoff);

						if (shouldBackoff) {
							status = Sink.Status.BACKOFF;
						} else {
							status = Sink.Status.READY;
						}

						boolean shouldIncrementMetrics = findOverrideValue(httpStatusString,
								this.incrementMetricsOverrides, this.defaultIncrementMetrics);

						if (shouldIncrementMetrics) {
							this.sinkCounter.incrementEventDrainSuccessCount();
						}

						if (shouldRollback) {
							if (shouldBackoff) {
								LOG.info(String.format(
										"Got status code %d from HTTP server. Rolled back event and backed off.",
										new Object[] { Integer.valueOf(httpStatusCode) }));
							}
						} else {
							LOG.info(String.format("Got status code %d from HTTP server. Rolled back event for retry.",
									new Object[] { Integer.valueOf(httpStatusCode) }));
						}
					} else {
						txn.rollback();
						status = Sink.Status.BACKOFF;

						LOG.warn("Malformed response returned from server, retrying");
					}
				} catch (IOException e) {
					txn.rollback();
					status = Sink.Status.BACKOFF;

					LOG.error("Error opening connection, or request timed out", e);
				}
			} else {
				txn.commit();
				status = Sink.Status.BACKOFF;

				LOG.warn("Processed empty event");
			}
		} catch (Throwable t) {
			txn.rollback();
			status = Sink.Status.BACKOFF;

			LOG.error("Error sending HTTP request, retrying", t);

			if (t instanceof Error)
				throw ((Error) t);
		} finally {
			txn.close();

			if (outputStream != null)
				try {
					outputStream.close();
				} catch (Exception localIOException1) {
				}
		}

		return status;
	}

	private void parseConfigOverrides(String propertyName, Context context, Map<String, Boolean> override) {
		ImmutableMap config = context.getSubProperties(propertyName + ".");

		if (config != null)
			for (UnmodifiableIterator localUnmodifiableIterator = config.entrySet()
					.iterator(); localUnmodifiableIterator.hasNext();) {
				Map.Entry value = (Map.Entry) localUnmodifiableIterator.next();
				LOG.info(String.format("Read %s value for status code %s as %s",
						new Object[] { propertyName, value.getKey(), value.getValue() }));

				if (override.containsKey(value.getKey()))
					LOG.warn(String.format("Ignoring duplicate config value for %s.%s",
							new Object[] { propertyName, value.getKey() }));
				else
					override.put((String) value.getKey(), Boolean.valueOf((String) value.getValue()));
			}
	}

	private boolean findOverrideValue(String statusCode, HashMap<String, Boolean> overrides, boolean defaultValue) {
		Boolean overrideValue = (Boolean) overrides.get(statusCode);
		if (overrideValue == null) {
			overrideValue = (Boolean) overrides.get(statusCode.substring(0, 1) + "XX");
			if (overrideValue == null)
				overrideValue = Boolean.valueOf(defaultValue);
		}

		return overrideValue.booleanValue();
	}

	final void setConnectionBuilder(ConnectionBuilder builder) {
		this.connectionBuilder = builder;
	}

	final void setSinkCounter(SinkCounter newSinkCounter) {
		this.sinkCounter = newSinkCounter;
	}

	class ConnectionBuilder {
		public HttpURLConnection getConnection() throws IOException {
			/*
			 * HttpURLConnection connection =
			 * (HttpURLConnection)HttpSink.access$000(this.this$0)
			 * .openConnection();
			 */
			HttpURLConnection connection = (HttpURLConnection) endpointUrl.openConnection();
			connection.setRequestMethod("POST");
			//connection.setRequestProperty("Content-Type", contentTypeHeader);
			connection.setRequestProperty("Accept", "application/json; charset=UTF-8");
			connection.setConnectTimeout(connectTimeout);
			connection.setReadTimeout(connectTimeout);
			connection.setDoOutput(true);
			connection.setDoInput(true);
			connection.connect();
			return connection;
		}
	}
}

      6、http接口:

/**
 * Receives the form parameters posted by the sink and prints them to standard
 * output, separated by "++++".
 */
@RequestMapping("/httpConnection")
   @ResponseBody
   public void httpUrlConnectionClient(@RequestParam String topic,@RequestParam String key,@RequestParam String value) throws UnsupportedEncodingException{
	   final String separator = "++++";
	   String line = String.join(separator, topic, key, value);
	   System.out.println(line);
   }

     7 、汉字乱码 :源码中的编码格式不必改变,通过设置配置文件中的inputCharset的编码控制数据源的数据,可先将数据打印到控制台尝试:

# Describe the sink
a1.sinks.k1.type = logger

启动命令(先到flume的安装目录):
bin/flume-ng agent -c ./conf -f ./conf/spool-logger.conf -n a1 -Dflume.root.logger=INFO,console

   查找乱码的时候注意查看Linux系统的编码:命令:locale

    修改编码:vi .bashrc 添加如下代码

export LANG=en_US.UTF-8
export LC_CTYPE="en_US.UTF-8"
export LC_NUMERIC="en_US.UTF-8"
export LC_TIME="en_US.UTF-8"
export LC_COLLATE="en_US.UTF-8"
export LC_MONETARY="en_US.UTF-8"
export LC_MESSAGES="en_US.UTF-8"
export LC_PAPER="en_US.UTF-8"
export LC_NAME="en_US.UTF-8"
export LC_ADDRESS="en_US.UTF-8"
export LC_TELEPHONE="en_US.UTF-8"
export LC_MEASUREMENT="en_US.UTF-8"
export LC_IDENTIFICATION="en_US.UTF-8"

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值