# Flume agent "a1": names of its source, channel and sink components.
a1.sources = r1
a1.channels = c1
a1.sinks = k1
##### source
# Spooling-directory source that reads files dropped into /urldata/t_url/.
a1.sources.r1.type = spooldir
a1.sources.r1.spoolDir = /urldata/t_url/
## Disabled alternative: rename finished files with a .COMPLETED suffix
##a1.sources.r1.fileSuffix = .COMPLETED
# Delete each file once it has been handed off (original note: "after the sink succeeds")
a1.sources.r1.deletePolicy = immediate
# Skip over character-decoding errors instead of failing
a1.sources.r1.decodeErrorPolicy = IGNORE
# NOTE(review): "checkperiodic" is not listed among the stock spooldir source
# properties - confirm that something actually consumes it.
a1.sources.r1.checkperiodic = 50
# Describe the interceptor attached to source r1
### Each record in the scanned files is a line of fields separated by "|".
### To sink the fields into Elasticsearch, an interceptor splits every line of text.
# regex_extractor: capture group N of the regex is handed to serializer sN, which
# stores it under the header name configured below (s1 -> rid, s2 -> dir, ...).
a1.sources.r1.interceptors=es_interceptor
a1.sources.r1.interceptors.es_interceptor.type = regex_extractor
# The doubled backslashes are .properties escaping; the effective regex separator is \|.
# NOTE(review): the number of capture groups is presumably 22 to match s1..s22 - verify when editing.
a1.sources.r1.interceptors.es_interceptor.regex = ([^\\|]*)\\|([^\\|]*)\\|([^\\|]*)\\|([^\\|]*)\\|([^\\|]*)\\|([^\\|]*)\\|([^\\|]*)\\|([^\\|]*)\\|([^\\|]*)\\|([^\\|]*)\\|([^\\|]*)\\|([^\\|]*)\\|([^\\|]*)\\|([^\\|]*)\\|([^\\|]*)\\|([^\\|]*)\\|([^\\|]*)\\|([^\\|]*)\\|([^\\|]*)\\|([^\\|]*)\\|([^\\|]*)\\|([^\\|]*)
# Space-separated list of serializer ids, in capture-group order.
a1.sources.r1.interceptors.es_interceptor.serializers =s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11 s12 s13 s14 s15 s16 s17 s18 s19 s20 s21 s22
a1.sources.r1.interceptors.es_interceptor.serializers.s1.name = rid
a1.sources.r1.interceptors.es_interceptor.serializers.s2.name = dir
a1.sources.r1.interceptors.es_interceptor.serializers.s3.name = username
a1.sources.r1.interceptors.es_interceptor.serializers.s4.name = sip
a1.sources.r1.interceptors.es_interceptor.serializers.s5.name = sport
a1.sources.r1.interceptors.es_interceptor.serializers.s6.name = dip
a1.sources.r1.interceptors.es_interceptor.serializers.s7.name = dport
a1.sources.r1.interceptors.es_interceptor.serializers.s8.name = bytes
a1.sources.r1.interceptors.es_interceptor.serializers.s9.name = starttime
## Disabled: convert starttime to epoch milliseconds (not needed for now)
#a1.sources.r1.interceptors.es_interceptor.serializers.s9.type = org.apache.flume.interceptor.RegexExtractorInterceptorMillisSerializer
#a1.sources.r1.interceptors.es_interceptor.serializers.s9.pattern = yyyy-MM-dd HH:mm:ss
a1.sources.r1.interceptors.es_interceptor.serializers.s10.name = action
a1.sources.r1.interceptors.es_interceptor.serializers.s11.name = url
a1.sources.r1.interceptors.es_interceptor.serializers.s12.name = descid
a1.sources.r1.interceptors.es_interceptor.serializers.s13.name = domain
a1.sources.r1.interceptors.es_interceptor.serializers.s14.name = type
a1.sources.r1.interceptors.es_interceptor.serializers.s15.name = subtype
a1.sources.r1.interceptors.es_interceptor.serializers.s16.name = words
a1.sources.r1.interceptors.es_interceptor.serializers.s17.name = line
a1.sources.r1.interceptors.es_interceptor.serializers.s18.name = platform
a1.sources.r1.interceptors.es_interceptor.serializers.s19.name = browser
a1.sources.r1.interceptors.es_interceptor.serializers.s20.name = grpids
a1.sources.r1.interceptors.es_interceptor.serializers.s21.name = referer
a1.sources.r1.interceptors.es_interceptor.serializers.s22.name = termtype
##### sink
# Elasticsearch sink: writes events to index "t_url", type "info", on cluster "es_polyinfo".
a1.sinks.k1.type = org.apache.flume.sink.elasticsearch.ElasticSearchSink
a1.sinks.k1.hostNames = 192.168.1.181:9300
a1.sinks.k1.indexName = t_url
a1.sinks.k1.indexType = info
a1.sinks.k1.clusterName = es_polyinfo
# NOTE(review): "fields" is not listed among the stock ElasticSearchSink properties -
# confirm that the deployed sink/serializer actually reads it.
a1.sinks.k1.fields = rid,dir,username,sip,sport,dip,dport,bytes,starttime,action,url,descid,domain,type,subtype,words,line,platform,browser,grpids,referer,termtype
# NOTE(review): with the "m" qualifier this presumably means documents expire after
# 1 minute - confirm that such a short TTL is intended.
a1.sinks.k1.ttl = 1m
# Events written per transaction. The key was previously misspelled "bachSize", so the
# value was ignored and the sink default was used. Must not exceed the channel's
# transactionCapacity (1000).
a1.sinks.k1.batchSize = 1000
### Serializer that stores the event body (and headers) as document fields in ES
a1.sinks.k1.serializer = org.apache.flume.sink.elasticsearch.ElasticSearchDynamicSerializer
## Disabled alternative: Logstash-style serializer
##a1.sinks.k1.serializer = org.apache.flume.sink.elasticsearch.ElasticSearchLogStashEventSerializer
##### channels
# In-memory channel buffering events between source r1 and sink k1.
# capacity: maximum events held in the channel; transactionCapacity: maximum events per transaction.
a1.channels.c1.type = memory
a1.channels.c1.capacity = 100000
a1.channels.c1.transactionCapacity = 1000
#### Bind the source and sink to the channel
# c1 is the only channel declared for agent a1. An earlier duplicate binding of k1 to the
# undeclared channel "memoryChannel" was removed; each key now appears exactly once.
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1