Flume采集埋点数据

nginx部署

  1. 下载OpenResty

(1)wget https://openresty.org/download/openresty-1.15.8.1.tar.gz #下载源码安装包

(2)tar -zxvf openresty-1.15.8.1.tar.gz #解压

(3)cd openresty-1.15.8.1 && ./configure --without-http_redis2_module --with-http_iconv_module #进入解压目录后选择需要的插件启用, --with-Components 激活组件,--without 则是禁止组件

(4)make && make install #编译加安装

(5)vi /etc/profile #加入path路径

(6)export PATH=$PATH:/usr/local/openresty/nginx/sbin/ #加入的内容

(7)source /etc/profile ##使配置生效

安装检测

执行nginx -V

  1. 确定需求

由于报表开发需要获取用户登录次数,需要在前端做数据埋点,涉及字段如下

字段中文名

字段英文名

字段类型

事件类型

event_type

string

客户id

customer_id

string

性别

sex

string

平台类型(1-兔聊(牵守),2-觅伊)

app_type

string

登录来源渠道

login_source

string

客户端版本号

login_i_version

string

上报地址:

    var img = new Image(1, 1);

    img.src = 'https://bidata.miyiapp.com/1.gif?' + args;

    })();

  1. Nginx配置文件/usr/local/openresty/nginx/conf/nginx.conf修改

http {

    include       mime.types;

    default_type  application/octet-stream;

    #log_format  main  '$remote_addr - $remote_user [$time_local] "$request" '

    #                  '$status $body_bytes_sent "$http_referer" '

    #                  '"$http_user_agent" "$http_x_forwarded_for"';

    log_format log_json escape=json '{"event_type": "$event_type", '

                    '"customer_id": "$customer_id", '

                    '"sex": "$sex", '

                    '"app_type": "$app_type", '

                    '"login_source": "$login_source", '

                    '"login_i_version": “$login_i_version”, '

                    '"create_time": "$time_local", '

                    ' }';

    #access_log  logs/access.log  main;

    sendfile        on;

    #tcp_nopush     on;

    #keepalive_timeout  0;

    keepalive_timeout  65;

    #gzip  on;

    server {

        listen       80;

        server_name  localhost;

        default_type 'text/html';

        charset utf-8;

        #charset koi8-r;

        #access_log  logs/host.access.log  main;

        location /1.gif {

        log_escape_non_ascii off;

        #伪装成gif文件

        default_type image/gif;

        #本身关闭access_log,通过subrequest记录log

        access_log off;

        access_by_lua "

        -- 用户跟踪cookie名为__utrace

        local uid = ngx.var.cookie___utrace

        if not uid then

        -- 如果没有则生成一个跟踪cookie,算法为md5(时间戳+IP+客户端信息)

        uid = ngx.md5(ngx.now() .. ngx.var.remote_addr .. ngx.var.login_source)

        end

        ngx.header['Set-Cookie'] = {'__utrace=' .. uid .. '; path=/'}

        if ngx.var.arg_customer_id then

        -- 通过subrequest到/i-log记录日志,将参数和用户跟踪cookie带过去

        local head = ngx.req.get_headers()

        local v = head['isDev']

        if v ~= 0 then

        local signature = head['signature']

        local nonce = head['nonce']

        local timestamp = head['timestamp']

        local params = ngx.req.get_uri_args()

        local keys, tmp = {}, {}

        for k, _ in pairs(params) do

            keys[#keys+1] = k

        end

        table.sort(keys)

        for _,k in pairs(keys) do

            tmp[#tmp+1] = k .. tostring(params[k])

        end

        local signchar = '1eDCIDd' .. timestamp .. nonce .. table.concat(tmp) .. 'Yophd0E'

        local rightsign = ngx.md5(signchar)

        if signature ~= rightsign then

            local mess='密钥错误'

            ngx.log(ngx.ERR, mess)

            return ngx.exit(ngx.HTTP_FORBIDDEN)

        end

        local now_mill = ngx.now() * 1000

        if timestamp == nil then

            local mess='链接过期'

            ngx.log(ngx.ERR, mess)

            return ngx.exit(ngx.HTTP_FORBIDDEN)

        end

        if timestamp ~= nil then

            if now_mill - timestamp  > 60000 then

                local mess='链接过期'

                ngx.log(ngx.ERR, mess)

                return ngx.exit(ngx.HTTP_FORBIDDEN)

            end

        end

ngx.location.capture('/i-log?' .. ngx.var.args .. '&utrace=' .. uid)

        end

        ";

        #此请求不缓存

        add_header Expires "Fri, 01 Jan 1980 00:00:00 GMT";

        add_header Pragma "no-cache";

        add_header Cache-Control "no-cache, max-age=0, must-revalidate";

        #返回一个1×1的空gif图片

        empty_gif;

        }

        location /i-log {

        #内部location,不允许外部直接访问

        internal;

        #设置变量,注意需要unescape

        set_unescape_uri $event_type $arg_event_type;

        set_unescape_uri $customer_id $arg_customer_id;

        set_unescape_uri $sex $arg_sex;

        set_unescape_uri $app_type $arg_app_type;

        set_unescape_uri $login_source $arg_login_source;

        set_unescape_uri $login_i_version $arg_login_i_version;

        #打开日志

        log_subrequest on;

        access_log /opt/bi/weblog/loginrecord/loginrecord.log log_json;

        #输出空字符串

        echo '';

        }

  1. 日志文件拆分脚本/opt/bi/weblog/splitlog.sh

#!/bin/sh
# Rotate the login-record log: rename the current file with today's date,
# then send USR1 to the nginx master process so it reopens its log files.

log_dir=/opt/bi/weblog/loginrecord
pid_file=/usr/local/openresty/nginx/logs/nginx.pid

time=$(date +%Y%m%d)

# Signal nginx only when the rename succeeded; otherwise there is nothing to reopen.
if mv "${log_dir}/loginrecord.log" "${log_dir}/loginrecord${time}.log"; then
    kill -USR1 "$(cat "${pid_file}")"
fi

Flume安装部署

1、下载地址:https://archive.apache.org/dist/flume/ (Index of /dist/flume)

2、mv flume-env.sh.template flume-env.sh

vi flume-env.sh

export JAVA_HOME=/opt/module/jdk1.8.0_212

Flume启动不打印日志问题:下载apache-flume-1.11.0-bin.tar.gz替换*4j*jar包

  1. 下载flume-datahub插件下载链接

解压flume插件并放在${FLUME_HOME}/plugins.d目录下

$ tar -zxvf aliyun-flume-datahub-sink-x.x.x.tar.gz

$ mkdir ${FLUME_HOME}/plugins.d

$ mv aliyun-flume-datahub-sink ${FLUME_HOME}/plugins.d

  1. 修改配置文件/opt/bi/flume/flume-1.9.0/conf/flume-datahub.conf

# A single-node Flume configuration for DataHub:
# TAILDIR source -> memory channel -> DataHub sink.

# Name the components on this agent
a1.sources = r1
a1.sinks = k1
a1.channels = c1

# Describe/configure the source: tail every file matching the filegroup pattern
# and remember read offsets in the position file.
a1.sources.r1.type = TAILDIR
a1.sources.r1.positionFile = /opt/bi/flume/flume-1.9.0/taildir_position.json
a1.sources.r1.filegroups = f1
# NOTE(review): this path, the topic and the field names below describe
# advertiser data, not the loginrecord log written by nginx
# (/opt/bi/weblog/loginrecord/) - confirm which pipeline this file is for.
a1.sources.r1.filegroups.f1 = /opt/bi/weblog/toutiao/.*log.*

# Register the custom Flume interceptor class
a1.sources.r1.interceptors = i1
a1.sources.r1.interceptors.i1.type = org.example.TimestampInterceptor$Builder

# Describe the sink
a1.sinks.k1.type = com.aliyun.datahub.flume.sink.DatahubSink
# Credentials must not be published or committed: fill these in at deploy time
# and rotate any key pair that has ever appeared in a shared document.
a1.sinks.k1.datahub.accessId = YOUR_ACCESS_KEY_ID
a1.sinks.k1.datahub.accessKey = YOUR_ACCESS_KEY_SECRET
a1.sinks.k1.datahub.endPoint = https://dh-cn-shenzhen.aliyuncs.com
a1.sinks.k1.datahub.project = rldw
a1.sinks.k1.datahub.topic = ods_advertiser_data
a1.sinks.k1.serializer = JSON
a1.sinks.k1.serializer.fieldnames = advertiser_id,advertiser_name,cost,show,click,datetime
a1.sinks.k1.serializer.charset = UTF-8
a1.sinks.k1.datahub.retryTimes = 5
a1.sinks.k1.datahub.retryInterval = 5
a1.sinks.k1.datahub.batchSize = 1000
a1.sinks.k1.datahub.batchTimeout = 5

# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 10000
a1.channels.c1.transactionCapacity = 10000

# Bind the source and sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1

  1. 启动flume

nohup /opt/bi/flume/flume-1.9.0/bin/flume-ng agent --conf /opt/bi/flume/flume-1.9.0/conf --conf-file /opt/bi/flume/flume-1.9.0/conf/flume-datahub.conf --name a1  2>&1  &

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值