Preface:
Prepare the Kafka and ClickHouse environments in advance.
I. Filebeat installation
1. Linux package download URL
https://artifacts.elastic.co/downloads/beats/filebeat/filebeat-8.8.2-linux-x86_64.tar.gz
2. Extract the archive
Extract it to the desired location on the server.
3. Modify the configuration file
Switch to the extracted directory and edit filebeat.yml, for example:
# ====================== Inputs =====================
# Log input type and paths (multiple inputs can be configured)
filebeat.inputs:
#- type: log
#  enabled: true
#  # Harvester buffer size per read; defaults to 16k (16384), can be increased to improve throughput
#  #harvester_buffer_size: 1638400
#  # Maximum bytes per log event; defaults to 10MB, anything beyond this limit is discarded
#  #max_bytes: 10485760
#  # Log file paths
#  paths:
#    # Collect this specific log file
#    - /home/hylink/docker/container/lpg/mall-tiny-loki/logs/enforce-supervise-service.log
#  # Add a custom field so events can be sent to a different topic
#  fields:
#    kafka_topic: firstTopic
# Second input configuration
- type: log
  enabled: true
  paths:
    # Collect this log file (a glob such as /path/*.log can be used to collect a whole directory)
    - /home/hylink/docker/container/lpg/mall-tiny-loki/logs/enforce-supervise-service.log
  # Add a custom field so events can be sent to a different topic
  fields:
    kafka_topic: filebeat
  # Multiline merge rule: a line starting with a timestamp begins a new log event, other lines are appended to the previous one (Java and Python logs both start with a date)
  multiline.type: pattern
  # Date wrapped in brackets: [2015-08-24 11:49:14,389]
  #multiline.pattern: '^\[[0-9]{4}-[0-9]{2}-[0-9]{2}'
  # Date at the start of the line: 2015-08-24 11:49:14,389
  multiline.pattern: '^[0-9]{4}-[0-9]{2}-[0-9]{2}'
  multiline.negate: true
  multiline.match: after
  # Maximum number of lines merged into one event; defaults to 500
  multiline.max_lines: 1000
# The registry file records the read offset so that collection can resume from the recorded position after a restart
# registry_file: /usr/soft/filebeat/data/registry
# ============= Filebeat modules ====================
filebeat.config.modules:
  # Glob pattern for configuration loading
  path: ${path.config}/modules.d/*.yml
  # Set to true to enable config reloading
  reload.enabled: false
# ==================== Outputs =========================
# Kafka broker addresses; multiple brokers can be configured, separated by commas
output.kafka:
  enabled: true
  hosts: ["192.168.3.234:9092"]
  # Route events to different topics based on the field added above
  topic: '%{[fields.kafka_topic]}'
# Console output (for debugging)
#output.console:
#  pretty: true
#  enabled: true
# ===================== Processors ===========================
processors:
  - add_host_metadata:
      when.not.contains.tags: forwarded
  - add_cloud_metadata: ~
  - add_docker_metadata: ~
  - add_kubernetes_metadata: ~
  # Drop these fields so they do not appear in the output
  - drop_fields:
      fields: ["host","input","agent","ecs","log","@version","flags"]
      ignore_missing: false
  # Extract the timestamp from the log message and store it in a msg_time field
  #- script:
  #    lang: javascript
  #    tag: my_filter
  #    source: >
  #      function process(event) {
  #        var msg = event.Get("message");
  #        var time = msg.split(" ").slice(0,2).join(" ");
  #        event.Put("msg_time",time);
  #      }
  # Replace the Filebeat collection time with the time taken from the log itself, i.e. overwrite @timestamp with the msg_time value extracted above
  #- timestamp:
  #    field: msg_time
  #    timezone: Asia/Shanghai
  #    layouts:
  #      - '2006-01-02 15:04:05'
  #      - '2006-01-02 15:04:05.999'
  #      - '2006-01-02 15:04:05.999-07:00'
  #- drop_fields:
  #    fields: [msg_time]
4. Run in the foreground (quick test)
Switch to the extracted directory and run:
./filebeat -e -c filebeat.yml
5. Create a systemd service
# Change to the systemd unit directory
cd /usr/lib/systemd/system
# Create the service file
touch filebeat.service
# Edit it
vim filebeat.service
# Content
[Unit]
Description=filebeat service...
Wants=network-online.target
After=network-online.target
[Service]
User=root
ExecStart=/home/hylink/docker/container/filebeat/filebeat-8.8.2/filebeat -e -c /home/hylink/docker/container/filebeat/filebeat-8.8.2/filebeat.yml
Restart=always
[Install]
WantedBy=multi-user.target
# Save and exit
:wq
# Related commands:
systemctl daemon-reload      # reload unit files
systemctl enable filebeat    # enable on boot
systemctl status filebeat    # check service status
systemctl start filebeat     # start the service
systemctl restart filebeat   # restart the service
journalctl -f -u filebeat    # follow the service logs
II. Spring Boot integration
1. Add the Kafka and ClickHouse Maven dependencies
<!--kafka-->
<dependency>
<groupId>org.springframework.kafka</groupId>
<artifactId>spring-kafka</artifactId>
<version><!-- version number --></version>
</dependency>
<!--clickhouse-->
<dependency>
<groupId>ru.yandex.clickhouse</groupId>
<artifactId>clickhouse-jdbc</artifactId>
<version>0.3.2</version>
</dependency>
2. Configure Kafka message consumption
Details omitted; a minimal sketch is shown below.
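Here is a minimal sketch of what the consumer side could look like, assuming the filebeat topic configured above, a consumer group named log-collector, and Spring Boot's default String deserializers (package, class and method names are illustrative; spring.kafka.bootstrap-servers must point at the broker, e.g. 192.168.3.234:9092):
package com.hylink.consumer;

import org.springframework.kafka.annotation.KafkaListener;
import org.springframework.stereotype.Component;

/**
 * Minimal sketch: consumes the JSON records that Filebeat publishes
 * to the "filebeat" topic (the kafka_topic field configured above).
 */
@Component
public class FilebeatLogConsumer {

    @KafkaListener(topics = "filebeat", groupId = "log-collector")
    public void onMessage(String record) {
        // Each record is one JSON document produced by Filebeat; parse it here
        // and hand it to the ClickHouse persistence layer.
        System.out.println("received log record: " + record);
    }
}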
3. Configure the ClickHouse data source
Details omitted; a minimal sketch is shown below.
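Here is a minimal sketch of a DataSource bean built on the clickhouse-jdbc 0.3.2 dependency above; the JDBC URL, database and credentials are placeholders to be replaced with your own environment values:
package com.hylink.config;

import javax.sql.DataSource;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import ru.yandex.clickhouse.ClickHouseDataSource;
import ru.yandex.clickhouse.settings.ClickHouseProperties;

/**
 * Minimal sketch of a ClickHouse DataSource bean using the
 * ru.yandex.clickhouse driver declared above.
 */
@Configuration
public class ClickHouseConfig {

    @Bean
    public DataSource clickHouseDataSource() {
        ClickHouseProperties properties = new ClickHouseProperties();
        properties.setUser("default");   // placeholder user
        properties.setPassword("");      // placeholder password
        // jdbc:clickhouse://<host>:<http-port>/<database>
        return new ClickHouseDataSource("jdbc:clickhouse://127.0.0.1:8123/default", properties);
    }
}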
III. A log collection and analysis system built on Filebeat + Kafka + ClickHouse + Spring Boot
For the ClickHouse ARRAY columns used in this article, the Java code handles the ARRAY type with List<?>. The concrete steps are as follows:
1. Custom type handler
package com.hylink.handler;
/**
 * @Description: Custom type handler for ClickHouse ARRAY columns
 * @author: dcj
 * @date: 2023-07-17
 */
import org.apache.ibatis.executor.result.ResultMapException;
import org.apache.ibatis.type.*;
import java.sql.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
@MappedTypes({List.class})
@MappedJdbcTypes(JdbcType.ARRAY)
public class ListTypeHandler implements TypeHandler<List<?>> {
@Override
public void setParameter(PreparedStatement ps, int i, List<?> parameter, JdbcType jdbcType) throws SQLException {
if (parameter == null) {
try {
ps.setNull(i, JdbcType.ARRAY.TYPE_CODE);
} catch (SQLException e) {
throw new TypeException("Error setting null for parameter #" + i + " with JdbcType " + jdbcType + " . "
+ "Try setting a different JdbcType for this parameter or a different jdbcTypeForNull configuration property. "
+ "Cause: " + e, e);
}
} else {
try {
ps.setArray(i, ps.getConnection().createArrayOf(jdbcType.name(), parameter.toArray()));
} catch (Exception e) {
throw new TypeException("Error setting non null for parameter #" + i + " with JdbcType " + jdbcType
+ " . "
+ "Try setting a different JdbcType for this parameter or a different configuration property. "
+ "Cause: " + e, e);
}
}
}
@Override
public List<?> getResult(ResultSet rs, String columnName) throws SQLException {
List<?> result;
try {
Array array = rs.getArray(columnName);
result = parse(array);
} catch (Exception e) {
throw new ResultMapException(
"Error attempting to get column '" + columnName + "' from result list. Cause: " + e, e);
}
if (rs.wasNull()) {
return null;
} else {
return result;
}
}
@Override
public List<?> getResult(ResultSet rs, int columnIndex) throws SQLException {
List<?> result;
try {
Array array = rs.getArray(columnIndex);
result = parse(array);
} catch (Exception e) {
throw new ResultMapException(
"Error attempting to get column #" + columnIndex + " from result list. Cause: " + e, e);
}
if (rs.wasNull()) {
return null;
} else {
return result;
}
}
@Override
public List<?> getResult(CallableStatement cs, int columnIndex) throws SQLException {
List<?> result;
try {
Array array = cs.getArray(columnIndex);
result = parse(array);
} catch (Exception e) {
throw new ResultMapException(
"Error attempting to get column #" + columnIndex + " from callable statement. Cause: " + e, e);
}
if (cs.wasNull()) {
return null;
} else {
return result;
}
}
private List<?> parse(Array array) throws SQLException {
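// Map the JDBC base type reported by the driver to a typed Java list
// (integers, longs, strings, doubles/floats, or a generic object array fallback).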
List<?> result;
final int baseType = array.getBaseType();
if (baseType == Types.INTEGER) {
result = Arrays.stream((long[]) array.getArray()).boxed().collect(Collectors.toList());
} else if (baseType == Types.BIGINT) {
result = Arrays.stream((long[]) array.getArray()).boxed().collect(Collectors.toList());
} else if (baseType == Types.VARCHAR) {
result = new ArrayList<>(Arrays.asList((String[]) array.getArray()));
} else if (baseType == Types.DOUBLE) {
result = Arrays.stream((double[]) array.getArray()).boxed().collect(Collectors.toList());
} else if (baseType == Types.FLOAT) {
result = Arrays.stream((double[]) array.getArray()).boxed().collect(Collectors.toList());
} else {
result = Collections.singletonList((Object[]) array.getArray());
}
return result;
}
}
2. Referencing the handler in the entity class
...................
/**
* Key collection for the string columns
*/
@TableField(value = "string.keys", typeHandler = ListTypeHandler.class)
private List<String> stringKeys;
...................
3. The entity's corresponding *.xml mapper file
<resultMap type="com.hylink.entity.UnifiedLog" id="testMap">
...............
<result property="stringKeys" column="string.keys" jdbcType="OTHER" typeHandler="com.hylink.handler.ListTypeHandler"/>
...............
</resultMap>
With the steps above, queries already work. To insert and save data, however, the List<?> stringKeys must be converted to an Object[] before executing the insert:
Object[] stringKeys = unifiedLog.getStringKeys().toArray();
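For example, a save method might perform the conversion as in the sketch below (the mapper interface and its insertLog method are assumed names, used only to show where the conversion happens):
// Sketch only: UnifiedLogMapper and insertLog(...) are assumed names.
public void save(UnifiedLog unifiedLog) {
    // ClickHouse ARRAY parameters must be bound as Object[], not as List
    Object[] stringKeys = unifiedLog.getStringKeys().toArray();
    unifiedLogMapper.insertLog(unifiedLog, stringKeys);
}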
IV. Conclusion
The steps above complete the log collection part of the system. How to analyze the logs and how to optimize the queries depends on the concrete requirements of your project; two reference SQL statements are given below.
1. Fuzzy search
select * from unified_log ul where multiSearchAny(rawLog, ['group1'])
2. Exact match
select * from unified_log WHERE has(string.values, '123456789') AND string.values[indexOf(string.keys, 'classStr')] = '123456789'