1. 对于需要在ES中进行查询的表格,增加字段UPDATE_TIME,ES同步数据时根据该字段同步插入和更新的数据。在增加和修改数据时,MySql会自动更新UPDATE_TIME为当前时间,不需要人工进行管理
-- Add UPDATE_TIME so Logstash can incrementally sync inserts/updates;
-- MySQL maintains the value automatically on INSERT and UPDATE.
-- NOTE(review): on MySQL 5.5 with pre-existing rows, use the two-step
-- variant below instead, or existing rows may get '0000-00-00 00:00:00'.
ALTER TABLE `table_name` ADD COLUMN `UPDATE_TIME` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间' ;
需要注意的是,如果数据库表已经有数据了,则需要分两步执行ALTER语句(Mysql5.5),否则直接执行上面的语句,已有数据行的update_time的值会变为0000-00-00
-- Step 1 (MySQL 5.5): add the column as NULLable first, so rows that
-- already exist are not stamped with a zero timestamp.
alter table table_name add column update_time TIMESTAMP NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间';
-- Step 2: tighten the column to NOT NULL once it exists.
alter table table_name modify column update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间';
2. 对于需要在ES中进行查询的表格,增加触发器,在触发器中向另一个表中插入数据,用于记录删除的表名和对应的数据ID
-- Tombstone table: the AFTER DELETE trigger records which row of which
-- table was deleted, so a scheduled job can purge the matching ES document.
CREATE TABLE IF NOT EXISTS T_ELASTIC_SEARCH_DELETE (
    ID BIGINT NOT NULL AUTO_INCREMENT,
    -- Source table the deleted row belonged to.
    TABLE_NAME VARCHAR(256) NOT NULL,
    -- Primary key of the deleted row (trigger always supplies OLD.ID).
    OLD_ID BIGINT NOT NULL,
    PRIMARY KEY (ID)
);
-- Recreate the delete-audit trigger idempotently: plain DROP TRIGGER
-- errors when the trigger does not exist yet (e.g. first deployment).
DROP TRIGGER IF EXISTS table_name_AFTER_DELETE;
DELIMITER ||
CREATE TRIGGER `table_name_AFTER_DELETE` AFTER DELETE ON `table_name` FOR EACH ROW
BEGIN
    -- Record the deleted row's id so the sync job can remove it from ES.
    INSERT INTO T_ELASTIC_SEARCH_DELETE(TABLE_NAME, OLD_ID) VALUES ('table_name', OLD.ID);
END||
DELIMITER ;
3. 在代码中增加定时任务,读取数据库的值,然后删除ES中对应的文档
// Scheduled task: read tombstone rows written by the AFTER DELETE triggers,
// delete the corresponding documents from ES, then remove the processed
// tombstone rows from the database.
String sql = "SELECT ID, TABLE_NAME, OLD_ID FROM T_ELASTIC_SEARCH_DELETE";
// Tombstone-row PKs whose ES deletion succeeded; safe to purge from MySQL.
Set<Long> idList = new HashSet<>();
int limit = 100;
// index name -> ids of ES documents to delete
Map<String, Set<Long>> indexDelete = new HashMap<>();
// index name -> tombstone-row PKs backing those documents
Map<String, Set<Long>> dbDelete = new HashMap<>();
// Page through the tombstone table; qr.getLength() == 0 ends the scan.
for (int i = 0;; i += limit) {
    QueryResult<Object[]> qr = entityMgr.readDataList(sql, i, limit);
    if (qr.getLength() == 0) {
        break;
    }
    for (Object[] data : qr.getData()) {
        // Logstash is configured to name every index "dbName_tableName",
        // so the index can be derived directly from TABLE_NAME (data[1]).
        String index = "dbName" + "_" + data[1];
        // OLD_ID (data[2]): the ES document to delete for this index.
        indexDelete.computeIfAbsent(index, k -> new HashSet<>())
                .add((Long) Utils.parseLong(data[2]));
        // ID (data[0]): the tombstone row, deletable once ES delete succeeds.
        dbDelete.computeIfAbsent(index, k -> new HashSet<>())
                .add((Long) Utils.parseLong(data[0]));
    }
}
for (Map.Entry<String, Set<Long>> entry : indexDelete.entrySet()) {
    String index = entry.getKey();
    Set<Long> ids = entry.getValue();
    if (ids.isEmpty()) {
        continue;
    }
    try {
        DeleteByQueryRequest request = new DeleteByQueryRequest(index);
        request.setQuery(QueryBuilders.termsQuery("id", ids.toArray(new Object[0])));
        // Execute the delete via the shared RestHighLevelClient.
        EsUtils.getClient().deleteByQuery(request, RequestOptions.DEFAULT);
        // Only mark tombstones for purge after the ES delete succeeded;
        // failed indexes keep their rows and are retried on the next run.
        idList.addAll(dbDelete.get(index));
    } catch (IOException e) {
        // NOTE(review): stack-trace-only handling; consider a real logger.
        e.printStackTrace();
    }
}
if (!idList.isEmpty()) {
    // ids originate from a BIGINT PK column, not user input, so building
    // the IN list by concatenation is injection-safe here.
    String deleteSql = "DELETE FROM T_ELASTIC_SEARCH_DELETE WHERE ID IN ("
            + SysUtils.collectionToString(idList, ",", null) + ")";
    entityMgr.update(deleteSql);
}
注意:在配置logstash时,自动将索引的名字写为了dbName_tableName,方便后续处理
4. 配置Logstash(Logstash用于同步数据库中的增加和修改操作),对应配置文件
# Logstash pipeline: incrementally pulls inserted/updated rows from MySQL
# (driven by the update_time tracking column) and upserts them into ES.
input {
stdin { }
jdbc {
# MySQL JDBC driver jar and driver class.
jdbc_driver_library => "mysql-connector-java-5.1.43.jar"
jdbc_driver_class => "com.mysql.jdbc.Driver"
jdbc_connection_string => "jdbc:mysql://192.168.6.27:3306/dbName"
jdbc_user => "root"
jdbc_password => "123456"
# Page large result sets instead of loading them in one query.
jdbc_paging_enabled => "true"
jdbc_page_size => "50000"
# Cron-style schedule: poll once per minute.
schedule => "* * * * *"
# Track sync progress by the update_time column rather than by run time.
use_column_value => true
tracking_column => "update_time"
tracking_column_type => "timestamp"
# NOTE(review): '>=' re-reads rows whose update_time equals the last
# tracked value on every run; upserting by document_id keeps this
# idempotent, but confirm whether '>' would lose same-second updates.
statement => "select * from table_name where update_time >= :sql_last_value"
# 'type' doubles as the ES index name; convention is dbName_tableName so
# the delete job can reconstruct the index from the tombstone table.
type => "dbName_table_name"
# Persists :sql_last_value across restarts.
last_run_metadata_path => "../path/dbName_table_name.log"
}
}
output {
elasticsearch {
index => "%{type}"
# Document id mirrors the row's primary key, making repeated syncs upserts.
document_id => "%{id}"
# NOTE(review): document_type is deprecated in ES 6.x and removed in 7.x —
# verify the target Elasticsearch version.
document_type => "%{type}" # <- use the type from each input
hosts => "localhost:9200"
}
# Echo processed events to stdout for debugging.
stdout { codec => rubydebug }
}

本文介绍了一种高效的数据同步策略,通过在MySQL表中增加UPDATE_TIME字段,利用触发器记录删除操作,并结合定时任务和Logstash配置,实现MySQL到Elasticsearch的实时数据同步。
6224





