说明
- Elasticsearch 版本7.2.0
- 同义词插件:elasticsearch-analysis-dynamic-synonym
- 无停机动态远程更新同义词
1、下载同义词插件
下载地址:
https://github.com/bells/elasticsearch-analysis-dynamic-synonym
dynamic synonym version | ES version |
---|---|
master | 7.x -> master |
6.1.4 | 6.1.4 |
5.2.0 | 5.2.0 |
5.1.1 | 5.1.1 |
2.3.0 | 2.3.0 |
2.2.0 | 2.2.0 |
2.1.0 | 2.1.0 |
2.0.0 | 2.0.0 |
1.6.0 | 1.6.X |
Elasticsearch 的插件版本必须与 ES 版本号一致,所以下载同义词插件源码后,需要修改版本号并重新编译:
修改 pom.xml
2、重写远程词库加载类
2.1 新建 DBRemoteSynonymFile.java 文件
说明:这里主要是对 LocalSynonymFile 及 RemoteSynonymFile 类进行仿写
主要有三个方法:
- reloadSynonymMap 重新加载同义词
- isNeedReloadSynonymMap 重新加载同义词的条件
- getReader 同义词的来源
package com.bellszhu.elasticsearch.plugin.synonym.analysis;
import com.bellszhu.elasticsearch.plugin.DynamicSynonymPlugin;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.synonym.SolrSynonymParser;
import org.apache.lucene.analysis.synonym.SynonymMap;
import org.apache.lucene.analysis.synonym.WordnetSynonymParser;
import org.elasticsearch.common.io.PathUtils;
import org.elasticsearch.env.Environment;
import java.io.*;
import java.nio.file.Path;
import java.sql.*;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Properties;
/**
 * {@link SynonymFile} implementation that pulls synonym rules from a database over JDBC.
 *
 * <p>The JDBC driver, URL, credentials and the two SQL statements used by this class are read
 * from {@code config/jdbc-reload.properties}, resolved relative to the directory that contains
 * the plugin jar. One query returns the synonym rules (column {@code words}), the other returns
 * the last modification time of the synonym table (column {@code last_modify_dt}).
 *
 * <p>A single {@link Connection}/{@link Statement} pair is created lazily and reused across
 * calls; it is discarded after any {@link SQLException} so that the next call reconnects.
 *
 * <p>Created 2019/8/27.
 */
public class DBRemoteSynonymFile implements SynonymFile {

    /** Name of the settings file looked up in the plugin's {@code config} directory. */
    private final static String DB_PROPERTIES = "jdbc-reload.properties";

    private static final Logger logger = LogManager.getLogger("dynamic-synonym");

    private String format;
    private boolean expand;
    private Analyzer analyzer;
    private Environment env;

    // Database-related state
    private String location;
    /** Last modification time (epoch millis) seen by {@link #isNeedReloadSynonymMap()}. */
    private long lastModified;
    private Connection connection = null;
    private Statement statement = null;
    private Properties props;
    /** Absolute path of the {@value #DB_PROPERTIES} file. */
    private Path conf_dir;

    /**
     * Creates the database-backed synonym source and loads its JDBC settings.
     *
     * @param env      Elasticsearch environment (stored, not otherwise used by this class)
     * @param analyzer analyzer handed to the synonym parser
     * @param expand   expand flag handed to the synonym parser
     * @param format   synonym format handed to the synonym parser
     * @param location configured synonym location; only used in log messages here
     */
    DBRemoteSynonymFile(Environment env, Analyzer analyzer,
                        boolean expand, String format, String location) {
        this.analyzer = analyzer;
        this.expand = expand;
        this.format = format;
        this.env = env;
        this.location = location;
        this.props = new Properties();

        // Locate <directory containing the plugin jar>/config/jdbc-reload.properties
        Path filePath = PathUtils.get(new File(DynamicSynonymPlugin.class.getProtectionDomain().getCodeSource()
                .getLocation().getPath())
                .getParent(), "config")
                .toAbsolutePath();
        this.conf_dir = filePath.resolve(DB_PROPERTIES);

        // try-with-resources guarantees the stream is closed even when loading fails
        try (InputStream input = new FileInputStream(conf_dir.toFile())) {
            props.load(input);
        } catch (FileNotFoundException e) {
            logger.info("jdbc-reload.properties not find. " + e);
        } catch (IOException e) {
            logger.error("fail to load the jdbc-reload.properties,", e);
        }

        // Result intentionally ignored: this call only records the current modification time
        // in lastModified, so later checks report a change only when the table changes again.
        isNeedReloadSynonymMap();
    }

    /**
     * Loads the synonym rules from the database and builds a {@link SynonymMap} from them.
     *
     * @return the freshly built synonym map
     * @throws IllegalArgumentException if reading or parsing the rules fails
     */
    @Override
    public SynonymMap reloadSynonymMap() {
        try {
            logger.info("start reload local synonym from {}.", location);
            Reader rulesReader = getReader();
            SynonymMap.Builder parser = RemoteSynonymFile.getSynonymParser(rulesReader, format, expand, analyzer);
            return parser.build();
        } catch (Exception e) {
            // The throwable must be the LAST argument so that {} is filled with the location
            // and the stack trace is logged.
            logger.error("reload local synonym {} error!", location, e);
            throw new IllegalArgumentException(
                    "could not reload local synonyms file to build synonyms", e);
        }
    }

    /**
     * Tells whether the synonyms have changed since the last check.
     *
     * <p>When the modification time reported by the database is newer than the remembered one,
     * the remembered value is updated and {@code true} is returned.
     *
     * @return {@code true} if a reload is needed; {@code false} otherwise, including when the
     *         modification time could not be determined
     */
    @Override
    public boolean isNeedReloadSynonymMap() {
        try {
            Long lastModify = getLastModify();
            // null means "unknown" (query failed or returned no timestamp): do not reload
            if (lastModify != null && lastModified < lastModify) {
                lastModified = lastModify;
                return true;
            }
        } catch (Exception e) {
            logger.error("failed to check the last modified time of the synonyms", e);
        }
        return false;
    }

    /**
     * Queries the last modification time of the synonym table using the statement configured
     * under {@code jdbc.lastModified.synonym.sql}.
     *
     * @return the modification time in epoch milliseconds, or {@code null} if the query failed,
     *         returned no rows, or returned SQL {@code NULL}
     */
    public Long getLastModify() {
        Long lastModifyMillis = null;
        try (ResultSet resultSet = ensureStatement()
                .executeQuery(props.getProperty("jdbc.lastModified.synonym.sql"))) {
            while (resultSet.next()) {
                Timestamp lastModifyTs = resultSet.getTimestamp("last_modify_dt");
                // getTimestamp returns null for SQL NULL, e.g. max(...) over an empty table
                if (lastModifyTs != null) {
                    lastModifyMillis = lastModifyTs.getTime();
                }
            }
        } catch (ClassNotFoundException e) {
            logger.error("jdbc driver class not found", e);
        } catch (SQLException e) {
            logger.error("failed to query the last modified time of the synonyms", e);
            // Drop the possibly broken connection so that the next call reconnects
            resetConnection();
        }
        return lastModifyMillis;
    }

    /**
     * Reads all synonym rules using the statement configured under
     * {@code jdbc.reload.synonym.sql}.
     *
     * @return the value of column {@code words} for every returned row, in result order;
     *         the rows read so far (possibly none) if the query fails
     */
    public ArrayList<String> getDBData() {
        ArrayList<String> words = new ArrayList<>();
        try (ResultSet resultSet = ensureStatement()
                .executeQuery(props.getProperty("jdbc.reload.synonym.sql"))) {
            while (resultSet.next()) {
                words.add(resultSet.getString("words"));
            }
        } catch (ClassNotFoundException e) {
            logger.error("jdbc driver class not found", e);
        } catch (SQLException e) {
            logger.error("failed to query the synonyms", e);
            // Drop the possibly broken connection so that the next call reconnects
            resetConnection();
        }
        return words;
    }

    /**
     * Exposes the synonym rules read from the database as a character stream, one rule per line.
     *
     * @return a reader over the rules; empty if nothing could be read
     */
    @Override
    public Reader getReader() {
        StringBuilder rules = new StringBuilder();
        try {
            String lineSeparator = System.getProperty("line.separator");
            for (String rule : getDBData()) {
                logger.info("load the synonym from db," + rule);
                rules.append(rule).append(lineSeparator);
            }
        } catch (Exception e) {
            logger.error("reload synonym from db failed", e);
        }
        return new StringReader(rules.toString());
    }

    /** Returns the shared statement, opening the connection first if there is none yet. */
    private Statement ensureStatement() throws ClassNotFoundException, SQLException {
        if (connection == null || statement == null) {
            Class.forName(props.getProperty("jdbc.driver"));
            connection = DriverManager.getConnection(
                    props.getProperty("jdbc.url"),
                    props.getProperty("jdbc.user"),
                    props.getProperty("jdbc.password")
            );
            statement = connection.createStatement();
        }
        return statement;
    }

    /** Closes and forgets the shared statement and connection; close failures are only logged. */
    private void resetConnection() {
        if (statement != null) {
            try {
                statement.close();
            } catch (SQLException e) {
                logger.warn("failed to close the jdbc statement", e);
            }
            statement = null;
        }
        if (connection != null) {
            try {
                connection.close();
            } catch (SQLException e) {
                logger.warn("failed to close the jdbc connection", e);
            }
            connection = null;
        }
    }
}
2.2 修改 DynamicSynonymTokenFilterFactory 类
说明:DynamicSynonymTokenFilterFactory 是对词库的路径进行选择,通过不同的参数设置,调用不同路径下的词库:
主要是通过 `synonyms_path` 这个参数进行设置。
新添一个路径,代码如下:
// Choose the synonym source implementation from the configured location value.
SynonymFile synonymFile;
// "fromDB" is the marker value that selects the database-backed source; the marker can be customized
if (location.equals("fromDB")) {
synonymFile = new DBRemoteSynonymFile(env, analyzer, expand, format,
location);
// An http(s) URL selects the remote source
} else if (location.startsWith("http://") || location.startsWith("https://")) {
synonymFile = new RemoteSynonymFile(env, analyzer, expand, format,
location);
// Any other value is handed to the local source
} else {
synonymFile = new LocalSynonymFile(env, analyzer, expand, format,
location);
}
// Build the initial synonym map from the selected source
synonymMap = synonymFile.reloadSynonymMap();
2.3 创建配置文件
在工程的同级目录下新建一个 config/jdbc-reload.properties 配置文件,便于用户修改数据库连接信息和查询 SQL。
## Database settings read by DBRemoteSynonymFile
# JDBC connection URL, user and password passed to DriverManager.getConnection
jdbc.url=jdbc:postgresql://192.168.***.***:5432/search
jdbc.user=***
jdbc.password=***
# Query that returns the synonym rules; each row must expose a column named "words"
jdbc.reload.synonym.sql=SELECT words FROM public.sys_synonym_t where is_vaild = true
# Query that returns the last modification time; the result must expose a timestamp column named "last_modify_dt"
jdbc.lastModified.synonym.sql=SELECT max(last_modify_dt) as last_modify_dt FROM public.sys_synonym_t
# Fully qualified JDBC driver class name, loaded with Class.forName
jdbc.driver=org.postgresql.Driver
2.4 修改 plugin.xml 文件
2.5 编译并打包
2.6 上传至服务器
2.6.1 在 ES 安装路径下的 plugins 文件夹中,新建 analyzer-synonym 文件夹
[root@console plugins]# pwd
${ELASTIC_HOME}/plugins
[root@console plugins]# ll
total 8
drwxrwxr-x 3 elastic elastic 4096 Aug 29 16:49 analyzer-synonym
drwxrwxr-x 3 elastic elastic 4096 Aug 29 17:38 ik-analysis
2.6.2 解压并修改文件的所属用户和用户组
[root@console analyzer-synonym]# ls
elasticsearch-analysis-dynamic-synonym-7.2.0.zip
[root@console analyzer-synonym]# unzip elasticsearch-analysis-dynamic-synonym-7.2.0.zip
Archive: elasticsearch-analysis-dynamic-synonym-7.2.0.zip
creating: config/
inflating: config/jdbc-reload.properties
inflating: plugin-descriptor.properties
inflating: plugin-security.policy
inflating: httpclient-4.4.1.jar
inflating: httpcore-4.4.1.jar
inflating: commons-logging-1.2.jar
inflating: commons-codec-1.9.jar
inflating: postgresql-9.4.1212.jar
inflating: mysql-connector-java-5.1.47.jar
inflating: elasticsearch-analysis-dynamic-synonym-7.2.0.jar
[root@console analyzer-synonym]# rm -rf elasticsearch-analysis-dynamic-synonym-7.2.0.zip
[root@console analyzer-synonym]# chown -R elastic:elastic ./*
2.6.3 重启 Elasticsearch 服务
IK 分词器的远程词典热词加载方式如下:https://blog.csdn.net/weixin_43315211/article/details/99650363
3、测试
新建一个索引(包含分析器 settings 和 mapping)
PUT synonyms_index
{
"settings": {
"number_of_shards": 1,
"number_of_replicas": 1,
"analysis": {
"analyzer": {
"synonym": {
"type":"custom",
"tokenizer": "ik_smart_custom",
"filter": ["synonym_custom"]
}
},
"filter": {
"synonym_custom": {
"type": "dynamic_synonym",
"synonyms_path": "fromDB"
}
}
}
},
"mappings": {
"properties": {
"name": {
"type": "text",
"analyzer": "synonym"
}
}
}
}
测试:
GET /synonyms_index/_analyze
{
"text": "开心",
"analyzer": "synonym"
}
{
"tokens" : [
{
"token" : "开心",
"start_offset" : 0,
"end_offset" : 2,
"type" : "CN_WORD",
"position" : 0
},
{
"token" : "高兴",
"start_offset" : 0,
"end_offset" : 2,
"type" : "SYNONYM",
"position" : 0
}
]
}
可以明显的看出,已经进行了同义词分词。
在同义词库中新增一条同义词
查看 elasticsearch 服务器日志
可以看到同义词库已经进行更新
GET /synonyms_index/_analyze
{
"text": "开心",
"analyzer": "synonym"
}
{
"tokens" : [
{
"token" : "开心",
"start_offset" : 0,
"end_offset" : 2,
"type" : "CN_WORD",
"position" : 0
},
{
"token" : "高兴",
"start_offset" : 0,
"end_offset" : 2,
"type" : "SYNONYM",
"position" : 0
},
{
"token" : "开森",
"start_offset" : 0,
"end_offset" : 2,
"type" : "SYNONYM",
"position" : 0
}
]
}
后续对删除也进行了测试,同样可以实现。
问题解决
[2019-09-02T14:18:17,613][ERROR][o.w.a.d.Monitor ] [master01] erorr
org.postgresql.util.PSQLException: Your security policy has prevented the connection from being attempted. You probably need to grant the connect java.net.SocketPermission to the database server host and port that you wish to connect to.
at org.postgresql.Driver.connect(Driver.java:287) ~[postgresql-9.4.1212.jar:9.4.1212]
at java.sql.DriverManager.getConnection(DriverManager.java:664) ~[?:1.8.0_191]
at java.sql.DriverManager.getConnection(DriverManager.java:247) ~[?:1.8.0_191]
at org.wltea.analyzer.dic.Dictionary.loadDBStopWordsDict(Dictionary.java:573) [elasticsearch-analysis-ik-7.2.0.jar:?]
at org.wltea.analyzer.dic.Dictionary.access$300(Dictionary.java:61) [elasticsearch-analysis-ik-7.2.0.jar:?]
at org.wltea.analyzer.dic.Dictionary$StopDictReloadThread.run(Dictionary.java:718) [elasticsearch-analysis-ik-7.2.0.jar:?]
at java.lang.Thread.run(Thread.java:748) [?:1.8.0_191]
Caused by: java.security.AccessControlException: access denied ("java.net.SocketPermission" "192.168.108.126:5432" "connect,resolve")
at java.security.AccessControlContext.checkPermission(AccessControlContext.java:472) ~[?:1.8.0_191]
at java.security.AccessController.checkPermission(AccessController.java:884) ~[?:1.8.0_191]
at java.lang.SecurityManager.checkPermission(SecurityManager.java:549) ~[?:1.8.0_191]
at java.lang.SecurityManager.checkConnect(SecurityManager.java:1051) ~[?:1.8.0_191]
at java.net.Socket.connect(Socket.java:584) ~[?:1.8.0_191]
at org.postgresql.core.PGStream.<init>(PGStream.java:61) ~[postgresql-9.4.1212.jar:9.4.1212]
at org.postgresql.core.v3.ConnectionFactoryImpl.openConnectionImpl(ConnectionFactoryImpl.java:144) ~[postgresql-9.4.1212.jar:9.4.1212]
at org.postgresql.core.ConnectionFactory.openConnection(ConnectionFactory.java:52) ~[postgresql-9.4.1212.jar:9.4.1212]
at org.postgresql.jdbc.PgConnection.<init>(PgConnection.java:216) ~[postgresql-9.4.1212.jar:9.4.1212]
at org.postgresql.Driver.makeConnection(Driver.java:404) ~[postgresql-9.4.1212.jar:9.4.1212]
at org.postgresql.Driver.connect(Driver.java:272) ~[postgresql-9.4.1212.jar:9.4.1212]
主要报错:
Caused by: java.security.AccessControlException: access denied ("java.net.SocketPermission" "192.168.108.126:5432" "connect,resolve")
这是 Java 安全策略(Security Policy)的权限问题,需要在 java.policy 文件中添加相应的 SocketPermission 权限:
[elastic@master01 bin]$ sudo vim $JAVA_HOME/jre/lib/security/java.policy
// Standard extensions get all permissions by default
grant codeBase "file:${{java.ext.dirs}}/*" {
permission java.security.AllPermission;
};
// default permissions granted to all domains
grant {
// Allows any thread to stop itself using the java.lang.Thread.stop()
// method that takes no argument.
// Note that this permission is granted by default only to remain
// backwards compatible.
// It is strongly recommended that you either remove this permission
// from this policy file or further restrict it to code sources
// that you specify, because Thread.stop() is potentially unsafe.
// See the API specification of java.lang.Thread.stop() for more
// information.
permission java.lang.RuntimePermission "stopThread";
// allows anyone to listen on dynamic ports
permission java.net.SocketPermission "localhost:0", "listen";
//添加对应的权限
permission java.net.SocketPermission "*", "connect,resolve";