为什么会有这篇文章?
ES 的 Java 生态有两个主流客户端:官方客户端和 Spring Data Elasticsearch 提供的客户端。
不管哪一个,关于suggestion的代码示例都非常少。
这里就是为了弥补这个空白。
基于 Spring Data 的 ES 客户端
phrase suggestion的代码示例
/**
 * Sends a phrase-suggester request for {@code text} to the given index.
 *
 * <p>The suggester runs on the trigram sub-field ({@code ner_pinyin.trigram} when
 * {@code isPinyin} is true, otherwise {@code ner.trigram}) with a single direct
 * candidate generator, and attaches a collate query (a match_phrase template) with
 * pruning enabled.
 *
 * @param indexName index to search
 * @param text      user input to generate suggestions for
 * @param isPinyin  whether to use the pinyin trigram field instead of the plain one
 * @return the raw search response, or {@code null} if the request threw an exception
 *         (the exception is logged, not rethrown)
 */
public SearchResponse phraseSuggest(String indexName, final String text, final boolean isPinyin){
    final String field = isPinyin ? "ner_pinyin.trigram" : "ner.trigram";
    final int maxEdit = 2;

    // Collate template: each candidate is substituted for {{suggestion}} and run as a
    // match_phrase query against {{field_name}}.
    final QueryBuilder queryBuilder = QueryBuilders.matchPhraseQuery("{{field_name}}", "{{suggestion}}");
    final Map<String, Object> params = new HashMap<>();
    // NOTE(review): the collate field is always "ner", even when isPinyin selects the
    // pinyin trigram field above - confirm this is intended.
    params.put("field_name", "ner");

    final DirectCandidateGeneratorBuilder candidateGenerator = new DirectCandidateGeneratorBuilder(field)
            .suggestMode("always") // for search, "always" is recommended; for spell correction "missing" may fit better
            .maxEdits(maxEdit)     // default 2
            .prefixLength(0)       // default 1
            .minWordLength(2)      // default 4
            .maxInspections(5)     // default 5; multiplied with shards_size - bigger recalls more but is slower
            .size(100);            // default 5

    final PhraseSuggestionBuilder phraseSuggestionBuilder = SuggestBuilders.phraseSuggestion(field)
            .addCandidateGenerator(candidateGenerator)
            .text(text)
            .maxErrors(maxEdit) // TODO: revisit whether max errors should track maxEdit
            .confidence(1)      // default 1.0f; with 0 or a low value the input itself is returned
            .gramSize(3)
            // If shardSize is too small some candidates are never recalled, e.g. an address
            // containing "宝平路" failed to recall the variant "宝坪路".
            .shardSize(100)
            .size(100)
            .highlight("<em>", "</em>")
            .collateQuery(queryBuilder.toString())
            .collateParams(params)
            .collatePrune(true);

    final SuggestBuilder suggestBuilder = new SuggestBuilder().addSuggestion(SUGGESTION_FIELD, phraseSuggestionBuilder);
    final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
    searchSourceBuilder.suggest(suggestBuilder);

    final SearchRequest searchRequest = new SearchRequest();
    searchRequest.indices(indexName);
    searchRequest.source(searchSourceBuilder);

    try {
        return this.client.search(searchRequest, RequestOptions.DEFAULT);
    } catch (Exception e) {
        // Best-effort by design: callers receive null on failure. The throwable is passed as
        // the trailing argument so the stack trace is logged instead of only e.toString().
        log.error("{}", e.toString(), e);
        return null;
    }
}
/**
 * Collects the distinct suggestion texts found under {@code SUGGESTION_FIELD} in the
 * given search responses, in first-seen order.
 *
 * <p>Each option's raw (non-highlighted) text is passed together with {@code inputQuery}
 * to {@code ingoreNumberFixed} (defined elsewhere), and the value it returns is what gets
 * collected. Responses that are {@code null}, carry no suggest section, or lack the named
 * suggestion are skipped instead of causing a {@link NullPointerException}.
 *
 * @param inputQuery      the original user query, forwarded to {@code ingoreNumberFixed}
 * @param searchResponses responses to read suggestions from; may be {@code null} or
 *                        contain {@code null} elements
 * @return a new mutable list of de-duplicated suggestions; empty when nothing was found
 */
public List<String> handlePossibleMistakeSuggestions(String inputQuery, List<SearchResponse> searchResponses){
    if (searchResponses == null) {
        return new ArrayList<>();
    }
    // Insertion-ordered set keeps first-seen order while de-duplicating in O(1) per option.
    // Fully qualified so this method does not depend on imports beyond those it already used.
    final java.util.Set<String> uniqueSuggestions = new java.util.LinkedHashSet<>();
    for (SearchResponse searchResponse : searchResponses) {
        // Skip failed searches (null response) and responses without the expected suggestion.
        if (searchResponse == null
                || searchResponse.getSuggest() == null
                || searchResponse.getSuggest().getSuggestion(SUGGESTION_FIELD) == null) {
            continue;
        }
        searchResponse.getSuggest().getSuggestion(SUGGESTION_FIELD).getEntries().forEach(entry -> entry.forEach(option -> {
            // Suggestion text without highlight tags.
            final String esRawPhraseSuggest = option.getText().toString();
            final float score = option.getScore();
            final boolean collateMatch = option.collateMatch();
            log.info("phrase suggest({}): {}({})", collateMatch, esRawPhraseSuggest, score);
            uniqueSuggestions.add(ingoreNumberFixed(inputQuery, esRawPhraseSuggest));
        }));
    }
    return new ArrayList<>(uniqueSuggestions);
}