These are notes from an Elasticsearch case study I did about a year ago, kept here for future reference.
Utility class
package com.szkingdom;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.TransportAddress;
import org.elasticsearch.transport.client.PreBuiltTransportClient;
import java.net.InetAddress;
import java.net.UnknownHostException;
/**
* 2018/10/8
* Author: Hezm
*/
public class ESTools {
private static TransportClient client;
private static Settings settings = Settings.builder()
.put("cluster.name","my-application")
.put("client.transport.sniff",false)
.build();
public static synchronized TransportClient getClient(){
if(null == client){
try {
client = new PreBuiltTransportClient(settings)
.addTransportAddress(new TransportAddress(
InetAddress.getByName("127.0.0.1"),
9300));
} catch (UnknownHostException e) {
e.printStackTrace();
}
}
return client;
}
}
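Usage sketch (my addition, not part of the original notes): the client is created lazily and shared, so callers fetch it where needed and close it once at application shutdown.
TransportClient client = ESTools.getClient();
// ... run index / search / delete requests ...
client.close();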
Full example:
package com.szkingdom;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.bulk.*;
import org.elasticsearch.action.delete.DeleteRequest;
import org.elasticsearch.action.delete.DeleteResponse;
import org.elasticsearch.action.get.GetResponse;
import org.elasticsearch.action.get.MultiGetItemResponse;
import org.elasticsearch.action.get.MultiGetResponse;
import org.elasticsearch.action.index.IndexResponse;
import org.elasticsearch.action.search.MultiSearchResponse;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.update.UpdateRequest;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.geo.GeoPoint;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.TransportAddress;
import org.elasticsearch.common.unit.ByteSizeUnit;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.reindex.BulkByScrollResponse;
import org.elasticsearch.index.reindex.DeleteByQueryAction;
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.script.mustache.SearchTemplateRequestBuilder;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.bucket.filter.Filters;
import org.elasticsearch.search.aggregations.bucket.filter.FiltersAggregator;
import org.elasticsearch.search.aggregations.bucket.global.Global;
import org.elasticsearch.search.aggregations.bucket.histogram.DateHistogramInterval;
import org.elasticsearch.search.aggregations.bucket.histogram.Histogram;
import org.elasticsearch.search.aggregations.bucket.missing.Missing;
import org.elasticsearch.search.aggregations.bucket.nested.Nested;
import org.elasticsearch.search.aggregations.bucket.nested.ReverseNested;
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
import org.elasticsearch.search.aggregations.metrics.geobounds.GeoBounds;
import org.elasticsearch.search.aggregations.metrics.geobounds.GeoBoundsAggregationBuilder;
import org.elasticsearch.search.aggregations.metrics.max.Max;
import org.elasticsearch.search.aggregations.metrics.max.MaxAggregationBuilder;
import org.elasticsearch.search.aggregations.metrics.min.Min;
import org.elasticsearch.search.aggregations.metrics.min.MinAggregationBuilder;
import org.elasticsearch.search.aggregations.metrics.percentiles.Percentile;
import org.elasticsearch.search.aggregations.metrics.percentiles.Percentiles;
import org.elasticsearch.search.aggregations.metrics.percentiles.PercentilesAggregationBuilder;
import org.elasticsearch.search.aggregations.metrics.stats.Stats;
import org.elasticsearch.search.aggregations.metrics.tophits.TopHits;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.sort.FieldSortBuilder;
import org.elasticsearch.search.sort.SortOrder;
import org.elasticsearch.transport.client.PreBuiltTransportClient;
import java.io.IOException;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.index.query.QueryBuilders.termQuery;
/**
* Elasticsearch 6.4.0 demo: ElasticsearchDemo
* 2018/8/25
* Author: Hezm
*/
public class ElasticsearchDemo {
// Uses the TransportClient. Note: the TransportClient is deprecated since 7.0 and removed in 8.0; new code should use the high-level REST client.
private TransportClient client;
public ElasticsearchDemo() {
Settings settings = Settings.builder()
.put("cluster.name","my-application")
.put("client.transport.sniff",false)
.build();
try {
this.client = new PreBuiltTransportClient(settings)
.addTransportAddress(new TransportAddress(
InetAddress.getByName("127.0.0.1"),
9300));
} catch (UnknownHostException e) {
e.printStackTrace();
}
}
public void close(){
client.close();
}
/**
* The index API indexes a typed JSON document into a specific index and makes it searchable.
* Four ways to generate the JSON document:
* 1. Manually, e.g. as a native byte[] or a hand-built String
* 2. A Map, which is automatically converted to its JSON equivalent
* 3. A third-party library that serializes your bean, e.g. Jackson
* 4. The built-in helper XContentFactory.jsonBuilder()
*/
public void json(){
// Option 1: a native byte[] or a hand-built String
String json1 = "{" +
"\"user\":\"kimchy\"," +
"\"postDate\":\"2013-01-30\"," +
"\"message\":\"trying out Elasticsearch\"" +
"}";
// Option 2: a Map, automatically converted to its JSON equivalent
Map<String, Object> json2 = new HashMap<String, Object>();
json2.put("user","kimchy");
json2.put("postDate",new Date());
json2.put("message","trying out Elasticsearch");
// Option 3: serialize a bean with a third-party library such as Jackson (see the sketch after this method)
// Jackson dependency: https://search.maven.org/search?q=g:com.fasterxml.jackson.core
// instance a json mapper
// ObjectMapper mapper = new ObjectMapper(); // create once, reuse
// generate json
// byte[] json3 = mapper.writeValueAsBytes(yourbeaninstance);
// Option 4: the built-in helper XContentFactory.jsonBuilder()
try {
XContentBuilder builder = jsonBuilder()
.startObject()
.field("user", "kimchy")
.field("postDate", new Date())
.field("message", "trying out Elasticsearch")
.endObject();
String json = Strings.toString(builder);
} catch (IOException e) {
e.printStackTrace();
}
}
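/**
 * My addition: a hedged, runnable sketch of option 3 above. It assumes the
 * jackson-databind dependency has been added to the pom (it is not pulled in by the
 * transport client). A Map stands in for a real bean to keep the example self-contained.
 */
public void jacksonJson() {
    // create once, reuse
    com.fasterxml.jackson.databind.ObjectMapper mapper = new com.fasterxml.jackson.databind.ObjectMapper();
    Map<String, Object> bean = new HashMap<>();
    bean.put("user", "kimchy");
    bean.put("message", "trying out Elasticsearch");
    try {
        // serialize to JSON bytes, ready to pass to setSource(..., XContentType.JSON)
        byte[] json3 = mapper.writeValueAsBytes(bean);
        System.out.println(new String(json3, java.nio.charset.StandardCharsets.UTF_8));
    } catch (com.fasterxml.jackson.core.JsonProcessingException e) {
        e.printStackTrace();
    }
}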
/**
* Index a document
* Option 1: index a JSON document into the index "twitter", type "tweet", with the given id
* Option 2: index the document as a JSON string without providing an id
* IndexResponse report
* Report contents: index: twitter type: tweet doc id: 1 version: 1 status: CREATED
*/
public void createIndex(String id){
IndexResponse response = null;
// Option 1: index a JSON document into the index "twitter", type "tweet", with the given id
try {
response = client.prepareIndex("twitter", "tweet", id)
.setSource(jsonBuilder()
.startObject()
.field("user", "kimchy2")
.field("postDate", new Date())
.field("message", "trying out Elasticsearch")
.field("gender", "女")
.field("wigth", 10)
.endObject()
).get();
} catch (IOException e) {
e.printStackTrace();
}
// Option 2: index the document as a JSON string without providing an id
// String json = "{" +
// "\"user\":\"kimchy\"," +
// "\"postDate\":\"2013-01-30\"," +
// "\"message\":\"trying out Elasticsearch\"" +
// "}";
//
// IndexResponse response = client.prepareIndex("twitter", "tweet")
// .setSource(json, XContentType.JSON)
// .get();
// The IndexResponse report:
// index name
String _index = response.getIndex();
// type name
String _type = response.getType();
// Document ID (generated or not)
String _id = response.getId();
// version (if it's the first time you index this document, you will get: 1)
long _version = response.getVersion();
// status of the current operation
RestStatus status = response.status();
System.out.println(" 索引:" + _index +" 类型:" + _type
+ " 文档id:" + _id +" 版本:" + _version + " 状态:" + status);
}
/**
* Query operations:
* 1. Document operation types:
* Single-document: index, get, delete, update (traditional CRUD)
* Multi-document: multi_get, bulk (batch operations)
*
*
* 2. Multi-index search:
* 1. Comma-separated list: test1,test2
* $ curl -XPOST localhost:9200/test1,test2/_search?pretty -d '{"query":{"match_all":{}}}'
* 2. All indices: "_all"
* $ curl -XPOST localhost:9200/_all/_search?pretty -d '{"query":{"match_all":{}}}'
* 3. Wildcard style "test*"
* $ curl -XPOST localhost:9200/{test*}/_search?pretty -d '{"query":{"match_all":{}}}'
* 4. Including/excluding indices: -logstash*,+test*
* $ curl -XPOST localhost:9200/-logstash*,+{test*}/_search?pretty -d '{"query":{"match_all":{}}}'
* Other parameters:
* ignore_unavailable: ignore unavailable indices
* allow_no_indices: whether to succeed when no index is available
* expand_wildcards: whether wildcards expand to open or closed indices
*
*
* 3. Date math index names:
* Format: <static_name{date_math_expr{date_format|time_zone}}>
* static index part, date math expression, date format, time zone (defaults to UTC)
* http://www.cnblogs.com/xing901022/p/5289233.html
*/
/**
* The get operation
* Verifies that a document exists or performs a CRUD-style lookup. Get is real-time: it returns the latest version of the document.
* To turn real-time off: realtime=false per request, or action.get.realtime=false globally.
* Note: get ignores stored fields and reads the values straight from _source (mapping field attributes: type, stored, analyzed).
* Search, by contrast, goes through the refresh cycle; by default a document becomes searchable after about 1 second.
*
* Get operation notes:
* ① The type (_type) is optional; _all matches all types.
* ② _source filtering:
* By default the _source field is returned, unless the fields parameter is used or _source is disabled ({_source=false}).
* curl -XGET 'http://localhost:9200/twitter/tweet/1?_source=false'
* To return specific fields, filter with _source_include and/or _source_exclude; comma-separated patterns are supported.
* curl -XGET 'http://localhost:9200/twitter/tweet/1?_source_include=*.id&_source_exclude=entities'
* You can also list the field names directly:
* curl -XGET 'http://localhost:9200/twitter/tweet/1?_source=*.id,retweeted'
* ③ fields returns specific stored fields: only leaf fields can be returned; requesting metadata fields fails.
* curl -XGET 'http://localhost:9200/twitter/tweet/1?fields=title,content'
* ④ Return only the _source field to avoid shipping unnecessary data over the network:
* curl -XGET 'http://localhost:9200/twitter/tweet/1/_source'
* ⑤ routing: if the document was indexed with a routing value, the get must pass the same routing, otherwise the document is not found.
* curl -XGET 'http://localhost:9200/twitter/tweet/1?routing=kimchy'
* ⑥ preference: controls which shard copy serves the get.
* Values:
* _primary: execute only on the primary shard
* _local: prefer a local shard copy
* Custom (string) value: the same value always hits the same shard copy, keeping results consistent across refresh states, e.g. a session id or user name.
* ⑦ refresh: set to true to refresh the shard before the get; refreshing adds load, so weigh the performance cost.
* ⑧ Distribution: get hashes the request to a specific shard; the primary and its replicas form a group, any member of which can serve the request.
* The more shard copies, the better get throughput scales.
* ⑨ version: retrieves the document only if the version parameter equals the current version; with version type FORCE any version can be retrieved.
* ES marks deleted or updated documents with version numbers; they are cleaned up in the background and are no longer accessible.
* (A routing/_source sketch, getWithOptions, follows the method below.)
*
*/
public void get(){
GetResponse response = client.prepareGet("twitter", "tweet", "1").get();
System.out.printf("执行结果为:%s",response.getSourceAsString());
}
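/**
 * My addition: a hedged sketch of notes ② and ⑤ above (routing plus _source filtering)
 * expressed through the Java API. The field names are illustrative.
 */
public void getWithOptions() {
    GetResponse response = client.prepareGet("twitter", "tweet", "1")
            .setRouting("kimchy") // must match the routing value used at index time
            .setFetchSource(new String[]{"user", "message"}, null) // _source includes / excludes
            .get();
    System.out.printf("result: %s%n", response.getSourceAsString());
}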
/**
* Delete a single document by index/type/id.
*/
public void delete(){
DeleteResponse response = client.prepareDelete("twitter", "tweet", "1").get();
}
/**
* Delete by query
* 1. Synchronous execution, returning the number of deleted documents
* 2. Asynchronous execution via a listener (see the commented-out code below)
*/
public void deleteByQuery(){
//delete the set of documents matched by the query
BulkByScrollResponse response = DeleteByQueryAction.INSTANCE.newRequestBuilder(client)
//query
.filter(QueryBuilders.matchQuery("gender", "male"))
//index
.source("twitter")
//execute the operation
.get();
//number of deleted documents
long deleted = response.getDeleted();
System.out.println("Deleted count: " + deleted);
//This can be a long-running operation; to run it asynchronously, pass a listener to execute():
// DeleteByQueryAction.INSTANCE.newRequestBuilder(client)
// .filter(QueryBuilders.matchQuery("gender", "male"))
// .source("persons")
// //execute with a listener:
// .execute(new ActionListener<BulkByScrollResponse>() {
// @Override
// public void onResponse(BulkByScrollResponse response) {
// //number of deleted documents
// long deleted = response.getDeleted();
// }
// @Override
// public void onFailure(Exception e) {
// // Handle the exception
// }
// });
}
/**
* Update operations
* Option 1: create an UpdateRequest and send it to the client for execution
* Option 2: the prepareUpdate() method
* More options: https://www.elastic.co/guide/en/elasticsearch/client/java-api/6.4/java-docs-update.html
* (An upsert sketch follows this method.)
*/
public void update(){
// Option 1: create an UpdateRequest and send it to the client for execution
UpdateRequest updateRequest = new UpdateRequest();
updateRequest.index("twitter");
updateRequest.type("tweet");
updateRequest.id("14");
try {
updateRequest.doc(jsonBuilder()
.startObject()
.field("gender", "male")
.field("user", "hezm")
.endObject());
client.update(updateRequest).get();
} catch (IOException | InterruptedException | ExecutionException e) {
e.printStackTrace();
}
// Option 2: the prepareUpdate() method
// client.prepareUpdate("ttl", "doc", "1")
//// ScriptService.ScriptType.FILE refers to a script stored on disk, by name
// .setScript(new Script("ctx._source.gender = \"male\"" ,
// ScriptService.ScriptType.INLINE, null, null))
// .get();
// client.prepareUpdate("ttl", "doc", "1")
//// a document to merge into the existing one; doc and script cannot both be provided
// .setDoc(jsonBuilder()
// .startObject()
// .field("gender", "male")
// .endObject())
// .get();
}
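/**
 * My addition: an upsert sketch based on the update docs linked above. If document 14 does
 * not exist, the upsert document is indexed; otherwise the doc merge is applied.
 * IndexRequest is written with its fully qualified name because it is not imported above.
 */
public void upsert() {
    try {
        org.elasticsearch.action.index.IndexRequest indexRequest =
                new org.elasticsearch.action.index.IndexRequest("twitter", "tweet", "14")
                        .source(jsonBuilder()
                                .startObject()
                                .field("user", "hezm")
                                .field("gender", "male")
                                .endObject());
        UpdateRequest updateRequest = new UpdateRequest("twitter", "tweet", "14")
                .doc(jsonBuilder()
                        .startObject()
                        .field("gender", "male")
                        .endObject())
                .upsert(indexRequest); // used only when the document is missing
        client.update(updateRequest).get();
    } catch (IOException | InterruptedException | ExecutionException e) {
        e.printStackTrace();
    }
}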
/**
* Multi get
* Fetch a list of documents by index, type, and id
*/
public void multiGet(){
MultiGetResponse multiGetItemResponses = client.prepareMultiGet()
// a single id
.add("twitter", "tweet", "14")
// or a list of ids for the same index and type
.add("twitter", "tweet", "2", "3", "4")
.get();
// iterate over the result set
for (MultiGetItemResponse itemResponse : multiGetItemResponses) {
GetResponse response = itemResponse.getResponse();
// check that the document exists
if (response.isExists()) {
// access the _source field
String json = response.getSourceAsString();
System.out.printf("getSource = %s",json);
}
}
}
/**
* Bulk
* Index and delete multiple documents in a single request
*/
public void bulkGet(){
BulkRequestBuilder bulkRequest = client.prepareBulk();
// either use client#prepare, or use Requests# to directly build index/delete requests
try {
bulkRequest.add(client.prepareIndex("twitter", "tweet","2")
.setSource(jsonBuilder()
.startObject()
.field("user", "kimchy")
.field("postDate", new Date())
.field("message", "trying out Elasticsearch")
.endObject()
)
);
} catch (IOException e) {
e.printStackTrace();
}
// add a second index request
try {
bulkRequest.add(client.prepareIndex("twitter", "tweet", "3")
.setSource(jsonBuilder()
.startObject()
.field("user", "kimchy")
.field("postDate", new Date())
.field("message", "another post")
.endObject()
)
);
} catch (IOException e) {
e.printStackTrace();
}
BulkResponse bulkResponse = bulkRequest.get();
if (bulkResponse.hasFailures()) {
// process failures by iterating through each bulk response item
}
}
/**
* Using the BulkProcessor
* The BulkProcessor batches operations by request count or request size, and can also flush automatically at a fixed interval.
*/
public void bulkProcessor(){
//create the bulk processor
BulkProcessor bulkProcessor = BulkProcessor.builder(
client, //the Elasticsearch client
new BulkProcessor.Listener() {
@Override
public void beforeBulk(long executionId,
BulkRequest request) { } //called before each bulk execution, e.g. inspect request.numberOfActions()
@Override
public void afterBulk(long executionId,
BulkRequest request,
BulkResponse response) { } //called after each bulk execution, e.g. check for failed requests with response.hasFailures()
@Override
public void afterBulk(long executionId, //called when a bulk request fails with a Throwable
BulkRequest request,
Throwable failure) { }
})
.setBulkActions(10000) //flush every 10,000 requests (default: 1000)
.setBulkSize(new ByteSizeValue(5, ByteSizeUnit.MB)) //flush every 5MB (default: 5MB)
.setFlushInterval(TimeValue.timeValueSeconds(5)) //flush every 5s regardless of request count (not set by default)
.setConcurrentRequests(1) //number of concurrent requests; 0: execute a single request synchronously, 1: execute one bulk asynchronously while accumulating the next (default: 1)
.setBackoffPolicy(
//custom backoff policy: start at 100ms, grow exponentially, retry up to 3 times. A retry is
//attempted when a bulk item fails with EsRejectedExecutionException, which signals that too few
//compute resources were available. To disable backoff, pass BackoffPolicy.noBackoff().
//The default is exponential backoff with 8 retries, a 50ms initial delay, and about 5.1s total wait.
BackoffPolicy.exponentialBackoff(TimeValue.timeValueMillis(100), 3))
.build();
// add requests
// bulkProcessor.add(new IndexRequest("twitter", "tweet", "1").source(/* your doc here */));
bulkProcessor.add(new DeleteRequest("twitter", "tweet", "2"));
//Both ways of closing flush any remaining documents and disable the flushInterval if one was set.
//Close option 1: awaitClose waits for outstanding bulk requests; returns true if all of them completed, false if the timeout elapsed first.
try {
bulkProcessor.awaitClose(10, TimeUnit.MINUTES);
} catch (InterruptedException e) {
e.printStackTrace();
}
// Close option 2: close() exits immediately without waiting for pending bulk operations
bulkProcessor.close();
/* Single-threaded usage in tests: */
//If you are running tests against Elasticsearch and use the BulkProcessor to populate a dataset,
//set the number of concurrent requests to 0 so that flushes execute synchronously:
// bulkProcessor = BulkProcessor.builder(client, new BulkProcessor.Listener() { /* Listener methods */ })
// .setBulkActions(10000)
// .setConcurrentRequests(0)
// .build();
// add requests
// bulkProcessor.add(/* Your requests */);
// flush any outstanding requests
bulkProcessor.flush();
// close the processor
bulkProcessor.close();
// refresh the indices
client.admin().indices().prepareRefresh().get();
// Now you can start searching!
client.prepareSearch().get();
}
/**
* Since this project is mainly about search and statistics,
* the following are omitted here:
* Update By Query: update documents matched by a query
* Reindex: rebuild an index
*/
/**
* Search API
* 1. Using scrolls in Java
* 2. The multi search API
* 3. Aggregations
* 4. Terminate after
* 5. Search templates
*/
/**
* Using scrolls in Java:
* A scroll is much like a database cursor and targets large result sets: each request returns one "page" of results, and the results reflect a snapshot of the index taken at the first request.
* Example use: reindexing the contents of one index into another with a different configuration.
* Detailed explanation: https://www.jianshu.com/p/14aa8b09c789
* 1. scroll=1m tells Elasticsearch how long to keep the search context alive;
* the returned scroll_id is passed to the scroll API to fetch the next batch of results (GET and POST both work).
* 2. search_type=scan skips scoring and sorting so results return efficiently;
* track_scores=true computes scores even when not sorting.
* 3. Check how many search contexts are open:
* curl -XGET localhost:9200/_nodes/stats/indices/search?pretty
* 4. Clear a scroll:
* clear-scroll (see the clearScroll sketch after this method)
* (I have not fully worked this out; I rarely use scrolls.)
*/
public void scrolls(){
QueryBuilder qb = termQuery("user", "kimchy");
SearchResponse scrollResp = client.prepareSearch("test")
.addSort(FieldSortBuilder.DOC_FIELD_NAME, SortOrder.ASC)
.setScroll(new TimeValue(60000))
.setQuery(qb)
.setSize(10).get(); //a maximum of 10 hits is returned per scroll page
//Scroll until no more hits are returned
do {
for (SearchHit hit : scrollResp.getHits().getHits()) {
//Handle the hit...
}
scrollResp = client.prepareSearchScroll(scrollResp.getScrollId()).setScroll(new TimeValue(60000)).execute().actionGet();
} while(scrollResp.getHits().getHits().length != 0); // Zero hits mark the end of the scroll and the while loop.
}
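/**
 * My addition: a sketch of point 4 above, releasing the search context early instead of
 * waiting for the scroll timeout. ClearScrollResponse is written with its fully qualified
 * name because it is not imported above.
 */
public void clearScroll(String scrollId) {
    org.elasticsearch.action.search.ClearScrollResponse response = client.prepareClearScroll()
            .addScrollId(scrollId)
            .get();
    System.out.println("scroll cleared: " + response.isSucceeded());
}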
/**
* Multi search
*
* 1. Using QueryBuilder
* * termQuery("key", obj): exact match
* * termsQuery("key", obj1, obj2..): match any of several values
* * matchQuery("key", Obj): single-field match; the field does not support wildcards; prefixes have advanced features
* * multiMatchQuery("text", "field1", "field2"..): match against several fields; wildcards in field names work
* * matchAllQuery(): match all documents
*
* 2. Compound queries (see the boolQuery sketch after this method)
* * must(QueryBuilders): AND
* * mustNot(QueryBuilders): NOT
* * should: OR
*
* 3. Query by ids only
* * QueryBuilders.idsQuery(String...type).ids(Collection<String> ids)
*
* 4. Wrapper queries
* * constant score: a fixed boosted score, no relevance computed
* QueryBuilders.constantScoreQuery(QueryBuilders.termQuery("name", "kimchy")).boost(2.0f);
* * filter variant (FilterBuilders is from older versions; in 6.x use QueryBuilders throughout)
* QueryBuilders.constantScoreQuery(FilterBuilders.termQuery("name", "kimchy")).boost(2.0f);
*
* 5. disMax queries
* * Takes the union of the sub-query results; the score is the maximum of the sub-query scores,
* * widely used for multi-field queries
* QueryBuilders.disMaxQuery()
* .add(QueryBuilders.termQuery("user", "kimch")) // query condition
* .add(QueryBuilders.termQuery("message", "hello"))
* .boost(1.3f)
* .tieBreaker(0.7f);
* 6. Fuzzy matching
* QueryBuilders.fuzzyQuery("user", "kimch");
* https://www.cnblogs.com/wenbronk/p/6432990.html
*
* Also available:
* Parent/child document queries
* Content-based recommendation: one-sentence similar-article search (highly relevant matches)
* Prefix queries
* Query-string queries (parsed query strings)
* Range queries (upper and lower bounds)
* Span queries
* Wildcard queries: * matches any character sequence; avoid a leading *, which scans huge amounts of content and is slow
* Nested queries, for nested documents
* Index queries
* * extracting hits by iterating over the response
* * analysing the response
* * highlighting the results
*
*/
public void multiSearch(){
SearchRequestBuilder srb1 = client
.prepareSearch().setQuery(QueryBuilders.queryStringQuery("elasticsearch")).setSize(1);
SearchRequestBuilder srb2 = client
.prepareSearch().setQuery(QueryBuilders.matchQuery("user", "kimchy")).setSize(1);
MultiSearchResponse sr = client.prepareMultiSearch()
.add(srb1)
.add(srb2)
.get();
// You will get all individual responses from MultiSearchResponse#getResponses()
long nbHits = 0;
for (MultiSearchResponse.Item item : sr.getResponses()) {
SearchResponse response = item.getResponse();
nbHits += response.getHits().getTotalHits();
System.out.println(nbHits + " total hits; response: " + response.toString());
}
}
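/**
 * My addition: a runnable sketch of the compound query from point 2 of the comment above
 * (must = AND, mustNot = NOT, should = OR). The field values are illustrative.
 */
public void boolQuery() {
    QueryBuilder qb = QueryBuilders.boolQuery()
            .must(QueryBuilders.termQuery("user", "kimchy"))    // AND
            .mustNot(QueryBuilders.termQuery("gender", "male")) // NOT
            .should(QueryBuilders.matchQuery("message", "elasticsearch")); // OR: optional, raises the score
    SearchResponse response = client.prepareSearch("twitter").setQuery(qb).get();
    System.out.println("hits: " + response.getHits().getTotalHits());
}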
/**
* Aggregations
* terms = GROUP BY
* select team, count(*) as player_count from player group by team;
* TermsBuilder teamAgg = AggregationBuilders.terms("player_count").field("team");
* sbuilder.addAggregation(teamAgg);
* SearchResponse response = sbuilder.execute().actionGet();
* dateHistogram = bucketing by year
* (A bucket-reading sketch follows this method.)
* https://blog.youkuaiyun.com/mlljava1111/article/details/70598519
* buckets (GROUP BY) + metrics (SELECT ... FROM): SELECT grade,class,count(1) FROM student GROUP BY grade,class;
*/
public void aggregations(){
SearchResponse sr = client.prepareSearch("twitter")
.setQuery(QueryBuilders.matchAllQuery())
.addAggregation(
AggregationBuilders.terms("gender_count").field("gender")
)
.addAggregation(
AggregationBuilders.dateHistogram("date_year")
.field("postDate")
.dateHistogramInterval(DateHistogramInterval.YEAR)
)
.get();
// Get your aggregation results, keyed by the names used above. Terms: terms buckets; Histogram: histogram buckets
Terms agg1 = sr.getAggregations().get("gender_count");
Histogram agg2 = sr.getAggregations().get("date_year");
}
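/**
 * My addition: a sketch of reading the terms buckets back out of a response like the one
 * above, keyed by the name passed to AggregationBuilders.terms(...).
 */
public void aggregationResults() {
    SearchResponse sr = client.prepareSearch("twitter")
            .setQuery(QueryBuilders.matchAllQuery())
            .addAggregation(AggregationBuilders.terms("gender_count").field("gender"))
            .get();
    Terms genderCount = sr.getAggregations().get("gender_count");
    for (Terms.Bucket bucket : genderCount.getBuckets()) {
        System.out.println(bucket.getKeyAsString() + " -> " + bucket.getDocCount());
    }
}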
/**
* Terminate after
* The maximum number of documents to collect per shard; once reached, query execution terminates early.
* If set, you can check in the response whether the operation terminated early.
*/
public void terminateAfter(){
SearchResponse sr = client.prepareSearch("twitter")
.setTerminateAfter(1000) //stop after collecting 1000 documents per shard
.get();
if (sr.isTerminatedEarly()) { //check whether the operation terminated early
// We finished early
}
}
/**
* Search templates
*/
public void searchTemplate() {
//define the template parameters as a Map<String, Object>
Map<String, Object> template_params = new HashMap<>();
template_params.put("param_gender", "male");
/*
Option 1: define the template in the file config/scripts/template_gender.mustache:
{
"query" : {
"match" : {
"gender" : "{{param_gender}}"
}
}
}
Option 2: store the template in the cluster as JSON:
client.admin().cluster().preparePutStoredScript()
.setScriptLang("mustache")
.setId("template_gender")
.setSource(new BytesArray(
"{\n" +
" \"query\" : {\n" +
" \"match\" : {\n" +
" \"gender\" : \"{{param_gender}}\"\n" +
" }\n" +
" }\n" +
"}")).get();
*/
//create the search template request
SearchResponse sr = new SearchTemplateRequestBuilder(client)
.setScript("template_gender") //template name
// .setScriptType(ScriptService.ScriptType.FILE) for a template stored on disk as template_gender.mustache
// .setScriptType(ScriptType.STORED) for a template stored in the cluster
/*
An inline template:
.setScript("{\n" +
" \"query\" : {\n" +
" \"match\" : {\n" +
" \"gender\" : \"{{param_gender}}\"\n" +
" }\n" +
" }\n" +
"}")
.setScriptType(ScriptType.INLINE)
*/
.setScriptParams(template_params) //template parameters
.setRequest(new SearchRequest()) //set the execution context (i.e. the index name)
.get() //execute and get the template response
.getResponse(); //get the search response itself out of the template response
}
/**
* 1. Structuring aggregations: sub-aggregations can be defined inside an aggregation.
* An aggregation is either a metrics aggregation or a bucket aggregation.
* Example: a three-level aggregation composed of:
* ① a terms aggregation (bucket)
* ② a date histogram (bucket)
* ③ an avg aggregation (metric)
* (A sketch that walks the response follows the method below.)
*/
public void structuringAggregations(){
SearchResponse sr = client.prepareSearch()
.addAggregation(
AggregationBuilders.terms("by_country").field("country")
.subAggregation(AggregationBuilders.dateHistogram("by_year")
.field("dateOfBirth")
.dateHistogramInterval(DateHistogramInterval.YEAR)
.subAggregation(AggregationBuilders.avg("avg_children").field("children"))
)
)
.execute().actionGet();
}
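/**
 * My addition: a sketch that walks the three-level response built above,
 * terms bucket -> date histogram bucket -> avg metric. Avg is written with its fully
 * qualified name because it is not imported above.
 */
public void structuringAggregationsResponse(SearchResponse sr) {
    Terms byCountry = sr.getAggregations().get("by_country");
    for (Terms.Bucket countryBucket : byCountry.getBuckets()) {
        Histogram byYear = countryBucket.getAggregations().get("by_year");
        for (Histogram.Bucket yearBucket : byYear.getBuckets()) {
            org.elasticsearch.search.aggregations.metrics.avg.Avg avgChildren =
                    yearBucket.getAggregations().get("avg_children");
            System.out.println(countryBucket.getKeyAsString() + " / "
                    + yearBucket.getKeyAsString() + " -> " + avgChildren.getValue());
        }
    }
}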
/**
* 2. Metrics aggregations
*/
public void metricsAggregations(){
//execute the search; NOTE: for the reads below to return results, each aggregation
//builder built in this method must first be attached to the request with addAggregation(...)
SearchResponse sr = client.prepareSearch().execute().actionGet();
//build a min aggregation
MinAggregationBuilder aggregation1 =
AggregationBuilders
.min("agg")
.field("height");
//read the min result from the response
Min agg1 = sr.getAggregations().get("agg");
double value1 = agg1.getValue();
//build a max aggregation
MaxAggregationBuilder aggregation2 =
AggregationBuilders
.max("agg")
.field("height");
//read the max result from the response
Max agg2 = sr.getAggregations().get("agg");
double value2 = agg2.getValue();
//Sum, Avg, and ValueCount work the same way
// Stats
Stats agg = sr.getAggregations().get("agg");
double min = agg.getMin();
double max = agg.getMax();
double avg = agg.getAvg();
double sum = agg.getSum();
long count = agg.getCount();
/*ExtendedStats adds:
double stdDeviation = agg.getStdDeviation();
double sumOfSquares = agg.getSumOfSquares();
double variance = agg.getVariance();
* */
//percentiles
PercentilesAggregationBuilder aggregation =
AggregationBuilders
.percentiles("agg")
.field("height")
.percentiles(1.0, 5.0, 10.0, 20.0, 30.0, 75.0, 95.0, 99.0);
//read the percentiles from the response
// sr is here your SearchResponse object
Percentiles agg3 = sr.getAggregations().get("agg");
// For each entry
for (Percentile entry : agg3) {
double percent = entry.getPercent(); // Percent
double value = entry.getValue(); // Value
// logger.info("percent [{}], value [{}]", percent, value);
}
/*
Omitted here:
PercentileRanks: percentile ranks
Cardinality: approximate distinct counts
*/
/*
GeoBounds: geographic bounds
*/
// GeoBoundsAggregationBuilder aggregation = //build the geo-bounds aggregation
// GeoBoundsAggregationBuilder
// .geoBounds("agg")
// .field("address.location")
// .wrapLongitude(true);
GeoBounds agg4 = sr.getAggregations().get("agg"); //read the geo-bounds result
GeoPoint bottomRight = agg4.bottomRight();
GeoPoint topLeft = agg4.topLeft();
// logger.info("bottomRight {}, topLeft {}", bottomRight, topLeft);
//top hits aggregation
AggregationBuilder aggregation5 =
AggregationBuilders
.terms("agg").field("gender")
.subAggregation(
AggregationBuilders.topHits("top")
.explain(true).size(1).from(10).sort("postDate", SortOrder.DESC)
// .highlighter(HighlightBuilder.fromXContent())
);
//process the top hits response
Terms agg6 = sr.getAggregations().get("agg");
// For each entry
for (Terms.Bucket entry : agg6.getBuckets()) {
// String key = entry.getKey(); // bucket key
long docCount = entry.getDocCount(); // Doc count
// logger.info("key [{}], doc_count [{}]", key, docCount);
// We ask for top_hits for each bucket
TopHits topHits = entry.getAggregations().get("top");
for (SearchHit hit : topHits.getHits().getHits()) {
// logger.info(" -> id [{}], _source [{}]", hit.getId(), hit.getSourceAsString());
}
}
}
/**
* 3. Bucket aggregations
* Global Aggregation
*/
public void bucketAggregation(){
//execute the search; NOTE: as in metricsAggregations, the builders below must be attached with addAggregation(...) to return results
SearchResponse sr = client.prepareSearch().execute().actionGet();
//build a global aggregation
AggregationBuilders
.global("agg")
.subAggregation(AggregationBuilders.terms("genders").field("gender"));
//use the response
// sr is here your SearchResponse object
Global agg = sr.getAggregations().get("agg");
agg.getDocCount(); // Doc count
//filters aggregation
AggregationBuilder aggregation =
AggregationBuilders
.filters("agg",
new FiltersAggregator.KeyedFilter("men", QueryBuilders.termQuery("gender", "male")),
new FiltersAggregator.KeyedFilter("women", QueryBuilders.termQuery("gender", "female")));
Filters agg2 = sr.getAggregations().get("agg");
// For each entry
for (Filters.Bucket entry : agg2.getBuckets()) {
String key = entry.getKeyAsString(); // bucket key
long docCount = entry.getDocCount(); // Doc count
// logger.info("key [{}], doc_count [{}]", key, docCount);
}
//missing aggregation
AggregationBuilders.missing("agg").field("gender");
// sr is here your SearchResponse object
Missing agg1 = sr.getAggregations().get("agg");
agg1.getDocCount(); // Doc count
//nested aggregation
AggregationBuilders
.nested("agg", "resellers");
// sr is here your SearchResponse object
Nested agg3 = sr.getAggregations().get("agg");
agg3.getDocCount(); // Doc count
//reverseNested aggregation
AggregationBuilder aggregation1 =
AggregationBuilders
.nested("agg", "resellers")
.subAggregation(
AggregationBuilders
.terms("name").field("resellers.name")
.subAggregation(
AggregationBuilders
.reverseNested("reseller_to_product")
)
);
Nested agg4 = sr.getAggregations().get("agg");
Terms name = agg4.getAggregations().get("name");
for (Terms.Bucket bucket : name.getBuckets()) {
ReverseNested resellerToProduct = bucket.getAggregations().get("reseller_to_product");
resellerToProduct.getDocCount(); // Doc count
}
/*
* terms: terms aggregation
* order: several ordering modes exist; check the docs for the exact differences
* range: range aggregation (a runnable sketch follows this method)
* .addUnboundedTo(1.0f) // from -infinity to 1.0 (exclusive)
.addRange(1.0f, 1.5f) // from 1.0 to 1.5 (exclusive)
.addUnboundedFrom(1.5f); // from 1.5 to +infinity
* date range aggregation:
* .format("yyyy")
.addUnboundedTo("1950") // from -infinity to 1950 (exclusive)
.addRange("1950", "1960") // from 1950 to 1960 (exclusive)
.addUnboundedFrom("1960"); // from 1960 to +infinity
* IP range aggregation
* .addUnboundedTo("192.168.1.0") // from -infinity to 192.168.1.0 (exclusive)
.addRange("192.168.1.0", "192.168.2.0") // from 192.168.1.0 to 192.168.2.0 (exclusive)
.addUnboundedFrom("192.168.2.0"); // from 192.168.2.0 to +infinity
setting IP masks as ranges:
.addMaskRange("192.168.0.0/32")
.addMaskRange("192.168.0.0/24")
.addMaskRange("192.168.0.0/16");
* histogram aggregation
* .interval(1);
* date histogram aggregation (one-year interval); DateHistogramInterval.days(10) gives a 10-day interval
* .dateHistogram("agg")
.field("dateOfBirth")
.dateHistogramInterval(DateHistogramInterval.YEAR);
* geo distance aggregation
* .geoDistance("agg", new GeoPoint(48.84237171118314, 2.33320027692004))
.field("address.location")
.unit(DistanceUnit.KILOMETERS)
.addUnboundedTo(3.0)
.addRange(3.0, 10.0)
.addRange(10.0, 500.0);
*
* geohash grid aggregation
* .geohashGrid("agg")
.field("address.location")
.precision(4);
* */
}
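/**
 * My addition: a runnable sketch of the range aggregation described in the comment block
 * above, bucketing on the numeric "wigth" field indexed in createIndex(). Range is written
 * with its fully qualified name because it is not imported above.
 */
public void rangeAggregation() {
    SearchResponse sr = client.prepareSearch("twitter")
            .addAggregation(AggregationBuilders.range("agg")
                    .field("wigth")
                    .addUnboundedTo(1.0)    // from -infinity to 1.0 (exclusive)
                    .addRange(1.0, 1.5)     // from 1.0 to 1.5 (exclusive)
                    .addUnboundedFrom(1.5)) // from 1.5 to +infinity
            .get();
    org.elasticsearch.search.aggregations.bucket.range.Range agg = sr.getAggregations().get("agg");
    for (org.elasticsearch.search.aggregations.bucket.range.Range.Bucket bucket : agg.getBuckets()) {
        System.out.println(bucket.getKeyAsString() + " -> " + bucket.getDocCount());
    }
}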
/**
* Query DSL: write the query as JSON in the request body.
* Building blocks:
* Leaf query clauses: usable on their own, matching a particular field against a particular value, e.g. match, term, range
* Compound query clauses: wrap other leaf or compound queries to express more complex logic, e.g. bool
* (A combined leaf + compound sketch with highlighting follows below.)
*
* 1. Match-all queries
* 2. Full-text queries
* 3. Term-level queries
* 4. Compound queries
* 5. Joining queries
* 6. Geo queries
* 7. Specialized queries
* 8. Span queries
*/
/**
* Admin APIs
* 1. Index management
* 2. Cluster management
*/
}
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.szkingdom</groupId>
<artifactId>spring-test-elasticsearch</artifactId>
<version>1.0-SNAPSHOT</version>
<name>spring-test-elasticsearch</name>
<!-- FIXME change it to the project's website -->
<url>http://www.example.com</url>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>1.7</maven.compiler.source>
<maven.compiler.target>1.7</maven.compiler.target>
</properties>
<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.11</version>
<scope>test</scope>
</dependency>
<!-- The client version must match the version of the Elasticsearch installation.
     Note: the search-template demo (SearchTemplateRequestBuilder) additionally requires
     the org.elasticsearch.plugin:lang-mustache-client artifact. -->
<dependency>
<groupId>org.elasticsearch</groupId>
<artifactId>elasticsearch</artifactId>
<version>6.4.0</version>
</dependency>
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>transport</artifactId>
<version>6.4.0</version>
</dependency>
<!--log4j-->
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-core</artifactId>
<version>2.11.1</version>
</dependency>
</dependencies>
<!-- Lucene snapshot repository -->
<!-- 6.0.0-beta1 depends on Lucene 7.0.0-snapshot-00142c9, so the following Maven repository must be defined: -->
<repositories>
<repository>
<id>elastic-lucene-snapshots</id>
<name>Elastic Lucene Snapshots</name>
<url>http://s3.amazonaws.com/download.elasticsearch.org/lucenesnapshots/00142c9</url>
<releases><enabled>true</enabled></releases>
<snapshots><enabled>false</enabled></snapshots>
</repository>
</repositories>
<build>
<pluginManagement><!-- lock down plugins versions to avoid using Maven defaults (may be moved to parent pom) -->
<plugins>
<plugin>
<artifactId>maven-clean-plugin</artifactId>
<version>3.0.0</version>
</plugin>
<!-- see http://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_jar_packaging -->
<plugin>
<artifactId>maven-resources-plugin</artifactId>
<version>3.0.2</version>
</plugin>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.7.0</version>
</plugin>
<plugin>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.20.1</version>
</plugin>
<plugin>
<artifactId>maven-jar-plugin</artifactId>
<version>3.0.2</version>
</plugin>
<plugin>
<artifactId>maven-install-plugin</artifactId>
<version>2.5.2</version>
</plugin>
<plugin>
<artifactId>maven-deploy-plugin</artifactId>
<version>2.8.2</version>
</plugin>
</plugins>
</pluginManagement>
</build>
</project>