Elasticsearch Sliced Scroll Paged Search Example

In the article [url=https://my.oschina.net/bboss/blog/1786493]Elasticsearch Scroll Paged Search Example[/url] we covered the basic usage of the elasticsearch scroll API; this article introduces the Elasticsearch Sliced Scroll paged search feature.

[size=large][b]1. Preparation[/b][/size]
Import and configure the es client by following the document [url=https://my.oschina.net/bboss/blog/1556866]Introduction to the high-performance elasticsearch ORM development library[/url].
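If the project is built with Maven, the sketch below shows roughly what needs to be added; the coordinates are my assumption of the bboss Elasticsearch client artifact, so treat the linked document above as the authoritative setup guide:

<dependency>
    <!-- assumed coordinates of the bboss elasticsearch rest client; verify against the setup document -->
    <groupId>com.bbossgroups.plugins</groupId>
    <artifactId>bboss-elasticsearch-rest-jdbc</artifactId>
    <version>x.x.x</version><!-- use the version given in the setup document -->
</dependency>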

[size=large][b]2. Define the Sliced Scroll query DSL[/b][/size]
Create the configuration file scroll.xml under the resources directory:
[b][color=blue]esmapper/scroll.xml[/color][/b]
The file contains the Sliced Scroll query DSL statement, scrollSliceQuery:
<property name="scrollSliceQuery">
    <![CDATA[
    {
        "slice": {
            "id": $id,
            "max": $max
        },
        "size": $size,
        "query": {
            "term": {
                "gc.jvmGcOldCount": 3
            }
        }
    }
    ]]>
</property>
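For clarity, this is what the template above renders to for a single slice once the client substitutes the variables, assuming id=0, max=6 and size=100 (the values used in the examples below); the scroll keep-alive itself is passed as the scroll=1m URL parameter rather than in the request body:

{
    "slice": {
        "id": 0,
        "max": 6
    },
    "size": 100,
    "query": {
        "term": {
            "gc.jvmGcOldCount": 3
        }
    }
}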

[size=large][b]3. Running the slice search serially[/b][/size]
/**
 * Run the slice scroll operation serially: the slices are processed one after another.
 */
@Test
public void testSliceScroll() {
    ClientInterface clientUtil = ElasticSearchHelper.getConfigRestClientUtil("esmapper/scroll.xml");
    List<String> scrollIds = new ArrayList<>();
    long starttime = System.currentTimeMillis();
    // scroll slice paged search
    int max = 6;
    long realTotalSize = 0;
    for (int i = 0; i < max; i++) {
        Map params = new HashMap();
        params.put("id", i);
        params.put("max", max);// number of slices; should not exceed the number of index shards
        params.put("size", 100);// 100 records per page
        ESDatas<Map> sliceResponse = clientUtil.searchList("agentstat-*/_search?scroll=1m",
                "scrollSliceQuery", params, Map.class);
        List<Map> sliceDatas = sliceResponse.getDatas();
        if (sliceDatas != null)
            realTotalSize = realTotalSize + sliceDatas.size();
        long totalSize = sliceResponse.getTotalSize();
        String scrollId = sliceResponse.getScrollId();
        if (scrollId != null)
            scrollIds.add(scrollId);
        System.out.println("totalSize:" + totalSize);
        System.out.println("scrollId:" + scrollId);
        if (sliceDatas != null && sliceDatas.size() >= 100) {// a full page of 100 records means there may be more: iterate over the scroll pages
            do {
                sliceResponse = clientUtil.searchScroll("1m", scrollId, Map.class);
                String sliceScrollId = sliceResponse.getScrollId();
                if (sliceScrollId != null) {
                    scrollIds.add(sliceScrollId);
                    scrollId = sliceScrollId;// always continue with the most recently returned scroll id
                }
                sliceDatas = sliceResponse.getDatas();
                if (sliceDatas != null)
                    realTotalSize = realTotalSize + sliceDatas.size();// count the last, possibly partial, page as well
                if (sliceDatas == null || sliceDatas.size() < 100) {// a short page means this slice is exhausted
                    break;
                }
            } while (true);
        }
    }
    // print the elapsed time and the number of documents actually retrieved
    long endtime = System.currentTimeMillis();
    System.out.println("elapsed time:" + (endtime - starttime) + ",realTotalSize:" + realTotalSize);
    // inspect the scroll contexts currently held on the es nodes
    String scrolls = clientUtil.executeHttp("_nodes/stats/indices/search", ClientUtil.HTTP_GET);
    System.out.println(scrolls);
    // clear the scroll contexts once processing is finished
    if (scrollIds.size() > 0) {
        scrolls = clientUtil.deleteScrolls(scrollIds);
        System.out.println(scrolls);
    }
    // check the scroll context statistics again after the cleanup
    scrolls = clientUtil.executeHttp("_nodes/stats/indices/search", ClientUtil.HTTP_GET);
    System.out.println(scrolls);
}
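To make the two _nodes/stats/indices/search calls above easier to interpret: each node reports its search statistics, and the scroll-related counters show whether scroll contexts are still being held. A trimmed sample of the relevant fields (the values are illustrative only) looks roughly like this:

"search" : {
    "open_contexts" : 0,
    "scroll_total" : 36,
    "scroll_time_in_millis" : 1870,
    "scroll_current" : 0
}

After deleteScrolls has run (or once the 1m keep-alive expires), open_contexts and scroll_current should drop back to 0.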

[size=large][b]4. Running the slice search in parallel[/b][/size]
// holds the total number of records actually retrieved across all slices
long realTotalSize;
// helper method used to accumulate the number of records returned by each scroll page
synchronized void incrementSize(int size){
    this.realTotalSize = this.realTotalSize + size;
}
/**
 * Run the slice scroll operation in parallel: one thread per slice.
 */
@Test
public void testParralSliceScroll() {
    final ClientInterface clientUtil = ElasticSearchHelper.getConfigRestClientUtil("esmapper/scroll.xml");
    // the scroll id list is shared by all slice threads, so it must be thread safe
    final List<String> scrollIds = Collections.synchronizedList(new ArrayList<String>());
    long starttime = System.currentTimeMillis();
    // scroll slice paged search
    final int max = 6;
    final CountDownLatch countDownLatch = new CountDownLatch(max);// completion counter: one thread per slice, each finished slice decrements it by 1

    for (int j = 0; j < max; j++) {// start max threads, each processing one slice in parallel
        final int i = j;
        Thread sliceThread = new Thread(new Runnable() {// every thread runs the scroll iteration for its own slice

            @Override
            public void run() {
                try {
                    Map params = new HashMap();
                    params.put("id", i);
                    params.put("max", max);// number of slices; should not exceed the number of index shards
                    params.put("size", 100);// 100 records per page
                    ESDatas<Map> sliceResponse = clientUtil.searchList("agentstat-*/_search?scroll=1m",
                            "scrollSliceQuery", params, Map.class);
                    List<Map> sliceDatas = sliceResponse.getDatas();
                    if (sliceDatas != null)
                        incrementSize(sliceDatas.size());// accumulate the number of documents actually processed
                    long totalSize = sliceResponse.getTotalSize();
                    String scrollId = sliceResponse.getScrollId();
                    if (scrollId != null)
                        scrollIds.add(scrollId);
                    System.out.println("totalSize:" + totalSize);
                    System.out.println("scrollId:" + scrollId);
                    if (sliceDatas != null && sliceDatas.size() >= 100) {// a full page of 100 records means there may be more: iterate over the scroll pages
                        do {
                            sliceResponse = clientUtil.searchScroll("1m", scrollId, Map.class);
                            String sliceScrollId = sliceResponse.getScrollId();
                            if (sliceScrollId != null) {
                                scrollIds.add(sliceScrollId);
                                scrollId = sliceScrollId;// always continue with the most recently returned scroll id
                            }
                            sliceDatas = sliceResponse.getDatas();
                            if (sliceDatas != null)
                                incrementSize(sliceDatas.size());// count the last, possibly partial, page as well
                            if (sliceDatas == null || sliceDatas.size() < 100) {// a short page means this slice is exhausted
                                break;
                            }
                        } while (true);
                    }
                } finally {
                    countDownLatch.countDown();// decrement the counter once this slice is done, even if it failed
                }
            }

        });
        sliceThread.start();// start the worker thread
    }
    try {
        countDownLatch.await();// wait until all slice threads have finished and the counter reaches 0
    } catch (InterruptedException e) {
        e.printStackTrace();
    }
    // print the elapsed time and the number of documents actually retrieved
    long endtime = System.currentTimeMillis();
    System.out.println("elapsed time:" + (endtime - starttime) + ",realTotalSize:" + realTotalSize);
    // inspect the scroll contexts currently held on the es nodes
    String scrolls = clientUtil.executeHttp("_nodes/stats/indices/search", ClientUtil.HTTP_GET);
    // System.out.println(scrolls);
    // clear the scroll contexts once processing is finished
    if (scrollIds.size() > 0) {
        scrolls = clientUtil.deleteScrolls(scrollIds);
        // System.out.println(scrolls);
    }
    // check the scroll context statistics again after the cleanup
    scrolls = clientUtil.executeHttp("_nodes/stats/indices/search", ClientUtil.HTTP_GET);
    // System.out.println(scrolls);
}
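A small design note on the shared counter above: the synchronized incrementSize helper is correct, but a java.util.concurrent.atomic.AtomicLong achieves the same accumulation without locking the test instance. A minimal, self-contained sketch of that alternative (the fixed per-thread count of 100 is just a stand-in for sliceDatas.size()):

import java.util.concurrent.CountDownLatch;
import java.util.concurrent.atomic.AtomicLong;

public class AtomicCounterSketch {
    // lock-free counter replacing the synchronized incrementSize() helper
    static final AtomicLong realTotalSize = new AtomicLong();

    public static void main(String[] args) throws InterruptedException {
        final int max = 6;
        final CountDownLatch countDownLatch = new CountDownLatch(max);
        for (int i = 0; i < max; i++) {
            new Thread(new Runnable() {
                @Override
                public void run() {
                    try {
                        // in the real test this would be realTotalSize.addAndGet(sliceDatas.size())
                        realTotalSize.addAndGet(100);
                    } finally {
                        countDownLatch.countDown();
                    }
                }
            }).start();
        }
        countDownLatch.await();
        System.out.println("realTotalSize:" + realTotalSize.get());
    }
}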

Comparing the serial run with the parallel run, the parallel version performs considerably better, and both retrieve exactly the same number of documents.

[size=large][b]5. References[/b][/size]
[url]https://www.elastic.co/guide/en/elasticsearch/reference/6.2/search-request-scroll.html[/url]

[size=large][b]6. Community[/b][/size]
Elasticsearch QQ group: 166471282

Elasticsearch WeChat official account:
[img]https://static.oschina.net/uploads/space/2017/0617/094201_QhWs_94045.jpg[/img]