
摘要
为了验证当前集群经常出现索引超时和请求被拒绝的问题,现模拟线上集群环境及索引设置,通过压测工具随机生成测试数据,分别对当前 850 个分片的索引、分片数减半后的索引以及分片数更少的索引进行写入压测。压测中使用不同的并发数和不同的批次大小来观察索引的吞吐情况,并记录写入队列的堆积情况,用来分析分片数和批次大小对写入的影响,从而确定后续的优化方案。
压测场景
Elasticsearch 版本为 v7.7.1,集群共有 57 个节点,其中 3 个独立 Master 节点、3 个协调节点,JVM 内存为 31GB。
压测流程
单索引 850 分片
索引定义
PUT idx-xxxx-xxxxxx
{
"aliases" : {
"alias-xxxx-xxxxxx" : { }
},
"mappings" : {
"dynamic" : "strict",
"_routing" : {
"required" : true
},
"_source" : {
"excludes" : [
"isExtract*",
"batchNo"
]
},
"properties" : {
"addxxxx" : {
"type" : "text",
"term_vector" : "with_positions_offsets"
},
"clxxxx" : {
"type" : "byte"
},
"contxxxx" : {
"type" : "text",
"boost" : 4.0,
"term_vector" : "with_positions_offsets"
},
"conxxxx" : {
"type" : "keyword",
"doc_values" : false
},
"con1xxxx" : {
"type" : "text",
"boost" : 16.0,
"term_vector" : "with_positions_offsets",
"fields" : {
"keyword" : {
"type" : "keyword",
"normalizer" : "keyword_normalizer"
}
},
"analyzer" : "name_analyzer",
"search_analyzer" : "keyword_analyzer"
},
"contSxxxx" : {
"type" : "long",
"index" : false,
"doc_values" : false
},
"contSxxxxx" : {
"type" : "keyword",
"doc_values" : false
},
"contTxxxx" : {
"type" : "short"
},
"crtxxxx" : {
"type" : "date",
"ignore_malformed" : true,
"format" : "yyyyMMddHHmmss"
},
"duration" : {
"type" : "long",
"index" : false,
"doc_values" : false
},
"largeTxxxx" : {
"type" : "keyword",
"boost" : 8.0,
"index" : false,
"doc_values" : false
},
"md5" : {
"type" : "keyword",
"index" : false,
"doc_values" : false
},
"orderxxxx" : {
"type" : "alias",
"path" : "contName.keyword"
},
"ownxxxxxx" : {
"type" : "keyword",
"doc_values" : false
},
"ownxxxxxxxxxx" : {
"type" : "keyword",
"doc_values" : false
},
"ownxxxxxxxxxxx" : {
"type" : "keyword",
"doc_values" : false
},
"ownxxxxxxxxxxx" : {
"type" : "keyword",
"doc_values" : false
},
"parenxxxxxxxxxx" : {
"type" : "keyword"
},
"pathxx" : {
"type" : "text",
"boost" : 8.0,
"term_vector" : "with_positions_offsets",
"fields" : {
"keyword" : {
"type" : "keyword"
}
},
"analyzer" : "path_analyzer"
},
"presexxxxx" : {
"type" : "keyword",
"boost" : 8.0,
"index" : false,
"doc_values" : false
},
"presexxxxx" : {
"type" : "keyword",
"boost" : 8.0,
"index" : false,
"doc_values" : false
},
"presxxxxxx" : {
"type" : "keyword",
"boost" : 8.0,
"index" : false,
"doc_values" : false
},
"prixxxxxx" : {
"type" : "short",
"index" : false
},
"search_xxxxxx" : {
"type" : "alias",
"path" : "contName"
},
"servixxxxxx" : {
"type" : "byte"
},
"shotxxxxxx" : {
"type" : "date",
"ignore_malformed" : true,
"format" : "yyyyMMddHHmmss"
},
"xxxxxxlThuxxxxxx" : {
"type" : "keyword",
"boost" : 8.0,
"index" : false,
"doc_values" : false
},
"tagxxxxxx" : {
"type" : "text",
"term_vector" : "with_positions_offsets"
},
"thumxxxxxx" : {
"type" : "keyword",
"boost" : 8.0,
"index" : false,
"doc_values" : false
},
"xxxxxxpdxxxxxx" : {
"type" : "date",
"ignore_malformed" : true,
"format" : "yyyyMMddHHmmss"
},
"xxxxxxderAcxxxxxx" : {
"type" : "keyword",
"doc_values" : false
},
"xxxxxxerAccouxxxxxx" : {
"type" : "keyword",
"doc_values" : false
},
"xxxxxxerxxxxxxID" : {
"type" : "keyword",
"doc_values" : false
},
"xxxxxxderNxxxxxx" : {
"type" : "keyword",
"doc_values" : false
}
}
},
"settings" : {
"index" : {
"max_ngram_diff" : "50",
"refresh_interval" : "1s",
"number_of_shards" : "850",
"analysis" : {
"normalizer" : {
"keyword_normalizer" : {
"filter" : [
"lowercase"
],
"type" : "custom"
}
},
"analyzer" : {
"keyword_analyzer" : {
"filter" : [
"lowercase"
],
"type" : "custom",
"tokenizer" : "keyword"
},
"name_analyzer" : {
"filter" : [
"lowercase"
],
"type" : "custom",
"tokenizer" : "name_tokenizer"
},
"path_analyzer" : {
"filter" : [
"lowercase"
],

通过模拟线上环境对 Elasticsearch 集群进行压测,分析 850 个分片的索引和分片数减半后的索引在不同并发、不同批次大小下的写入性能。结果发现,分片数多的索引在低并发时就可能出现请求被拒绝,而分片数较少的索引具有更高的吞吐量。建议根据业务需求将大索引拆分为更小的子索引,以提高写入效率和系统稳定性。
最低0.47元/天 解锁文章
2887

被折叠的 条评论
为什么被折叠?



