test_quick_select
用于评估是否可以使用索引进行范围扫描(range scan
)或索引合并扫描(index merge scan
),并选择成本最低的访问方法。
还会考虑全表扫描(table scan
、index scan
)的可能性,并在必要时优先选择范围扫描,并不是一定选择范围扫描。
int test_quick_select(THD *thd,
Key_map keys_to_use, //是当前的query可用的索引的位图信息
table_map prev_tables,
ha_rows limit, //limit的值
bool force_quick_range/* 强制使用范围扫描 */,
const enum_order interesting_order,/*范围访问方法必须能够提供的排序顺序,升序、降序、无所谓*/
const QEP_shared_owner *tab, //表相关信息
Item *cond,//条件
Key_map *needed_reg, /* used in make_join_query_block() 哪些索引可能被使用*/
QUICK_SELECT_I **quick,
bool ignore_table_scan, /*是否忽略全表扫描*/
Query_block *query_block);
相关变量
ignore_table_scan //是否忽略全表扫描 即当前不可使用全表扫描方法
force_quick_range //是否强制使用range_scan
keys_to_use //当前可用索引的位图信息
covering_keys //当前可用的覆盖索引的位图信息 在mark_used_column中判定与更新
函数流程
(1) 计算全表扫描的代价 table_scan_cost
const Cost_model_server *const cost_model = thd->cost_model();
TABLE *const head = tab->table();
ha_rows records = head->file->stats.records;//表统计信息 总记录数
if (!records) records++;
double scan_time = //全表扫描的cpu_cost
cost_model->row_evaluate_cost(static_cast<double>(records)) + 1;
Cost_estimate cost_est = head->file->table_scan_cost();//全表扫描的io_cost
cost_est.add_io(1.1);
cost_est.add_cpu(scan_time);
if (ignore_table_scan) {//如果选择了忽略全表扫描 设置最大cost值 确保不会使用
scan_time = DBL_MAX;
cost_est.set_max_cost();
}
if (limit < records) { //如果语句中没有limit 该值必定limit > records
cost_est.reset();//强制使用index_scan
cost_est.add_io(head->cost_model()->page_read_cost(static_cast<double>(records)) + 1);
cost_est.add_cpu(scan_time);
} else if (cost_est.total_cost() <= 2.0 && !force_quick_range)
//如果全表扫描的成本非常低,并且没有强制使用快速选择 那么全表扫描可以认为就是代价最低的方式
return 0; /* No need for quick select */
(2) 初始化PARAM
结构体,在未找到最佳的table_read_plan
前,后。续会根据条件cond
与索引信息param
构建一个range tree
,查找相对应的range_scan
扫描方式。
(3) 对可用索引进行选择
key_info = head->key_info;
//遍历表的所有索引,并根据给定条件决定哪些索引可以使用
for (uint idx = 0; idx < head->s->keys; idx++, key_info++) {
KEY_PART_INFO *key_part_info;
if (!keys_to_use.is_set(idx)) {//当前索引不可用
continue;
}
if (hint_key_state(thd, head->pos_in_table_list, idx,
NO_RANGE_HINT_ENUM, 0)) {//检查是否有用户提供的查询提示(hints)禁止对当前索引进行范围优化
continue;
}
if (key_info->flags & HA_FULLTEXT) { //full_text不适用于范围扫描
continue;
}
param.key[param.keys] = key_parts;
key_part_info = key_info->key_part;
for (uint part = 0; part < actual_key_parts(key_info);//当前索引可用 遍历索引的每一个part
part++, key_parts++, key_part_info++) {
key_parts->key = param.keys;
key_parts->part = part;
key_parts->length = key_part_info->length;
key_parts->store_length = key_part_info->store_length;
key_parts->field = key_part_info->field;
key_parts->null_bit = key_part_info->null_bit;
key_parts->image_type = (part < key_info->user_defined_key_parts &&
key_info->flags & HA_SPATIAL) ? Field::itMBR : Field::itRAW;
key_parts->flag = key_part_info->key_part_flag;
}
param.real_keynr[param.keys++] = idx;
}
(4) 是否有可选择的覆盖索引进行查询
/*在查询优化过程中,计算使用最短的覆盖索引进行全索引扫描,因为键长度最短的那个索引相应的IO_COST要小*/
if (!head->covering_keys.is_clear_all()) {//当前表中是否存在覆盖索引
int key_for_use = find_shortest_key(head, &head->covering_keys);
// find_shortest_key() should return a valid key:
assert(key_for_use != MAX_KEY);
//使用覆盖索引进行index_scan
Cost_estimate key_read_time = param.table->file->index_scan_cost(
key_for_use, 1, static_cast<double>(records));
key_read_time.add_cpu(
cost_model->row_evaluate_cost(static_cast<double>(records)));
bool chosen = false;
if (key_read_time < cost_est) { //cost_est 全表扫描的代价 评估代价 选择相应的扫描方式
cost_est = key_read_time;
chosen = true;
}
}
(5) 考虑各种range scan
tree = get_mm_tree(¶m, cond); //根据 where on 等条件 构建查找范围访问的备选方案
//对于group by的语句 尝试构建QUICK_GROUP_MIN_MAX_SELECT
group_trp = get_best_group_min_max(¶m, tree, &best_cost);
if (group_trp) {
DBUG_EXECUTE_IF("force_lis_for_group_by", group_trp->cost_est.reset(););
param.table->quick_condition_rows =
min(group_trp->records, head->file->stats.records);
Opt_trace_object grp_summary(trace, "best_group_range_summary",
Opt_trace_context::RANGE_OPTIMIZER);
if (unlikely(trace->is_started()))
group_trp->trace_basic_info(¶m, &grp_summary);
if (group_trp->cost_est < best_cost) {
grp_summary.add("chosen", true);
best_trp = group_trp;
best_cost = best_trp->cost_est;
} else
grp_summary.add("chosen", false).add_alnum("cause", "cost");
}
//对于单表的非group by 检查是否有强制使用skip_scan的hint
force_skip_scan = hint_table_state(
param.thd, param.table->pos_in_table_list, SKIP_SCAN_HINT_ENUM, 0);
(6) 尝试选择代价更低的range scan
扫描方法,包括可能的索引合并index_merge
/*
当前还存在可行的其他范围查找方法
并且best_trp还没有指定或者没有指定force hint
可通过get_key_scans_params探寻一个更优的范围扫描方法 但也可能不存在
*/
if (tree && (!best_trp || !best_trp->is_forced_by_hint())) { //best_trp, the best table_read_plan
//It is possible to use a range-based quick select (but it might be slower than 'all' table scan).
{
/*
Calculate cost of single index range scan and possible intersections of these
*/
TRP_RANGE *range_trp; // range_scan
TRP_ROR_INTERSECT *rori_trp; // ROR扫描
/* Get best 'range' plan and prepare data for making other plans */
if ((range_trp = // get_key_scans_params会对param中所有的key 计算扫描行数以及相应的cost 选择代价更低的range_scan
get_key_scans_params(¶m, tree, false, true, &best_cost))) {
best_trp = range_trp;
best_cost = best_trp->cost_est;
}
/*
Simultaneous key scans and row deletes on several handler
objects are not allowed so don't use ROR-intersection for
table deletes. Also, ROR-intersection cannot return rows in
descending order
*/
if ((thd->lex->sql_command != SQLCOM_DELETE) && //非delete
(param.index_merge_allowed || // 允许索引合并 或者 用户给了相关的提示
hint_table_state(param.thd, param.table->pos_in_table_list,
INDEX_MERGE_HINT_ENUM, 0)) &&
interesting_order != ORDER_DESC) { //无需以降序返回结果
//Get best non-covering ROR-intersection plan and prepare data for building covering ROR-intersection.
if ((rori_trp =
get_best_ror_intersect(¶m, tree, &best_cost, true))) {
best_trp = rori_trp;
best_cost = best_trp->cost_est;
}
}
}
// Here we calculate cost of union index merge
if (!tree->merges.is_empty()) {
// Cannot return rows in descending order.
if ((param.index_merge_allowed ||
hint_table_state(param.thd, param.table->pos_in_table_list,
INDEX_MERGE_HINT_ENUM, 0)) &&
interesting_order != ORDER_DESC &&
param.table->file->stats.records) {
/* Try creating index_merge/ROR-union scan. */
SEL_IMERGE *imerge;
TABLE_READ_PLAN *best_conj_trp = nullptr, *new_conj_trp = nullptr;
List_iterator_fast<SEL_IMERGE> it(tree->merges);
Opt_trace_array trace_idx_merge(trace, "analyzing_index_merge_union",
Opt_trace_context::RANGE_OPTIMIZER);
while ((imerge = it++)) {
new_conj_trp = get_best_disjunct_quick(¶m, imerge, &best_cost);
if (new_conj_trp)
param.table->quick_condition_rows =
min(param.table->quick_condition_rows, new_conj_trp->records);
if (!best_conj_trp ||
(new_conj_trp &&
new_conj_trp->cost_est < best_conj_trp->cost_est)) {
best_conj_trp = new_conj_trp;
}
}
if (best_conj_trp) best_trp = best_conj_trp;
}
}
}
(7) 根据找到的最佳扫描计划,构建quick_select
/*
If we got a read plan, create a quick select from it.
Only create a quick select if the storage engine supports using indexes for access.
*/
if (best_trp && (head->file->ha_table_flags() & HA_NO_INDEX_ACCESS) == 0) {
QUICK_SELECT_I *qck;
records = best_trp->records;
if (!(qck = best_trp->make_quick(¶m, true)) || qck->init())
qck = nullptr;
*quick = qck;
}
get_key_scans_params
//获取最佳的range_scan方法
static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree, //test_quick_select关于使用的索引的信息
bool index_read_must_be_used, //if true <=> index_only 无需回表查询
bool update_tbl_stats,
const Cost_estimate *cost_est);//当前最佳扫描方法的代价 table_scan or index_scan
相关变量
index_read_must_be_used //必须使用index_only方式
update_tbl_stats //是否需要更新表的统计信息
key_to_read //最终选择的索引节点 最佳索引
force_index_merge //强制使用索引合并
use_cheapest_index_merge //是否应该优先考虑代价更小的索引合并方案
函数流程
对所有可用索引进行遍历
//遍历当前所有可用索引的数量 并不是表中的所有索引
for (idx = 0; idx < param->keys; idx++) {
key = tree->keys[idx];
if (key) {}
}
对当前使用的索引构建查询计划
check_quick_select
中在已知扫描范围的情况下,计算该扫描方式下的cost
与rows
,通过函数multi_range_read_info_const
调用相关的代价函数与records_in_range
进行行数预估。
ha_rows found_records;
Cost_estimate cost;
uint mrr_flags = 0, buf_size = 0;
uint keynr = param->real_keynr[idx];//索引id
if (key->type == SEL_ROOT::Type::MAYBE_KEY || key->root->maybe_flag) //表示该索引依赖于其他表的数据(join),优化器需要考虑这种依赖关系
param->needed_reg->set_bit(keynr);
bool read_index_only = //是否是无需回表的index_only读取
index_read_must_be_used
? true
: (bool)param->table->covering_keys.is_set(keynr); //判断当前查询条件下 该索引是不是覆盖索引
//评估当前执行计划的可行性 需要扫描的记录数量 + cost 如果返回值found_records = HA_POS_ERROR 表示无法使用该索引
found_records =
check_quick_select(param, idx, read_index_only, key, update_tbl_stats, //关键点
&mrr_flags, &buf_size, &cost);
if (!compound_hint_key_enabled(param->table, keynr,
INDEX_MERGE_HINT_ENUM)) { //检查是否启用了复合提示USE INDEX 、IGNORE INDEX 并且影响到了当前的执行计划
continue;
}
对于check_quick_select
的结果,是否是更优的索引选择
if (found_records != HA_POS_ERROR && //找到了有效的记录数
(read_cost > cost || //并且当前索引的成本比已知的最佳成本更低
(force_index_merge && (!use_cheapest_index_merge || !key_to_read)))) { //强制使用索引合并 且 没有指定代价最小的索引或尚未选择任何索引
read_cost = cost; //更新相关的索引信息
best_records = found_records;
key_to_read = key;
best_idx = idx;
best_mrr_flags = mrr_flags;
best_buf_size = buf_size;
is_best_idx_imerge_scan = param->is_imerge_scan;
} else {}
//根据已知信息 返回最佳的执行计划
if (key_to_read) {
if ((read_plan = new (param->mem_root)
TRP_RANGE(key_to_read, best_idx, best_mrr_flags))) {
read_plan->records = best_records;
read_plan->is_ror = tree->ror_scans_map.is_set(best_idx);
read_plan->is_imerge = is_best_idx_imerge_scan;
read_plan->cost_est = read_cost;
read_plan->mrr_buf_size = best_buf_size;
}
}
return read_plan;
}
check_quick_select
//评估使用给定索引进行范围扫描时预期检索的记录数,并计算相关成本
static ha_rows check_quick_select(PARAM *param,
uint idx, // 索引id
bool index_only, // 是否index_only读取
SEL_ROOT *tree,
bool update_tbl_stats, // 是否更新统计信息
uint *mrr_flags, uint *bufsize,
Cost_estimate *cost); // 当前要对比的执行计划的代价
//在已知扫描范围的情况下,计算该扫描方式下的cost与rows 调用代价函数与records_in_range
rows = file->multi_range_read_info_const(keynr, &seq_if, (void *)&seq, 0,
bufsize, mrr_flags, cost);
if (rows != HA_POS_ERROR) {//有合理的行数预估结果
param->table->quick_rows[keynr] = rows;
if (update_tbl_stats) { //根据当前所选择的索引 更新相关的信息
param->table->quick_keys.set_bit(keynr);
param->table->quick_key_parts[keynr] = param->max_key_part + 1;
param->table->quick_n_ranges[keynr] = param->range_count;
param->table->quick_condition_rows =
min(param->table->quick_condition_rows, rows);
}
param->table->possible_quick_keys.set_bit(keynr);
}
handler::multi_range_read_info_const
ha_rows handler::multi_range_read_info_const(
uint keyno, RANGE_SEQ_IF *seq, void *seq_init_param,
uint n_ranges_arg MY_ATTRIBUTE((unused)), uint *bufsz, uint *flags,
Cost_estimate *cost) {
KEY_MULTI_RANGE range;
range_seq_t seq_it;
ha_rows rows, total_rows = 0;
uint n_ranges = 0;
THD *thd = current_thd;
/* Default MRR implementation doesn't need buffer */
*bufsz = 0;
DBUG_EXECUTE_IF("bug13822652_2", thd->killed = THD::KILL_QUERY;);
seq_it = seq->init(seq_init_param, n_ranges, *flags);
while (!seq->next(seq_it, &range)) {
if (unlikely(thd->killed != 0)) return HA_POS_ERROR;
n_ranges++;
key_range *min_endp, *max_endp;
if (range.range_flag & GEOM_FLAG) {
min_endp = &range.start_key;
max_endp = nullptr;
} else {
min_endp = range.start_key.length ? &range.start_key : nullptr;
max_endp = range.end_key.length ? &range.end_key : nullptr;
}
int keyparts_used = 0;
if ((range.range_flag & UNIQUE_RANGE) && // 1) 该索引是unique index
!(range.range_flag & NULL_RANGE)) //且 该索引不是is null查询 因为即使是在unique index上 也是可以存在多个null值的
rows = 1; /* there can be at most one row */
else if (range.range_flag & SKIP_RECORDS_IN_RANGE && // 2)
!(range.range_flag & NULL_RANGE)) { //注意 对于is null查询 需要调用records_in_range() 去进行行数预估
if ((range.range_flag & EQ_RANGE) && //当前不是唯一索引列查询 并且是等值查询 意味着可能匹配到多行数据
(keyparts_used = my_count_bits(range.start_key.keypart_map)) &&
table->key_info[keyno].has_records_per_key(keyparts_used - 1)) {
//该种情况下 根据你使用的索引的实际part数 估计该等值查询下的records_per_key作为行数预估
rows = static_cast<ha_rows>(
table->key_info[keyno].records_per_key(keyparts_used - 1));
} else {
/*
Return HA_POS_ERROR if the range does not use all key parts and
the key cannot use partial key searches.
*/
if ((index_flags(keyno, 0, false) & HA_ONLY_WHOLE_INDEX)) {
assert(
(range.range_flag & EQ_RANGE) &&
!table->key_info[keyno].has_records_per_key(keyparts_used - 1));
total_rows = HA_POS_ERROR;
break;
}
/*
Since records_in_range has not been called, set the rows to 1.
FORCE INDEX has been used, cost model values will be ignored anyway.
*/
rows = 1;
}
} else { // 使用records_in_range()对索引的上下界进行判断 估计扫描行数
DBUG_EXECUTE_IF("crash_records_in_range", DBUG_SUICIDE(););
assert(min_endp || max_endp);
if (HA_POS_ERROR ==
(rows = this->records_in_range(keyno, min_endp, max_endp))) {
/* Can't scan one range => can't do MRR scan at all */
total_rows = HA_POS_ERROR;
break;
}
}
total_rows += rows;
}
//估计该索引扫描下的代价
if (total_rows != HA_POS_ERROR) {
const Cost_model_table *const cost_model = table->cost_model();
/* The following calculation is the same as in multi_range_read_info(): */
*flags |= (HA_MRR_USE_DEFAULT_IMPL | HA_MRR_SUPPORT_SORTED);
assert(cost->is_zero());
if (*flags & HA_MRR_INDEX_ONLY)//是否需要回表
*cost = index_scan_cost(keyno, static_cast<double>(n_ranges),//无需回表
static_cast<double>(total_rows));
else
*cost = read_cost(keyno, static_cast<double>(n_ranges),//需要回表
static_cast<double>(total_rows));
cost->add_cpu(
cost_model->row_evaluate_cost(static_cast<double>(total_rows)) + 0.01);
}
return total_rows;
}