崩溃恢复之日志解析
MySQL redo日志崩溃恢复整体流程如图所示,本文将针对日志解析过程中涉及到的几个函数源码进行详细解析。
1. recv_group_scan_log_recs()
函数流程
/** Scans the redo log of one log group, starting at *contiguous_lsn, by
reading it in RECV_SCAN_SIZE chunks into log_sys->buf and handing each
chunk to recv_scan_log_recs().
@param[in]	group		log group whose log is read
@param[in,out]	contiguous_lsn	LSN to start from; it is rounded down to a
				multiple of OS_FILE_LOG_BLOCK_SIZE here and
				then passed on to recv_scan_log_recs()
@param[in]	last_phase	when true, records are stored with
				STORE_IF_EXISTS instead of STORE_YES and
				recv_apply_hashed_log_recs() may be called
				in the middle of the scan
@return false if recv_sys flagged a corrupt log or file system; otherwise
whether store_to_hash ended up as STORE_NO (presumably this tells the
caller that a rescan is needed -- confirm against the caller) */
static
bool
recv_group_scan_log_recs(
log_group_t* group,
lsn_t* contiguous_lsn,
bool last_phase) // see the header comment for the meaning of last_phase
{
DBUG_ENTER("recv_group_scan_log_recs");
/* The last phase requires a non-zero recv_sys->mlog_checkpoint_lsn. */
assert(!last_phase || recv_sys->mlog_checkpoint_lsn > 0);
/* Reset the recovery state while holding recv_sys->mutex, so that
parsing, scanning and recovery all restart from *contiguous_lsn. */
mutex_enter(&recv_sys->mutex);
recv_sys->len = 0;
recv_sys->recovered_offset = 0;
recv_sys->n_addrs = 0;
recv_sys_empty_hash();
srv_start_lsn = *contiguous_lsn;
recv_sys->parse_start_lsn = *contiguous_lsn;
recv_sys->scanned_lsn = *contiguous_lsn;
recv_sys->recovered_lsn = *contiguous_lsn;
recv_sys->scanned_checkpoint_no = 0;
recv_previous_parsed_rec_type = MLOG_SINGLE_REC_FLAG;
recv_previous_parsed_rec_offset = 0;
recv_previous_parsed_rec_is_multi = 0;
ut_ad(recv_max_page_lsn == 0);
ut_ad(last_phase || !recv_writer_thread_active);
mutex_exit(&recv_sys->mutex);
/* Remember the unaligned starting LSN; *contiguous_lsn itself is
aligned down below. This copy is what recv_scan_log_recs() receives as
its checkpoint_lsn argument. */
lsn_t checkpoint_lsn = *contiguous_lsn;
lsn_t start_lsn;
lsn_t end_lsn;
store_t store_to_hash = last_phase ? STORE_IF_EXISTS : STORE_YES;
/* Size limit handed to recv_scan_log_recs() for the hash table of
records: page size times the buffer pool pages that remain after
setting aside recv_n_pool_free_frames for each buffer pool instance. */
ulint available_mem = UNIV_PAGE_SIZE
* (buf_pool_get_n_pages()
- (recv_n_pool_free_frames * srv_buf_pool_instances));
/* Start reading on a log block boundary. */
end_lsn = *contiguous_lsn = ut_uint64_align_down(
*contiguous_lsn, OS_FILE_LOG_BLOCK_SIZE);
/* Read and scan one RECV_SCAN_SIZE chunk per iteration until
recv_scan_log_recs() returns true. */
do {
/* recv_scan_log_recs() can change store_to_hash through the pointer
it is given. If it was switched to STORE_NO during the last phase,
call recv_apply_hashed_log_recs() and go back to STORE_IF_EXISTS
before reading the next chunk. */
if (last_phase && store_to_hash == STORE_NO) {
store_to_hash = STORE_IF_EXISTS;
/* We must not allow change buffer
merge here, because it would generate
redo log records before we have
finished the redo log scan. */
recv_apply_hashed_log_recs(FALSE);
}
start_lsn = end_lsn;
end_lsn += RECV_SCAN_SIZE;
log_group_read_log_seg(
log_sys->buf, group, start_lsn, end_lsn);
} while (!recv_scan_log_recs(
available_mem, &store_to_hash, log_sys->buf,
RECV_SCAN_SIZE,
checkpoint_lsn,
start_lsn, contiguous_lsn, &group->scanned_lsn));
/* Report failure if the scan flagged a corrupt log or file system. */
if (recv_sys->found_corrupt_log || recv_sys->found_corrupt_fs) {
DBUG_RETURN(false);
}
DBUG_PRINT("ib_log", ("%s " LSN_PF
" completed for log group " ULINTPF,
last_phase ? "rescan" : "scan",
group->scanned_lsn, group->id));
DBUG_RETURN(store_to_hash == STORE_NO);
}
1、参数验证和初始化:
- 使用 assert 断言来确保:如果 last_phase 为真,则必须已有有效的检查点LSN(recv_sys->mlog_checkpoint_lsn 大于0)。
- 初始化接收系统 recv_sys 的一些关键成员变量,比如长度、恢复偏移量、地址数量等,并清空哈希表。将 srv_start_lsn、recv_sys->parse_start_lsn 等起始LSN设置为传入的 contiguous_lsn 值。
2、内存和资源准备:
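- 计算哈希表可用的内存量 available_mem:页大小 UNIV_PAGE_SIZE 乘以(缓冲池的总页数减去为恢复保留的空闲帧数,其中保留帧数为 recv_n_pool_free_frames 乘以缓冲池实例数 srv_buf_pool_instances)。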
3、日志扫描循环:
- 将 contiguous_lsn 向下对齐到日志块大小 OS_FILE_LOG_BLOCK_SIZE,并把对齐后的值同时赋给 end_lsn 和 *contiguous_lsn。
end_lsn = *contiguous_lsn = ut_uint64_align_down(
*contiguous_lsn, OS_FILE_LOG_BLOCK_SIZE);
- 在一个循环中,不断通过 log_group_read_log_seg 读取日志段,并调用 recv_scan_log_recs 处理日志记录。如果是在最后阶段(last_phase 为真)且 store_to_hash 已被置为不存储到哈希表的 STORE_NO,则将其改回 STORE_IF_EXISTS,并应用已存入哈希表的日志记录。
do {
if (last_phase && store_to_hash == STORE_NO) {
/* 当哈希表占用的内存达到available_mem上限时,recv_scan_log_recs()会将store_to_hash置为STORE_NO;
如果此时处于last_phase阶段,则在日志解析中途先调用日志应用函数recv_apply_hashed_log_recs()应用已存入哈希表的日志,
并将store_to_hash策略恢复为STORE_IF_EXISTS后继续扫描 */
store_to_hash = STORE_IF_EXISTS;
recv_apply_hashed_log_recs(FALSE);
}
start_lsn = end_lsn;
end_lsn += RECV_SCAN_SIZE;
log_group_read_log_seg(
log_sys->buf, group, start_lsn, end_lsn); // 读取日志文件将start_lsn到end_lsn的日志读取到log_sys->buf
} while (!recv_scan_log_recs(
available_mem, &store_to_hash, log_sys->buf,
RECV_SCAN_SIZE,
checkpoint_lsn,
start_lsn, contiguous_lsn, &group->scanned_lsn)); // while条件为日志解析函数入口
4、错误处理和返回:
- 如果在扫描过程中发现损坏的日志或文件系统,则返回 false。
- 打印日志信息,表示扫描或重新扫描完成。
- 返回 store_to_hash == STORE_NO 的值:为 true 时表示扫描过程中哈希表可用内存耗尽、有日志记录未被存入哈希表,调用者需要重新扫描。
2. recv_scan_log_recs()
函数流程
static
bool
recv_scan_log_recs( // 接收并扫描日志记录的函数
/*===============*/
ulint available_memory,/*!< in: we let the hash table of recs
to grow to this size, at the maximum */
store_t* store_to_hash, /*!< in,out: whether the records should be
stored to the hash table; this is reset
if just debug checking is needed, or
when the available_memory runs out */
const byte* buf, /*!< in: buffer containing a log
segment or garbage */
ulint len, /*!< in: buffer length */
lsn_t checkpoint_lsn, /*!< in: latest checkpoint LSN */
lsn_t start_lsn, /*!< in: buffer start lsn */
lsn_t* contiguous_lsn, /*!< in/out: it is known that all log
groups contain contiguous log data up
to this lsn */
lsn_t* group_scanned_lsn)/*!< out: scanning succeeded up to
this lsn */
{
const byte* log_block = buf;
ulint no;
lsn_t scanned_lsn = start_lsn;
bool finished = false;
ulint data_len;
bool more_data = false;
ulint recv_parsing_buf_size = RECV_PARSING_BUF_SIZE;
ut_ad(start_lsn % OS_FILE_LOG_BLOCK_SIZE == 0);
ut_ad(len % OS_FILE_LOG_BLOCK_SIZE == 0);
ut_ad(len >= OS_FILE_LOG_BLOCK_SIZE);
do {
ut_ad(!finished);
no = log_block_get_hdr_no(log_block);
ulint expected_no = log_block_convert_lsn_to_no(scanned_lsn);
if (no != expected_no) {
finished = true;
break;
}
if (!log_block_checksum_is_ok(log_block)) {
ib::error() << "Log block " << no <<
" at lsn " << scanned_lsn << " has valid"
" header, but checksum field contains "
<< log_block_get_checksum(log_block)
<< ", should be "
<< log_block_calc_checksum(log_block);
finished = true;
break;
}
if (log_block_get_flush_bit(log_block)) {
if (scanned_lsn > *contiguous_lsn) {
*contiguous_lsn = scanned_lsn;
}
}
data_len = log_block_get_data_len(log_block);
if (scanned_lsn + data_len > recv_sys->scanned_lsn
&& log_block_get_checkpoint_no(log_block)
< recv_sys->scanned_checkpoint_no