MySQL 千万级数据迁移

该博客详细介绍了如何通过存储过程进行大规模MySQL表的数据迁移,以优化查询性能。首先计算有效数据量,创建备份表并删除索引,然后分页导入数据,最后重建索引并重命名表,确保新表与原表结构一致但数据更精简,提升查询效率。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

目录

背景介绍

大体思路

步骤详情


背景介绍

        项目中表数据存在超过50%的逻辑删除数据,影响查询性能,本次计划将逻辑有效的数据迁移到新表,使得表数据量减少,索引文件减少,提高查询效率。

大体思路

        通过存储过程实现表数据迁移,迁移过程中涉及动态删除索引、动态创建索引等操作,导入时通过分页的方式来导入逻辑有效数据。

  1. 计算表数据量大小
  2. 创建备份表(只保留表结构)
  3. 删除备份表索引(保留主键索引)
  4. 导入有效数据到备份表
  5. 给备份表创建索引(索引与原表一致)
    1. 这里使用视图先准备好需要创建的索引数据
    2. 再通过存储过程从视图中读取出数据继续创建
  6. 重命名备份表,重命名新表

步骤详情

  • Step-1:计算表数据量大小
CREATE DEFINER=`root`@`localhost` PROCEDURE `Step_01_CountUsefulData`(
	in schemaName varchar(50), -- 输入参数,schema名称
	in tableName varchar(50),  -- 输入参数,表名称
	in pageSize INT,  -- 输入参数,每页大小
	OUT totalPages INT, -- 输出参数,总页数
	OUT minId INT, -- 输出参数,最新id
	OUT maxId INT -- 输出参数,最大id
	)
BEGIN
	SET @begin_date = now();
	SET @pageSize = pageSize;
	
	SET @stmt = CONCAT('SELECT count(1), min(id), max(id) FROM ', tableName, ' where is_deleted = false into @totalRecords, @minId, @maxId');
	prepare stmt from @stmt;
	execute stmt;
	deallocate prepare stmt;
	
	SET totalPages = CEILING(@totalRecords/pageSize);
	SET minId = @minId;
	SET maxId = @maxId;
	SET @ct = TIMESTAMPDIFF(SECOND, @begin_date, now());
	select '计算表数据量' as '任务名称', tableName as '表名', @totalRecords as '总条数', @pageSize as '每页大小', @ct as '用时/秒';
END
  • Step-2:创建备份表(只保留表结构)
CREATE DEFINER=`root`@`localhost` PROCEDURE `Step_02_CreateBakTable`(
	in schemaName varchar(50), -- 输入参数,schema名称
	in originalTableName varchar(50), -- 输入参数,原表名称
	in newTableName varchar(50) -- 输入参数,新表名称
)
BEGIN
    set @stmt = CONCAT('create table ', newTableName, ' like ', originalTableName);	
	select '创建备份表' as '任务名称', newTableName as '备份表', originalTableName as '原表', @stmt as '备份SQL';
	
	prepare stmt from @stmt;
	execute stmt;
	deallocate prepare stmt;
END
  • Step-3:删除备份表索引(保留主键索引)

根据传入的schema名称和表名称动态删除索引(保留主键索引不删)

CREATE DEFINER=`root`@`localhost` PROCEDURE `Step_03_RemoveTableIndex`(
	in tableSchema varchar(50),  -- 输入参数
	in newTableName varchar(50)  -- 输入参数
)
BEGIN
	select '删除表索引开始' as '任务名称', tableSchema as 'TableSchema', newTableName as '表名';
	SET @begin_time = now();
	
	-- 查询表索引,并拼接成drop语句
	set @stmt_idx = CONCAT("SELECT GROUP_CONCAT(concat('drop index ',INDEX_NAME)) as idx FROM ",
		"(select DISTINCT(INDEX_NAME) FROM INFORMATION_SCHEMA.STATISTICS T WHERE T.TABLE_SCHEMA = '", tableSchema, "'"
		" AND T.TABLE_NAME = '", newTableName, "' AND T.INDEX_NAME <> 'PRIMARY') t1 into @dropsql");
		
	prepare stmt from @stmt_idx;
	execute stmt;
	deallocate prepare stmt;
	
	IF @dropsql is not NULL THEN
		-- 拼接删除语句
		set @stmt = CONCAT('ALTER TABLE ', newTableName, ' ', @dropsql, ';');
		select '删除表索引' as '任务名称', @stmt as '删除SQL';
		
		prepare stmt from @stmt;
		execute stmt;
		deallocate prepare stmt;
	ELSE
		select '删除表索引失败' as '任务名称', newTableName as '该表没有索引,无需删除';
	END IF;
	
	SET @cost = TIMESTAMPDIFF(SECOND, @begin_time, now());
	select '删除表索引结束' as '任务名称', newTableName as '表名', @cost as '耗时(秒)';
END
  • Step-4:导入逻辑有效数据到备份表
CREATE DEFINER=`root`@`localhost` PROCEDURE `Step_04_ImportTableData`(
	in tableSchema VARCHAR(50), 
	in originalTableName VARCHAR(50), 
	in newTableName VARCHAR(50), 
	in pageSize int, 
	in minId int, 
	in maxId int
)
BEGIN
	DECLARE fromId int;
	SELECT '导入数据开始' as '任务名称', tableSchema 'Schema Name', originalTableName '原表名', newTableName as '新表名', pageSize as '每页数据大小', minId as '最小id', maxId as '最大Id';
	SET @pageSize = pageSize;
	SET @newTableName = newTableName;
	SET @originalTableName = originalTableName;
	SET @minId = minId;
	SET @maxId = maxId;
	set @fromId = @minId;
	
	SET @query_begin_date = now();
	WHILE @minId <= @maxId DO
	
		SET @stmt = CONCAT('INSERT INTO ', @newTableName, ' SELECT * FROM ', @originalTableName, ' where is_deleted = false and id BETWEEN ? and ?;');
		
		SET @minId = @minId;
		SET @nextId = @minId + @pageSize;
		
		-- select @stmt as '备份SQL', @minId as 'MinId', @maxId as 'MaxId';
		-- 开启事务,执行一批数据后就提交事务
		BEGIN
		START TRANSACTION;
			prepare stmt from @stmt;
			execute stmt USING @minId, @nextId;
			deallocate prepare stmt;
		COMMIT;
		END;
		
		-- select @stmt as '备份SQL', @minId as 'MinId', @nextId as 'NextId', @ct as '导入数据用时/秒';
		SET @minId = @nextId + 1;
		
	END WHILE;
	SET @ct = TIMESTAMPDIFF(SECOND, @query_begin_date, now());
	select '导入数据结束' as '任务名称', @ct as '耗时(秒)';

END
  • Step-5:给备份表创建索引(索引与原表一致)

包括唯一索引、联合索引的创建

CREATE DEFINER=`root`@`localhost` PROCEDURE `Step_05_CreateTableIndex`(
	in schemaName VARCHAR(50), 
	in originalTableName varchar(50), 
	in newTableName varchar(50)
)
BEGIN

	select '创建索引开始' as '任务名称', newTableName as '表名';
	SET @begin_time = now();

	-- 根据schemaName, tableName 查询表索引信息,并创建视图 (动态游标的处理方案)
	call Step_05_01_CreateTableIndex(schemaName, originalTableName);

	-- 从上一步的视图中读取数据
	call Step_05_02_CreateTableIndex(schemaName, newTableName);

	SET @cost = TIMESTAMPDIFF(SECOND, @begin_time, now());
	select '创建索引结束' as '任务名称', newTableName as '表名', @cost as '耗时(秒)';

END
CREATE DEFINER=`root`@`localhost` PROCEDURE `Step_05_01_CreateTableIndex`(
	in tableSchema varchar(50), 
	in newTableName varchar(50)
)
BEGIN
  DROP VIEW IF EXISTS tmp_table_idx_view;  
	SET @sqlstr = "CREATE VIEW tmp_table_idx_view as ";  
	SET @sqlstr = CONCAT(@sqlstr , "SELECT non_unique, index_name, seq_in_index, column_name, index_type, index_comment FROM INFORMATION_SCHEMA.STATISTICS t WHERE t.TABLE_SCHEMA = '", 
								tableSchema, "' AND t.TABLE_NAME ='", newTableName, "' AND t.INDEX_NAME <> 'PRIMARY'");  
  
	PREPARE stmt FROM @sqlstr;
	EXECUTE stmt; 
	DEALLOCATE PREPARE stmt; 
END

重点:索引结构动态SQL的拼接如下:

CREATE DEFINER=`root`@`localhost` PROCEDURE `Step_05_02_CreateTableIndex`(
	in tableSchema varchar(50), 
	in tableName varchar(50)
)
BEGIN
	DECLARE is_finished int;
	DECLARE non_unique int; 		-- 非主键索引 0:是主键索引 1:非主键索引
	DECLARE idxname VARCHAR(50);	-- 索引名称
	DECLARE idxseq int;				-- 索引序号
	DECLARE clmname VARCHAR(50);	-- 索引列名称
	DECLARE idxtype VARCHAR(50);	-- 索引类型
	DECLARE idxcmt VARCHAR(50);		-- 索引备注
	DECLARE pre_sql VARCHAR(500) DEFAULT 'ADD $unique INDEX `$idxname`($idxcln) USING $idxtype $cmt';
	DECLARE result_sql VARCHAR(3500) DEFAULT '';
	
	DECLARE cursor_query CURSOR FOR SELECT * FROM tmp_table_idx_view; -- 从视图中读取数据
	DECLARE CONTINUE HANDLER FOR NOT FOUND SET is_finished = 1;
	
	SET @begin_time = now();
	select '创建表索引step2开始' as '任务名称', tableSchema as 'TableSchema', tableName as '表名';
	
	OPEN cursor_query;
	get_idx: LOOP
		
		FETCH cursor_query into non_unique, idxname, idxseq, clmname, idxtype, idxcmt;
		IF is_finished=1 THEN
			LEAVE get_idx; 
		END IF; 
		
		if idxseq = 1 then
			-- 新创建
			set result_sql = CONCAT(result_sql, ',', pre_sql);
			set result_sql = replace(result_sql,'$clmns', '');
			set result_sql = replace(result_sql, '$idxname', idxname);
			set result_sql = replace(result_sql, '$idxcln', CONCAT('`', clmname, '`', '$clmns'));
			set result_sql = replace(result_sql, '$idxtype', idxtype);
			
			if non_unique = 0 THEN
				set result_sql = replace(result_sql, '$unique', 'UNIQUE');
			else
				set result_sql = replace(result_sql, '$unique', '');
			end if;
			
			if idxcmt is NULL THEN
				set result_sql = replace(result_sql, '$cmt', '');
			else
				set result_sql = replace(result_sql, '$cmt', CONCAT(' COMMENT \'', idxcmt, '\''));
			end if;
			
		elseif idxseq > 1 then
			-- 替换
			set result_sql = replace(result_sql, '$clmns', CONCAT(',`', clmname, '`', '$clmns'));
		end if;

	END LOOP get_idx;
	CLOSE cursor_query;
	
	set result_sql = replace(result_sql , '$clmns', '');
	set result_sql = replace(result_sql, '$idxcln', '');
	set result_sql = CONCAT(replace(result_sql ,'$clmns',''), ';');
	
	set result_sql = right(result_sql, length(result_sql) - 1);
	set result_sql = CONCAT('ALTER TABLE `', tableName,'` ', result_sql);
	
	select '创建表索引step2中...' as '任务名称', result_sql as 'SQL';
	
	set @stmt = result_sql;
	PREPARE stmt FROM @stmt;  
	EXECUTE stmt;  
	DEALLOCATE PREPARE stmt; 
	SET @cost = TIMESTAMPDIFF(SECOND, @begin_time, now());
	select '创建表索引step2结束' as '任务名称', result_sql as 'SQL', @cost as '耗时(秒)';
	
	set @stmt = NULL;
	set result_sql = NULL;
	DROP VIEW IF EXISTS tmp_table_idx_view;

END

  • Step-6:重命名备份表,重命名新表
CREATE DEFINER=`root`@`localhost` PROCEDURE `Step_06_RenameTableName`(
  in schemaName VARCHAR(50), 
  in originalTableName VARCHAR(50), 
  in newTableName VARCHAR(50)
)
BEGIN
  /*
  原表名: t_user
  备份表: bak_t_user_年月日时分秒

  重命名后: 
    旧表: t_user --> bak_bak_t_user_年月日时分秒
    新表: bak_t_user_年月日时分秒 --> t_user
  */
	
  SET @begin_time = now();
  select '重命名表名开始' as '任务名称', schemaName as 'SchemaName', newTableName as '表名称', originalTableName as '原表名称';
	
  set @stmt = CONCAT('RENAME TABLE ', originalTableName, ' to bak_', newTableName, ', ', newTableName, ' to ', originalTableName);

  prepare stmt from @stmt;
  execute stmt;
  deallocate prepare stmt;
	
  SET @cost = TIMESTAMPDIFF(SECOND, @begin_time, now());
  select '重命名表名结束' as '任务名称', @stmt as '重命名表名SQL语句', @cost as '耗时(秒)';
END

其他:

删除表索引的方式有:

  • 方式一:
ALTER TABLE t_user 
drop index idx_name,
drop index idx_age;
  • 方式二
drop index idx_name on t_user;
drop index idx_age on t_user;

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值