1.如何在20s内完成两个表各100w条数据的快速比对。
CREATE DATABASE test;
use test;
CREATE TABLE student (
id int,
name varchar(255),
address varchar(255),
city varchar(255)
);
CREATE TABLE teacher (
id int,
name varchar(255),
address varchar(255),
city varchar(255)
);
INSERT INTO student VALUES (1, '刘一', '郑十家隔壁', '河南');
INSERT INTO student VALUES (2, '陈二', '李四家隔壁', '安徽');
INSERT INTO student VALUES (3, '张三', '白娘子家隔壁', '杭州');
INSERT INTO student VALUES (4, '李四', '许仙家隔壁', '杭州');
INSERT INTO student VALUES (5, '王五', '李四家隔壁', '杭州');
INSERT INTO student VALUES (6, '赵六', '赵六家隔壁', '杭州');
INSERT INTO student VALUES (7, '孙七', '张三家隔壁', '杭州');
INSERT INTO student VALUES (8, '周八', '雷峰塔附近', '杭州');
INSERT INTO student VALUES (9, '吴九', '孙七家隔壁', '杭州');
INSERT INTO student VALUES (10, '郑十', '周八家隔壁', '杭州');
INSERT INTO teacher VALUES (1, '刘一', '郑十家隔壁', '河南');
INSERT INTO teacher VALUES (2, '陈二', '李四家隔壁', '安徽');
INSERT INTO teacher VALUES (3, '张三', '白娘子家隔壁', '杭州');
INSERT INTO teacher VALUES (4, '李四', '许仙家隔壁', '杭州');
INSERT INTO teacher VALUES (5, '王五', '李四家隔壁', '杭州');
INSERT INTO teacher VALUES (6, '赵六', '赵六家隔壁', '杭州');
INSERT INTO teacher VALUES (7, '孙七', '张三家隔壁', '杭州');
INSERT INTO teacher VALUES (8, '周八', '雷峰塔附近', '杭州');
INSERT INTO teacher VALUES (9, '吴九1', '孙七家隔壁', '杭州');
-- select md5(concat_ws('|',name,address)) as record_key from teacher;
-- SELECT * FROM student;
-- select * from teacher;
-- select l.record_key,r.record_key
select count(distinct l.record_key) as left_table_num,
count(r.record_key) as right_table_num,
count(distinct case when l.record_key= r.record_key then l.record_key END) as left_right_equal_num
from
(select md5(concat_ws('|',name,address)) as record_key from student) l
left join
(select md5(concat_ws('|',name,address)) as record_key from teacher) r
on l.record_key = r.record_key
union all
select count(distinct l.record_key) ,
count(r.record_key) ,
count(distinct case when l.record_key= r.record_key then l.record_key END)
from
(select md5(concat_ws('|',name,address)) as record_key from teacher) r
left join
(select md5(concat_ws('|',name,address)) as record_key from student) l
on r.record_key = l.record_key
where l.record_key is NULL
left join + right join +union all 模拟 full outer join
左表数量, 右表中匹配的数量, 左右表相等的数量。
未匹配的记录, right 中独有的记录, 未匹配的记录

被折叠的 条评论
为什么被折叠?



