有需求如下:
现在要补齐tb1中演唱歌曲字段。条件是去tb2中查找相同艺人演唱过的歌曲,随机填充到tb1中的歌曲名字段
一个歌手不止演唱一首歌,所以tb2中是艺人演唱所有歌曲的集合。tb1中同一个歌手可能出现好几次
补齐时候需根据tb1中艺人名称去tb2也就是艺人歌曲汇总表中查找相同艺人演唱的歌曲名称。
需要在艺人名相同的情况下,随机取tb2中的演唱歌曲名,逐一补齐tb1中的演唱歌曲名字段。
tb1
艺人 演唱歌曲名
a null
b null
c null
a null
s null
d null
e null
tb2
艺人 演唱歌曲名
a aa
a ab
b bb
b ba
b bbb
d dd
d d2
f ddd
c cc
补齐后tb1的期望结果示例:
艺人 演唱歌曲名称
a aa (tb1中同一艺人可能出现多次,每次只需从tb2中随机取一条来填充)
a ab
b bb
d dd
c cc
=========================================================
一、最终SQL结果
1、sqlserver的实现:
-- tb1: the rows whose song title (ycgqm) has to be filled in.
create table tb1 (
    id    varchar(60) not null, -- row key; the fill-in UPDATE matches rows on it, so it must be unique
    yr    varchar(20),          -- artist name
    ycgqm varchar(50),          -- song title, NULL until it is filled in
    constraint tb1_pk primary key (id)
);

-- tb2: lookup table holding the songs each artist has performed.
create table tb2 (
    id    varchar(60), -- row key (optional; nothing in this script depends on it)
    yr    varchar(20), -- artist name
    ycgqm varchar(50)  -- song title
);
-- Sample rows for tb1: every song title starts out NULL.
-- newid() is evaluated once per row, so each row gets its own id.
insert into tb1 (id, yr, ycgqm)
values
    (newid(), 'a', null),
    (newid(), 'b', null),
    (newid(), 'e', null),
    (newid(), 'a', null),
    (newid(), 's', null),
    (newid(), 'd', null),
    (newid(), 'e', null),
    (newid(), 'a', null);

-- Sample rows for tb2: the songs known for each artist.
insert into tb2 (id, yr, ycgqm)
values
    (newid(), 'a', 'aa'),
    (newid(), 'a', 'ab'),
    (newid(), 'b', 'bb'),
    (newid(), 'b', 'ba'),
    (newid(), 'b', 'bbb'),
    (newid(), 'd', 'dd'),
    (newid(), 'd', 'd2'),
    (newid(), 'f', 'ddd'),
    (newid(), 'c', 'cc'),
    (newid(), 'a', 'ac');
-- Fill in the missing song titles of tb1 from tb2 so that two tb1 rows of the
-- same artist never receive the same tb2 row.
--
-- How it works:
--   1. tb1_missing numbers the still-empty tb1 rows 1..n inside each artist.
--   2. tb2_unused numbers, inside each artist, the songs that no tb1 row of
--      that artist already carries.
--   3. Row k of an artist receives song k of that artist.
--
-- Rows are left untouched (title stays NULL) when the artist has no song in
-- tb2 or has more empty rows than unused songs. Rows that already have a title
-- are never overwritten.
with tb1_missing as (
    select
        id,
        yr,
        -- id is expected to be unique, which makes the numbering repeatable
        row_number() over (partition by yr order by id) as seq
    from tb1
    where ycgqm is null
),
tb2_unused as (
    select
        s.yr,
        s.ycgqm,
        -- Alphabetical order keeps the result reproducible. If a genuinely
        -- random assignment is wanted, ordering by newid() here is the usual
        -- T-SQL idiom (TODO confirm with the requirement owner).
        row_number() over (partition by s.yr order by s.ycgqm) as seq
    from tb2 as s
    where not exists (
        select 1
        from tb1 as used
        where used.yr = s.yr
          and used.ycgqm = s.ycgqm
    )
)
update t
set ycgqm = u.ycgqm
from tb1 as t
inner join tb1_missing as m
    on m.id = t.id
inner join tb2_unused as u
    on u.yr = m.yr
   and u.seq = m.seq;
2、oracle的实现:
-- tb1: the rows whose song title (ycgqm) has to be filled in.
-- There is no key column; the fill-in statement identifies rows by ROWID.
create table tb1 (
    yr    varchar2(20), -- artist name
    ycgqm varchar2(50)  -- song title, NULL until it is filled in
);

-- tb2: lookup table holding the songs each artist has performed.
create table tb2 (
    yr    varchar2(20), -- artist name
    ycgqm varchar2(50)  -- song title
);

-- Quick look at tb1 before the sample rows are loaded.
select yr, ycgqm from tb1;
-- Sample rows for tb1: every song title starts out NULL.
insert all
    into tb1 (yr, ycgqm) values ('a', null)
    into tb1 (yr, ycgqm) values ('b', null)
    into tb1 (yr, ycgqm) values ('e', null)
    into tb1 (yr, ycgqm) values ('a', null)
    into tb1 (yr, ycgqm) values ('s', null)
    into tb1 (yr, ycgqm) values ('d', null)
    into tb1 (yr, ycgqm) values ('e', null)
    into tb1 (yr, ycgqm) values ('a', null)
select 1 from dual;

-- Sample rows for tb2: the songs known for each artist.
insert all
    into tb2 (yr, ycgqm) values ('a', 'aa')
    into tb2 (yr, ycgqm) values ('a', 'ab')
    into tb2 (yr, ycgqm) values ('b', 'bb')
    into tb2 (yr, ycgqm) values ('b', 'ba')
    into tb2 (yr, ycgqm) values ('b', 'bbb')
    into tb2 (yr, ycgqm) values ('d', 'dd')
    into tb2 (yr, ycgqm) values ('d', 'd2')
    into tb2 (yr, ycgqm) values ('f', 'ddd')
    into tb2 (yr, ycgqm) values ('c', 'cc')
    into tb2 (yr, ycgqm) values ('a', 'ac')
select 1 from dual;
-- Fill in the missing song titles of tb1 from tb2 so that two tb1 rows of the
-- same artist never receive the same tb2 row.
--
-- How it works:
--   1. Inline view m numbers the still-empty tb1 rows 1..n inside each artist.
--   2. Inline view u numbers, inside each artist, the songs that no tb1 row of
--      that artist already carries.
--   3. Row k of an artist receives song k of that artist; the target row is
--      located through its ROWID because tb1 has no key column.
--
-- Rows are left untouched (title stays NULL) when the artist has no song in
-- tb2 or has more empty rows than unused songs. Rows that already have a title
-- are never overwritten.
merge into tb1 t
using (
    select
        m.rid,
        u.ycgqm
    from (
        select
            rowid as rid,
            yr,
            -- ROWID is unique per row, which makes the numbering repeatable
            row_number() over (partition by yr order by rowid) as seq
        from tb1
        where ycgqm is null
    ) m
    inner join (
        select
            s.yr,
            s.ycgqm,
            -- Alphabetical order keeps the result reproducible. If a genuinely
            -- random assignment is wanted, ordering by dbms_random.value here
            -- is the usual Oracle idiom (TODO confirm with the requirement owner).
            row_number() over (partition by s.yr order by s.ycgqm) as seq
        from tb2 s
        where not exists (
            select 1
            from tb1 used
            where used.yr = s.yr
              and used.ycgqm = s.ycgqm
        )
    ) u
        on u.yr = m.yr
       and u.seq = m.seq
) src
on (t.rowid = src.rid)
when matched then
    update set t.ycgqm = src.ycgqm;
二、实现思路
整个思路关键在于tb1中的多个歌手需要随机填写tb2中的歌手对应的歌曲,而且不重复。对于这点,第一想到随机,rand,但这没法保证不重复。于是想到方法
1、对tb1中根据歌手分组,每个歌手有多条记录,则按歌手内记录顺序编号,也就是第一个歌手如果有2条记录,则为1,2,第二个歌手有3条记录,则为1,2,3,这也就是对应第一层SQL
2、对tb1和tb2做笛卡尔积,形成矩阵表(效率是值得斟酌的,如果数据量大,那必须抛弃了),根据结果,按歌手分组,按歌曲在歌手内顺序编号。
3、取歌手顺序和歌曲顺序相等的记录。这是因为如果有1个歌手,在tb1中有3条记录,那么编号是1,2,3,按歌曲编号后,每条记录对应3个歌曲,也就是笛卡尔积后生成3条记录,这记录编号也是1,2,3排序,而且每条记录的编号排序规则是一样的。所以第一条记录取第一首歌曲,第二条记录取第二首歌曲,依次类推,只要歌曲数多肯定不会重复
4、根据主键或者rowid最终定位每条tb1记录的位置,便于update
三、最后补充一种评论中的思路,因为SQL太长,评论中不让贴,这个是随机获取歌手下的歌曲
-- Give every tb1 row that still lacks a song title one randomly chosen song of
-- the same artist from tb2.
--
-- Each (tb1 row, candidate song) pair gets a random sort key from newid(), and
-- the pair ranked first for a tb1 row is the pick. Ranking yields exactly one
-- pick per row. Comparing a song's row number with a separately computed
-- random number does not, because that only works if the random expression is
-- evaluated exactly once per row and never comes out as 0.
--
-- The picks are independent, so two rows of the same artist may receive the
-- same song. Rows whose artist has no song in tb2 are left untouched, and rows
-- that already have a title are never overwritten.
with candidate as (
    select
        t.id,
        s.ycgqm,
        row_number() over (partition by t.id order by newid()) as pick
    from tb1 as t
    inner join tb2 as s
        on s.yr = t.yr
    where t.ycgqm is null
)
update t
set ycgqm = c.ycgqm
from tb1 as t
inner join candidate as c
    on c.id = t.id
   and c.pick = 1;