这篇来说说TOP 和APPLY
1.TOP
一般来说 我们的TOP 和 ORDER BY 是一起连用的 这样可以返回确定的行 你可以看做是一个筛选器,可以再表表达式中使用。
SQL 2005 TOP 后面支持连接 变量 算术表达式 子查询结果.
在2000里 只支持后面具体数字 如果想在2000中使用类似TOP (@n)的功能 参考http://blog.youkuaiyun.com/feixianxxx/archive/2009/08/03/4405684.aspx
这里有个小知识点: WITH TIES 。
create table # (a int)
insert # select
1 union all select
2 union all select
2 union all select
3 union all select
3 union all select
5
select top (4) A from # order by a
/*
A
-----------
1
2
2
3
*/
select top (4) with ties A from # order by a
/*
A
-----------
1
2
2
3
3
*/
--使用了WITH TIES 后 将生成额外的行,多出来的行是与最后返回行相同的值 例子里是3
SQL2005支持TOP出现在INSERT UPDATE DELETE 后面 控制操作的行数,比如 insert top(10) into tb select * from .....
不过可惜ORDER BY不能用于这些结构中,这样你也许会觉得无法控制。倒是有方法可以控制:
插入:insert tb select top (n) * from .... order by ....
删除:with cte as
(
select top(10) * from ... order by ...
)
delete from cte
修改: with cte as
(
select top(10) * from ... order by ...
)
update cte set ......
实际小问题:
工作中有时候需要清理历史数据的时候,往往数据量达到了几百万.这个时候采用一次性删除的方法是很不明智的:
a.delete操作会被完整记录到日志里,它需要大量空间和时间;
b.如果删除中间发生中断,一切删除会回滚(在一个事务里);
c.同时删除多行,记录上的锁也许会被提升为排它表锁,从而阻碍操作完成之前有对这个表的操作(有时候会妨碍正常的业务)
所以一般采取分批删除的方法.
sql2000:通过set rowcount 来控制每次删除的记录数
SET ROWCOUNT 5000;
WHILE 1 = 1
BEGIN
DELETE FROM dbo.LargeOrders
WHERE OrderDate < '19970101';
IF @@rowcount < 5000 BREAK;
END
SET ROWCOUNT 0;
sql2005:通过TOP的方法
WHILE 1 = 1
BEGIN
DELETE TOP(5000) FROM dbo.LargeOrders
WHERE OrderDate < '19970101';
IF @@rowcount < 5000 BREAK;
END
-----------------
分批更新大量数据类似.
PS: 如果一个表有709行 你 select top (1) percent 709除以100等于7.09 返回的行数是8 说明它是向上舍入的
2.APPLY
它是一个表运算符--FROM A CROSS APPLY() B 它一定先是计算左边的表表达式,这个是必须了解的.
具体参考MSDN,写得很清楚了http://msdn.microsoft.com/zh-cn/library/ms175156.aspx
3.TOP 和 APPLY 结合解决问题
a.返回每组的前N行
create table #test (id int, val int)
create table #t (id int,yuangong varchar(10))
insert #test select
1,2 union all select
1,1 union all select
1,4 union all select
2,1 union all select
2,2 union all select
3,3 union all select
3,4 union all select
3,5 union all select
3,6 union all select
3,9 union all select
4,1
insert #t select
1,'a' union all select
2,'b' union all select
3,'c' union all select
4,'d'
--方法1
select a.yuangong,k.val
from #test k join #t a on a.id=k.id
where val in(select top (2) val from #test where k.id=id order by val)
order by a.yuangong,val
--方法2
select k.yuangong,l.val
from #t k cross apply( select top (2) val from #test where k.id=id order by val)l
order by k.yuangong,val
--方法3
select a.yuangong,k.val
from #t a join (select ROW_NUMBER() over(PARTITION by id order by val) as rn ,*from #test) k
on a.id=k.id
where rn=1 or rn=2
order by a.yuangong,val
/*
yuangong val
---------- -----------
a 1
a 2
b 1
b 2
c 3
c 4
d 1
*/
---效率越来越高依次...执行计划上看...
b.匹配当前值和前一个值
create table #p (id int ,eid int ,num1 int,num2 int )
insert #p select
1,1,2, 1 union all select
2,1,3 ,2union all select
3,2,2 ,2union all select
4,2,3 ,3union all select
5,3,2 ,4union all select
7,3,3 ,5union all select
9,3,4 ,5union all select
11,4,2 ,5union all select
13,4,2 ,6union all select
18,4,2 ,7
--方法1:
select a.eid as c_eid ,a.num1 as c_num1, a.num2 as c_num2,
b.eid as p_eid,b.num1 as p_num1, b.num2 as p_num2
from #p a left join #p b on
b.id=(select top 1 id from #p where eid=a.eid and (a.num1>num1 or(a.num1=num1 and a.num2>num2 )) ORDER BY num1,num2)
--方法2:
select a.eid as c_eid ,a.num1 as c_num1, a.num2 as c_num2,
b.eid as p_eid,b.num1 as p_num1, b.num2 as p_num2
from #p a OUTER APPLY --注意这里用OUTER
(SELECT TOP 1 eid,num1,num2 FROM #p WHERE eid=a.eid and (a.num1>num1 or(a.num1=num1 and a.num2>num2 )) ORDER BY num1,num2 )B
--方法3:
;with cte as
(
select ROW_NUMBER() over(PARTITION by eid order by num1,num2) as rn ,* from #p
)
select a.eid as c_eid ,a.num1 as c_num1, a.num2 as c_num2,
b.eid as p_eid,b.num1 as p_num1, b.num2 as p_num2
from cte a left join cte b on a.eid=b.eid and a.rn=b.rn+1
/*
c_eid c_num1 c_num2 p_eid p_num1 p_num2
----------- ----------- ----------- ----------- ----------- -----------
1 2 1 NULL NULL NULL
1 3 2 1 2 1
2 2 2 NULL NULL NULL
2 3 3 2 2 2
3 2 4 NULL NULL NULL
3 3 5 3 2 4
3 4 5 3 3 5
4 2 5 NULL NULL NULL
4 2 6 4 2 5
4 2 7 4 2 6
*/
---执行计划上看 最低的还是第一个 用ROW_NUMBER时最快的
c.随机行
说道随机 有人会想到 select top .....order by rand()
create table # (val int)
insert # select
1 union all select
7 union all select
14 union all select
13 union all select
12 union all select
11 union all select
4
select top 3 *
from #
order by RAND()
/*
val
-----------
1
7
14
*/
--你连续运行这个代码,你会发现一直是同一个结果,那是因为RAND()还有getdate()等大多数的不确定函数对同一个查询只调用一次.
关于RAND()函数我简单说个东西.
运行:select rand(7),rand()
/*
---------------------- ----------------------
0.713703791040473 0.945719735396816
*/
这里的结果无论你运行几次都一样;这是因为RAND()函数其实是基于你上个RAND()函数运行的调用而运行的.这里是RAND(7)..
RAND返回一定范围的数字:
方法1的数字范围:0至N-1之间,如CAST( FLOOR(RAND()*100) AS INT)就会生成0至99之间任一整数
方法2的数字范围:1至N之间,如CAST(CEILING(RAND() * 100) AS INT)就会生成1至100之间任一整数
那么返回指定行数的随机行:
SELECT TOP N * FROM TABLE_NAME ORDER BY NEWID() ----N是一个你指定的整数,表是取得记录的条数.
or: SELECT TOP N * FROM TABLE_NAME ORDER BY CHECKSUM(NEWID()) --CHECKSUM(NEWID())可以得到一个随机值
if object_id('tb') is not null
drop table tb
create table tb (s_id int,t_id int, fenshu int)
insert into tb
select 1,1,66 union all
select 1,2,67 union all
select 2,1,65 union all
select 2,2,78 union all
select 3,1,66 union all
select 3,2,55
--这个方法可以给不同数加上不同随机数(newid())
select fenshu,(fenshu+cast(ceiling(RAND(CHECKSUM(NEWID()))*10)as int))as fenshu2
from tb
/*
fenshu fenshu2
----------- -----------
66 68
67 72
65 67
78 80
66 76
55 62
fenshu fenshu2
----------- -----------
66 67
67 74
65 66
78 80
66 73
55 57
*/
--这个方法只能给不同数随机加上相同数(RAND())
select fenshu,(fenshu+cast(CEILING(RAND() * 10) AS INT))as fenshu2
from tb
或者
SELECT fenshu,fenshu+cast(ceiling(RAND(CHECKSUM(rand()))*10) as int) fenshu2
FROM tb
/*
fenshu fenshu2
----------- -----------
66 67
67 68
65 66
78 79
66 67
55 56
*/
d.中间值
我在前面那篇笔记中已经写了相关的问题,这里针对TOP和忽然想到的NTILE 再说2个方法
create table a(rq varchar(8), ddsj int)
insert into a
select
'200805',30 union all select
'200805',40 union all select
'200805',50 union all select
'200805',20 union all select
'200806',250 union all select
'200806',200 union all select
'200806',310 union all select
'200806',100 union all select
'200806',130;
方法1:top
select rq,
((select MAX(ddsj) from (select top 50 percent ddsj from a where rq=k.rq order by ddsj) as p)+
(select Min(ddsj) from (select top 50 percent ddsj from a where rq=k.rq order by ddsj desc ) as p))/2 as 中值
from a k
group by rq
方法2:NTILE
select distinct rq,
((select max(ddsj) from (select NTILE(2) over(partition by rq order by ddsj) as rn1,* from a ) z where k.rq=rq and rn1=1)+
(select min(ddsj) from (select NTILE(2) over(partition by rq order by ddsj desc ) as rn2 ,* from a ) z where k.rq=rq and rn2=1))/2 as 中值
from a k
/*
rq 中值
-------- -----------
200805 35
200806 200
*/