--
个人blog http://blog.youkuaiyun.com/wlzd3636/
--
-----------------------------------------------------------
--
关于分页,排序中 子查询和排序函数,中间表+identity列的性能比较.
--
2000中没有排序函数 只有子查询和 identity列
--
生成测试表
if
object_id
(N
'
test1
'
,
'
U
'
)
is
not
null
drop
table
test1
create
table
test1
(id
int
identity
(
1
,
1
),
part
varchar
(
20
),
value
smalldatetime
not
null
);
create
clustered
index
index_value
on
test1(value);
create
nonclustered
index
index_part
on
test1(part);
--
生成测试数据
declare
@row
int
,
@part
int
,
@date
datetime
select
@row
=
1
,
@part
=
1
while
@part
<
1000
begin
select
@date
=
'
2006-01-01
'
while
@row
<
200
begin
insert
into
test1(part,value)
select
'
a
'
+
cast
(
@part
as
varchar
(
3
)),
@date
select
@date
=
@date
+
1
,
@row
=
@row
+
1
end
select
@part
=
@part
+
1
,
@row
=
1
end
--
pm1.8 内存 768 生成脚本耗时 02:01 min
--
ps
select
*
from
test1
order
by
part
select
*
from
test1
你会发现
identity
列 优化器并不会一定按照顺序来分配的!
--
2000 子查询
select
part,value,px
=
(
select
count
(value)
+
1
from
test1
as
b
where
a.part
=
b.part
and
a.value
>
b.value)
from
test1 a
--
pm 1.8 内存 768 20w 数据在 part 和 value 有索引的前提下 耗时 01.17 min
--
2005 排名函数
select
part,value,px
=
row_number()
over
(partition
by
part
order
by
value)
from
test1
--
pm 1.8 内存 768 20w 数据在 part 和 value 有索引的前提下 耗时 00.19 min
先分析 2000子查询
在有索引的前提下一个分区排序扫描的行数为找个分区行数(假设为n行)
扫描的行数为 (n
+
n
*
n)
/
2
所以所有的分区扫描的行数为 (假设分区为n个 每个分区的行数为 partn)
(part1
+
part1
*
part1)
/
2
+
(part2
+
part2
*
part2)
/
2
+
...
+
(partn
+
partn
*
partn)
/
2
假设每个分区都为n行
则总扫描行数为 part数
*
(n
+
n
*
n)
/
2
因此
最优化子查询的每个分区平均行数数 最好在 10行左右
--
再谈 2000下 select id=identity(int,1,1), * into # from table
数据库假如不在
full
模式下
select
into
是不完全记录日志的
因此 2000模式下 用 自增列在排序也要优于子查询生成 排名函数
除非 每个分区的平均行数在10行左右.
--
在一般游戏里面可能都会统计到一段时间内玩家连续上线的次数并且计算积分
--
eg:
--
测试数据
create
table
t(name
varchar
(
10
),date
smalldatetime
)
insert
into
t
select
'
a
'
,
'
2008-01-01
'
union
all
select
'
a
'
,
'
2008-01-02
'
union
all
select
'
a
'
,
'
2008-01-03
'
union
all
select
'
a
'
,
'
2008-01-06
'
union
all
select
'
a
'
,
'
2008-01-07
'
union
all
select
'
a
'
,
'
2008-01-09
'
union
all
select
'
b
'
,
'
2008-01-01
'
union
all
select
'
b
'
,
'
2008-01-02
'
union
all
select
'
b
'
,
'
2008-01-03
'
union
all
select
'
b
'
,
'
2008-01-04
'
union
all
select
'
b
'
,
'
2008-01-07
'
union
all
select
'
b
'
,
'
2008-01-08
'
--
1要求分组 合并连续的日期 最大和最小
--
2 单独显示不连续的日期
--
徐王锦 2008/12/30 pm 16:40--
--
2005
create
function
f_4(
@name
varchar
(
10
))
returns
@table
table
(name
varchar
(
10
),date
varchar
(
4000
))
as
begin
declare
@minpx
int
,
@maxpx
int
,
@date
smalldatetime
,
@date2
smalldatetime
declare
@table2
table
(name
varchar
(
10
),date
smalldatetime
,px
int
)
insert
into
@table2
select
name,date,px
=
row_number()
over
(partition
by
name
order
by
date)
from
t
where
name
=
@name
select
@minpx
=
2
,
@maxpx
=
@@rowcount
select
@date
=
date
from
@table2
where
px
=
1
insert
into
@table
select
@name
,
convert
(
char
(
10
),
@date
,
120
)
select
@date2
=
date
from
@table2
where
px
=
@minpx
while
@minpx
<=
@maxpx
begin
if
@date2
=
(
select
cast
(
right
(date,
10
)
as
smalldatetime
)
+
1
from
@table
where
cast
(
right
(date,
10
)
as
smalldatetime
)
=
@date
)
begin
update
a
set
a.date
=
a.date
+
N
'
-
'
+
convert
(
char
(
10
),
@date2
,
120
)
from
@table
as
a
where
cast
(
right
(a.date,
10
)
as
smalldatetime
)
=
@date
end
else
if
@date2
>
(
select
cast
(
right
(date,
10
)
as
smalldatetime
)
+
1
from
@table
where
cast
(
right
(date,
10
)
as
smalldatetime
)
=
@date
)
begin
insert
into
@table
select
@name
,
convert
(
char
(
10
),
@date2
,
120
)
end
select
@date
=
@date2
,
@minpx
=
@minpx
+
1
select
@date2
=
date
from
@table2
where
px
=
@minpx
end
return
end
select
x2.name,
case
when
len
(x2.date)
>
10
then
left
(x2.date,
10
)
+
N
'
-
'
+right
(x2.date,
10
)
else
x2.date
end
as
date
from
(
select
distinct
name
from
t)x
cross
apply
f_4(x.name)
as
x2
/*
name date
---------- ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
a 2008-01-01-2008-01-03
a 2008-01-06-2008-01-07
a 2008-01-09
b 2008-01-01-2008-01-04
b 2008-01-07-2008-01-08
(5 行受影响)
*/
以上是基于函数编写的
--
ps pm1.8 内存 768. 2w行数据 耗时 01:54
select
name,
case
when
min
(date)
=
max
(date)
then
convert
(
char
(
10
),
min
(date),
120
)
else
convert
(
char
(
10
),
min
(date),
120
)
+
N
'
-
'
+
convert
(
char
(
10
),
max
(date),
120
)
end
as
date
from
(
select
name,date,date
-
row_number()
over
(partition
by
name
order
by
date)
as
px
from
t)x
group
by
name,px
order
by
name
--
pm1.8 内存 768. 2w行数据 耗时 耗时 00:00
--
2000可以用 子查询/中间表+select into 替换 具体看每个分区的平均行数.
因此
2005
/
2000中
排名函数
>
select
into
>
=
子查询(试分区中平均行数而定)
以上结论一样应用于任何需要分区排序解决的问题。