在项目中,我们经常遇到或用到分页,那么在大数据量(百万级以上)下,哪种分页算法效率最优呢?我们不妨 用事实说话。
测试环境
硬件:CPU 酷睿双核T5750 内存:2G
软件:Windows server 2003 + Sql server 2005
OK,我们首先创建一数据库:data_Test,并在此数据库中创建一表:tb_TestTable
1
create
database
data_Test
--
创建数据库data_Test
2
GO
3
use
data_Test
4
GO
5
create
table
tb_TestTable
--
创建表
6
(
7
id
int
identity
(
1
,
1
)
primary
key
,
8
userName
nvarchar
(
20
)
not
null
,
9
userPWD
nvarchar
(
20
)
not
null
,
10
userEmail
nvarchar
(
40
)
null
11
)
12
GO
然后我们在数据表中插入2000000条数据:
1
--
插入数据
2
set
identity_insert
tb_TestTable
on
3
declare
@count
int
4
set
@count
=
1
5
while
@count
<=
2000000
6
begin
7
insert
into
tb_TestTable(id,userName,userPWD,userEmail)
values
(
@count
,
'
admin
'
,
'
admin888
'
,
'
lli0077@yahoo.com.cn
'
)
8
set
@count
=
@count
+
1
9
end
10
set
identity_insert
tb_TestTable
off
我首先写了五个常用存储过程:
1,利用select top 和select not in进行分页,具体代码如下:
1
create procedure proc_paged_with_notin -- 利用select top and select not in
2
(
3
@pageIndex int , -- 页索引
4
@pageSize int -- 每页记录数
5
)
6
as
7
begin
8
set nocount on ;
9
declare @timediff datetime -- 耗时
10
declare @sql nvarchar ( 500 )
11
select @timediff = Getdate ()
12
set @sql = ' select top ' + str ( @pageSize ) + ' * from tb_TestTable where(ID not in(select top ' + str ( @pageSize * @pageIndex ) + ' id from tb_TestTable order by ID ASC)) order by ID '
13
execute ( @sql ) -- 因select top后不支技直接接参数,所以写成了字符串@sql
14
select datediff (ms, @timediff , GetDate ()) as 耗时
15
set nocount off ;
16
end
2,利用select top 和 select max(列键)
1
create procedure proc_paged_with_selectMax -- 利用select top and select max(列)
2
(
3
@pageIndex int , -- 页索引
4
@pageSize int -- 页记录数
5
)
6
as
7
begin
8
set nocount on ;
9
declare @timediff datetime
10
declare @sql nvarchar ( 500 )
11
select @timediff = Getdate ()
12
set @sql = ' select top ' + str ( @pageSize ) + ' * From tb_TestTable where(ID>(select max(id) From (select top ' + str ( @pageSize * @pageIndex ) + ' id From tb_TestTable order by ID) as TempTable)) order by ID '
13
execute ( @sql )
14
select datediff (ms, @timediff , GetDate ()) as 耗时
15
set nocount off ;
16
end
3,利用select top和中间变量--此方法因网上有人说效果最佳,所以贴出来一同测试
1
create procedure proc_paged_with_Midvar -- 利用ID>最大ID值和中间变量
2
(
3
@pageIndex int ,
4
@pageSize int
5
)
6
as
7
declare @count int
8
declare @ID int
9
declare @timediff datetime
10
declare @sql nvarchar ( 500 )
11
begin
12
set nocount on ;
13
select @count = 0 , @ID = 0 , @timediff = getdate ()
14
select @count = @count + 1 , @ID = case when @count <= @pageSize * @pageIndex then ID else @ID end from tb_testTable order by id
15
set @sql = ' select top ' + str ( @pageSize ) + ' * from tb_testTable where ID> ' + str ( @ID )
16
execute ( @sql )
17
select datediff (ms, @timediff , getdate ()) as 耗时
18
set nocount off ;
19
end
20
4,利用Row_number() 此方法为SQL server 2005中新的方法,利用Row_number()给数据行加上索引
1
create procedure proc_paged_with_Rownumber -- 利用SQL 2005中的Row_number()
2
(
3
@pageIndex int ,
4
@pageSize int
5
)
6
as
7
declare @timediff datetime
8
begin
9
set nocount on ;
10
select @timediff = getdate ()
11
select * from ( select * ,Row_number() over ( order by ID asc ) as IDRank from tb_testTable) as IDWithRowNumber where IDRank > @pageSize * @pageIndex and IDRank < @pageSize * ( @pageIndex + 1 )
12
select datediff (ms, @timediff , getdate ()) as 耗时
13
set nocount off ;
14
end
15
5,利用临时表及Row_number
1
create procedure proc_CTE -- 利用临时表及Row_number
2
(
3
@pageIndex int , -- 页索引
4
@pageSize int -- 页记录数
5
)
6
as
7
set nocount on ;
8
declare @ctestr nvarchar ( 400 )
9
declare @strSql nvarchar ( 400 )
10
declare @datediff datetime
11
begin
12
select @datediff = GetDate ()
13
set @ctestr = ' with Table_CTE as
14
(select ceiling((Row_number() over(order by ID ASC))/ ' + str ( @pageSize ) + ' ) as page_num,* from tb_TestTable) ' ;
15
set @strSql = @ctestr + ' select * From Table_CTE where page_num= ' + str ( @pageIndex )
16
end
17
begin
18
execute sp_executesql @strSql
19
select datediff (ms, @datediff , GetDate ())
20
set nocount off ;
21
end
22
OK,至此,存储过程创建完毕,我们分别在每页10条数据的情况下在第2页,第1000页,第 10000页,第100000页,第199999页进行测试,耗时单位:ms 每页测试5次取其平均值
存过 | 第2页耗时 | 第1000页耗时 | 第10000页耗时 | 第100000页耗时 | 第199999页耗时 | 效率排行 |
1用not in | 0ms | 16ms | 47ms | 475ms | 953ms | 3 |
2用select max | 5ms | 16ms | 35ms | 325ms | 623ms | 1 |
3中间变量 | 966ms | 970ms | 960ms | 945ms | 933ms | 5 |
4row_number | 0ms | 0ms | 34ms | 365ms | 710ms | 2 |
4临时表 | 780ms | 796ms | 798ms | 780ms | 805ms | 4 |
测试结果显示:select max >row_number>not in>临时表>中间变量
于是我对效率最高的select max方法用2分法进行了扩展,代码取自互联网,我修改了ASC排序时取不到值的BUG,测试结果:
2分法 | 156ms | 156ms | 180ms | 470ms | 156ms | 1* |
从测试结果来看,使用2分法确实可以提高效率并使效率更为稳定,我又增加了第159999页的测试,用时仅296ms,效果相当的不错!
下面是2分法使用select max的代码,已相当完善。
1
--
/*-----存储过程 分页处理 孙伟 2005-03-28创 建 -------*/
2
--
/*-----存储过 程 分页处理 浪尘 2008-9-1修改----------*/
3
--
/*----- 对数据进 行了2分处理使查询前半部分数据与查询后半部分数据性能相同 -------*/
4
5
alter
PROCEDURE
proc_paged_2part_selectMax
6
(
7
@tblName
nvarchar
(
200
),
--
--要显示的表或多个表 的连接
8
@fldName
nvarchar
(
500
)
=
'
*
'
,
--
--要显示的字段列表
9
@pageSize
int
=
10
,
--
--每页显示的记录个 数
10
@page
int
=
1
,
--
--要显示那一页的记 录
11
@fldSort
nvarchar
(
200
)
=
null
,
--
--排序字段列表或条件
12
@Sort
bit
=
0
,
--
--排序方法,0为升 序,1为降序(如果是多字段排列Sort指代最后一个排序字段的排列顺序(最后一个排序字段不加排序标记)--程序传参 如:' SortA Asc,SortB Desc,SortC ')
13
@strCondition
nvarchar
(
1000
)
=
null
,
--
--查询条件,不需where
14
@ID
nvarchar
(
150
),
--
--主表的主键
15
@Dist
bit
=
0
,
--
--是否添加查询字段的 DISTINCT 默认0不添加/1添加
16
@pageCount
int
=
1
output,
--
--查询结果分页后的总页数
17
@Counts
int
=
1
output
--
--查询到的记录数
18
)
19
AS
20
SET
NOCOUNT
ON
21
Declare
@sqlTmp
nvarchar
(
1000
)
--
--存放动态生成的 SQL语句
22
Declare
@strTmp
nvarchar
(
1000
)
--
--存放取得查询结果总 数的查询语句
23
Declare
@strID
nvarchar
(
1000
)
--
--存放取得查询开头或 结尾ID的查询语句
24
25
Declare
@strSortType
nvarchar
(
10
)
--
--数据排序规则A
26
Declare
@strFSortType
nvarchar
(
10
)
--
--数据排序规则B
27
28
Declare
@SqlSelect
nvarchar
(
50
)
--
--对含有 DISTINCT的查询进行SQL构造
29
Declare
@SqlCounts
nvarchar
(
50
)
--
--对含有 DISTINCT的总数查询进行SQL构造
30
31
declare
@timediff
datetime
--
耗时测试时间差
32
select
@timediff
=
getdate
()
33
34
if
@Dist
=
0
35
begin
36
set
@SqlSelect
=
'
select
'
37
set
@SqlCounts
=
'
Count(*)
'
38
end
39
else
40
begin
41
set
@SqlSelect
=
'
select distinct
'
42
set
@SqlCounts
=
'
Count(DISTINCT
'
+
@ID
+
'
)
'
43
end
44
45
46
if
@Sort
=
0
47
begin
48
set
@strFSortType
=
'
ASC
'
49
set
@strSortType
=
'
DESC
'
50
end
51
else
52
begin
53
set
@strFSortType
=
'
DESC
'
54
set
@strSortType
=
'
ASC
'
55
end
56
57
58
59
--
------生成查询语句--------
60
--
此处@strTmp为取得 查询结果数量的语句
61
if
@strCondition
is
null
or
@strCondition
=
''
--
没有设置显示条件
62
begin
63
set
@sqlTmp
=
@fldName
+
'
From
'
+
@tblName
64
set
@strTmp
=
@SqlSelect
+
'
@Counts=
'
+
@SqlCounts
+
'
FROM
'
+
@tblName
65
set
@strID
=
'
From
'
+
@tblName
66
end
67
else
68
begin
69
set
@sqlTmp
=
+
@fldName
+
'
From
'
+
@tblName
+
'
where (1>0)
'
+
@strCondition
70
set
@strTmp
=
@SqlSelect
+
'
@Counts=
'
+
@SqlCounts
+
'
FROM
'
+
@tblName
+
'
where (1>0)
'
+
@strCondition
71
set
@strID
=
'
From
'
+
@tblName
+
'
where (1>0)
'
+
@strCondition
72
end
73
74
--
--取得查询结果总数量-----
75
exec
sp_executesql
@strTmp
,N
'
@Counts int out
'
,
@Counts
out
76
declare
@tmpCounts
int
77
if
@Counts
=
0
78
set
@tmpCounts
=
1
79
else
80
set
@tmpCounts
=
@Counts
81
82
--
取得分页总数
83
set
@pageCount
=
(
@tmpCounts
+
@pageSize
-
1
)
/
@pageSize
84
85
/**/
/**/
/**/
/* *当前页大于总页数 取最 后一页* */
86
if
@page
>
@pageCount
87
set
@page
=
@pageCount
88
89
--
/*-----数据分页2分处理-------*/
90
declare
@pageIndex
int
--
总数/页大小
91
declare
@lastcount
int
--
总数%页大小
92
93
set
@pageIndex
=
@tmpCounts
/
@pageSize
94
set
@lastcount
=
@tmpCounts
%
@pageSize
95
if
@lastcount
>
0
96
set
@pageIndex
=
@pageIndex
+
1
97
else
98
set
@lastcount
=
@pagesize
99
100
--
//***显示分页
101
if
@strCondition
is
null
or
@strCondition
=
''
--
没有设置显示条件
102
begin
103
if
@pageIndex
<
2
or
@page
<=
@pageIndex
/
2
+
@pageIndex
%
2
--
前半部分数据处理
104
begin
105
if
@page
=
1
106
set
@strTmp
=
@SqlSelect
+
'
top
'
+
CAST
(
@pageSize
as
VARCHAR
(
4
))
+
'
'
+
@fldName
+
'
from
'
+
@tblName
107
+
'
order by
'
+
@fldSort
+
'
'
+
@strFSortType
108
else
109
begin
110
if
@Sort
=
1
111
begin
112
set
@strTmp
=
@SqlSelect
+
'
top
'
+
CAST
(
@pageSize
as
VARCHAR
(
4
))
+
'
'
+
@fldName
+
'
from
'
+
@tblName
113
+
'
where
'
+
@ID
+
'
<(select min(
'
+
@ID
+
'
) from (
'
+
@SqlSelect
+
'
top
'
+
CAST
(
@pageSize
*
(
@page
-
1
)
as
Varchar
(
20
))
+
'
'
+
@ID
+
'
from
'
+
@tblName
114
+
'
order by
'
+
@fldSort
+
'
'
+
@strFSortType
+
'
) AS TBMinID)
'
115
+
'
order by
'
+
@fldSort
+
'
'
+
@strFSortType
116
end
117
else
118
begin
119
set
@strTmp
=
@SqlSelect
+
'
top
'
+
CAST
(
@pageSize
as
VARCHAR
(
4
))
+
'
'
+
@fldName
+
'
from
'
+
@tblName
120
+
'
where
'
+
@ID
+
'
>(select max(
'
+
@ID
+
'
) from (
'
+
@SqlSelect
+
'
top
'
+
CAST
(
@pageSize
*
(
@page
-
1
)
as
Varchar
(
20
))
+
'
'
+
@ID
+
'
from
'
+
@tblName
121
+
'
order by
'
+
@fldSort
+
'
'
+
@strFSortType
+
'
) AS TBMinID)
'
122
+
'
order by
'
+
@fldSort
+
'
'
+
@strFSortType
123
end
124
end
125
end
126
else
127
begin
128
set
@page
=
@pageIndex
-
@page
+
1
--
后半部分数据处理
129
if
@page
<=
1
--
最后一页数据显 示
130
set
@strTmp
=
@SqlSelect
+
'
* from (
'
+
@SqlSelect
+
'
top
'
+
CAST
(
@lastcount
as
VARCHAR
(
4
))
+
'
'
+
@fldName
+
'
from
'
+
@tblName
131
+
'
order by
'
+
@fldSort
+
'
'
+
@strSortType
+
'
) AS TempTB
'
+
'
order by
'
+
@fldSort
+
'
'
+
@strFSortType
132
else
133
if
@Sort
=
1
134
begin
135
set
@strTmp
=
@SqlSelect
+
'
* from (
'
+
@SqlSelect
+
'
top
'
+
CAST
(
@pageSize
as
VARCHAR
(
4
))
+
'
'
+
@fldName
+
'
from
'
+
@tblName
136
+
'
where
'
+
@ID
+
'
>(select max(
'
+
@ID
+
'
) from(
'
+
@SqlSelect
+
'
top
'
+
CAST
(
@pageSize
*
(
@page
-
2
)
+
@lastcount
as
Varchar
(
20
))
+
'
'
+
@ID
+
'
from
'
+
@tblName
137
+
'
order by
'
+
@fldSort
+
'
'
+
@strSortType
+
'
) AS TBMaxID)
'
138
+
'
order by
'
+
@fldSort
+
'
'
+
@strSortType
+
'
) AS TempTB
'
+
'
order by
'
+
@fldSort
+
'
'
+
@strFSortType
139
end
140
else
141
begin
142
set
@strTmp
=
@SqlSelect
+
'
* from (
'
+
@SqlSelect
+
'
top
'
+
CAST
(
@pageSize
as
VARCHAR
(
4
))
+
'
'
+
@fldName
+
'
from
'
+
@tblName
143
+
'
where
'
+
@ID
+
'
<(select min(
'
+
@ID
+
'
) from(
'
+
@SqlSelect
+
'
top
'
+
CAST
(
@pageSize
*
(
@page
-
2
)
+
@lastcount
as
Varchar
(
20
))
+
'
'
+
@ID
+
'
from
'
+
@tblName
144
+
'
order by
'
+
@fldSort
+
'
'
+
@strSortType
+
'
) AS TBMaxID)
'
145
+
'
order by
'
+
@fldSort
+
'
'
+
@strSortType
+
'
) AS TempTB
'
+
'
order by
'
+
@fldSort
+
'
'
+
@strFSortType
146
end
147
end
148
end
149
150
else
--
有查询条件
151
begin
152
if
@pageIndex
<
2
or
@page
<=
@pageIndex
/
2
+
@pageIndex
%
2
--
前半部分数据处理
153
begin
154
if
@page
=
1
155
set
@strTmp
=
@SqlSelect
+
'
top
'
+
CAST
(
@pageSize
as
VARCHAR
(
4
))
+
'
'
+
@fldName
+
'
from
'
+
@tblName
156
+
'
where 1=1
'
+
@strCondition
+
'
order by
'
+
@fldSort
+
'
'
+
@strFSortType
157
else
if
(
@Sort
=
1
)
158
begin
159
set
@strTmp
=
@SqlSelect
+
'
top
'
+
CAST
(
@pageSize
as
VARCHAR
(
4
))
+
'
'
+
@fldName
+
'
from
'
+
@tblName
160
+
'
where
'
+
@ID
+
'
<(select min(
'
+
@ID
+
'
) from (
'
+
@SqlSelect
+
'
top
'
+
CAST
(
@pageSize
*
(
@page
-
1
)
as
Varchar
(
20
))
+
'
'
+
@ID
+
'
from
'
+
@tblName
161
+
'
where (1=1)
'
+
@strCondition
+
'
order by
'
+
@fldSort
+
'
'
+
@strFSortType
+
'
) AS TBMinID)
'
162
+
'
'
+
@strCondition
+
'
order by
'
+
@fldSort
+
'
'
+
@strFSortType
163
end
164
else
165
begin
166
set
@strTmp
=
@SqlSelect
+
'
top
'
+
CAST
(
@pageSize
as
VARCHAR
(
4
))
+
'
'
+
@fldName
+
'
from
'
+
@tblName
167
+
'
where
'
+
@ID
+
'
>(select max(
'
+
@ID
+
'
) from (
'
+
@SqlSelect
+
'
top
'
+
CAST
(
@pageSize
*
(
@page
-
1
)
as
Varchar
(
20
))
+
'
'
+
@ID
+
'
from
'
+
@tblName
168
+
'
where (1=1)
'
+
@strCondition
+
'
order by
'
+
@fldSort
+
'
'
+
@strFSortType
+
'
) AS TBMinID)
'
169
+
'
'
+
@strCondition
+
'
order by
'
+
@fldSort
+
'
'
+
@strFSortType
170
end
171
end
172
else
173
begin
174
set
@page
=
@pageIndex
-
@page
+
1
--
后半部分数据处理
175
if
@page
<=
1
--
最后一页数据显示
176
set
@strTmp
=
@SqlSelect
+
'
* from (
'
+
@SqlSelect
+
'
top
'
+
CAST
(
@lastcount
as
VARCHAR
(
4
))
+
'
'
+
@fldName
+
'
from
'
+
@tblName
177
+
'
where (1=1)
'
+
@strCondition
+
'
order by
'
+
@fldSort
+
'
'
+
@strSortType
+
'
) AS TempTB
'
+
'
order by
'
+
@fldSort
+
'
'
+
@strFSortType
178
else
if
(
@Sort
=
1
)
179
set
@strTmp
=
@SqlSelect
+
'
* from (
'
+
@SqlSelect
+
'
top
'
+
CAST
(
@pageSize
as
VARCHAR
(
4
))
+
'
'
+
@fldName
+
'
from
'
+
@tblName
180
+
'
where
'
+
@ID
+
'
>(select max(
'
+
@ID
+
'
) from(
'
+
@SqlSelect
+
'
top
'
+
CAST
(
@pageSize
*
(
@page
-
2
)
+
@lastcount
as
Varchar
(
20
))
+
'
'
+
@ID
+
'
from
'
+
@tblName
181
+
'
where (1=1)
'
+
@strCondition
+
'
order by
'
+
@fldSort
+
'
'
+
@strSortType
+
'
) AS TBMaxID)
'
182
+
'
'
+
@strCondition
+
'
order by
'
+
@fldSort
+
'
'
+
@strSortType
+
'
) AS TempTB
'
+
'
order by
'
+
@fldSort
+
'
'
+
@strFSortType
183
else
184
set
@strTmp
=
@SqlSelect
+
'
* from (
'
+
@SqlSelect
+
'
top
'
+
CAST
(
@pageSize
as
VARCHAR
(
4
))
+
'
'
+
@fldName
+
'
from
'
+
@tblName
185
+
'
where
'
+
@ID
+
'
<(select min(
'
+
@ID
+
'
) from(
'
+
@SqlSelect
+
'
top
'
+
CAST
(
@pageSize
*
(
@page
-
2
)
+
@lastcount
as
Varchar
(
20
))
+
'
'
+
@ID
+
'
from
'
+
@tblName
186
+
'
where (1=1)
'
+
@strCondition
+
'
order by
'
+
@fldSort
+
'
'
+
@strSortType
+
'
) AS TBMaxID)
'
187
+
'
'
+
@strCondition
+
'
order by
'
+
@fldSort
+
'
'
+
@strSortType
+
'
) AS TempTB
'
+
'
order by
'
+
@fldSort
+
'
'
+
@strFSortType
188
end
189
end
190
191
--
----返回查询结果-----
192
exec
sp_executesql
@strTmp
193
select
datediff
(ms,
@timediff
,
getdate
())
as
耗时
194
--
print @strTmp
195
SET
NOCOUNT
OFF
196
GO
197
执行示例:exec proc_paged_2part_selectMax 'tb_testTable','ID,userName,userPWD,userEmail',10,100000,'ID',0,null,'ID',0
这种测试只在单机进行,并且没有在实际开发WEB项目中分页测试,测试项也比较单一,所以不够全面系统,但从其效率相比上,我们可以在数据库分页算 法上进行有效的控制。
转自http://www.cnblogs.com/lli0077/archive/2008/09/03/1282862.html