处理表重复记录(查询和删除)

SQL查询与删除重复记录技术
本文详细介绍了SQL中查询及删除重复记录的方法,包括名相同ID不同情况下的选择性保留,以及没有大小关系时的处理策略。同时,提供了删除重复记录时的多种SQL语句实现,帮助开发者有效管理数据库中的冗余数据。

处理表重复记录(查询和删除)

查询
1、Name相同的重复值记录,没有大小关系只保留一条
2、Name相同,ID有大小关系时,保留大或小其中一个记录
 
一、查询重复记录(如果列没有大小关系,SQL2000可用生成自增列和临时表处理,SQL2005可用row_number函数处理)
 
生成测试数据

 1 IF NOT OBJECT_ID('Tempdb..#T') IS NULL 
 2     DROP TABLE #T
 3 Go
 4 CREATE TABLE #T
 5 (
 6   [ID] INT ,
 7   [Name] NVARCHAR(1) ,
 8   [Memo] NVARCHAR(2)
 9 )
10 INSERT  #T
11         SELECT  1, N'A', N'A1'
12         UNION ALL
13         SELECT  2, N'A', N'A2'
14         UNION ALL
15         SELECT  3, N'A', N'A3'
16         UNION ALL
17         SELECT  4, N'B', N'B1'
18         UNION ALL
19         SELECT  5, N'B', N'B2'
20 Go

 

1、Name相同ID最小的记录(推荐用1,2,3),方法3在SQL2005时,效率高于1、2

 1 --方法1:
 2 Select * from #T a where not exists(select 1 from #T where Name=a.Name and ID<a.ID)
 3  
 4 --方法2:
 5 select a.* from #T a join (select min(ID)ID,Name from #T group by Name) b on a.Name=b.Name and a.ID=b.ID
 6  
 7 --方法3:
 8 select * from #T a where ID=(select min(ID) from #T where Name=a.Name)
 9  
10 --方法4:
11 select a.* from #T a join #T b on a.Name=b.Name and a.ID>=b.ID group by a.ID,a.Name,a.Memo having count(1)=1 
12  
13 --方法5:
14 select * from #T a group by ID,Name,Memo having ID=(select min(ID)from #T where Name=a.Name)
15  
16 --方法6:
17 select * from #T a where (select count(1) from #T where Name=a.Name and ID<a.ID)=0
18  
19 --方法7:
20 select * from #T a where ID=(select top 1 ID from #T where Name=a.name order by ID)
21  
22 --方法8:
23 select * from #T a where ID!>all(select ID from #T where Name=a.Name)
24  
25 --方法9(注:ID为唯一时可用):
26 select * from #T a where ID in(select min(ID) from #T group by Name)
27  
28 --SQL2005:
29  
30 --方法10:
31 select ID,Name,Memo from (select *,min(ID)over(partition by Name) as MinID from #T a)T where ID=MinID
32  
33 --方法11:
34 select ID,Name,Memo from (select *,row_number()over(partition by Name order by ID) as MinID from #T a)T where MinID=1

2、Name相同ID最大的记录,与min相反

 1 --方法1:
 2 Select * from #T a where not exists(select 1 from #T where Name=a.Name and ID>a.ID)
 3  
 4 --方法2:
 5 select a.* from #T a join (select max(ID)ID,Name from #T group by Name) b on a.Name=b.Name and a.ID=b.ID order by ID
 6  
 7 --方法3:
 8 select * from #T a where ID=(select max(ID) from #T where Name=a.Name) order by ID
 9  
10 --方法4:
11 select a.* from #T a join #T b on a.Name=b.Name and a.ID<=b.ID group by a.ID,a.Name,a.Memo having count(1)=1 
12  
13 --方法5:
14 select * from #T a group by ID,Name,Memo having ID=(select max(ID)from #T where Name=a.Name)
15  
16 --方法6:
17 select * from #T a where (select count(1) from #T where Name=a.Name and ID>a.ID)=0
18  
19 --方法7:
20 select * from #T a where ID=(select top 1 ID from #T where Name=a.name order by ID desc)
21  
22 --方法8:
23 select * from #T a where ID!<all(select ID from #T where Name=a.Name)
24  
25 --方法9(注:ID为唯一时可用):
26 select * from #T a where ID in(select max(ID) from #T group by Name)
27  
28 --SQL2005:
29  
30 --方法10:
31 select ID,Name,Memo from (select *,max(ID)over(partition by Name) as MinID from #T a)T where ID=MinID
32  
33 --方法11:
34 select ID,Name,Memo from (select *,row_number()over(partition by Name order by ID desc) as MinID from #T a)T where MinID=1

 


删除

二、删除重复记录有大小关系时,保留大或小其中一个记录

生成测试数据

 1 USE [tempdb]
 2 GO
 3 IF NOT OBJECT_ID('Tempdb..#T') IS NULL 
 4     DROP TABLE #T
 5 Go
 6 CREATE TABLE #T
 7 (
 8   [ID] INT ,
 9   [Name] NVARCHAR(1) ,
10   [Memo] NVARCHAR(2)
11 )
12 INSERT  #T
13         SELECT  1, N'A', N'A1'
14         UNION ALL
15         SELECT  2, N'A', N'A2'
16         UNION ALL
17         SELECT  3, N'A', N'A3'
18         UNION ALL
19         SELECT  4, N'B', N'B1'
20         UNION ALL
21         SELECT  5, N'B', N'B2'
22 Go

 

1、Name相同ID最小的记录(推荐用1,2,3),保留最小一条

 1 --方法1:
 2 delete a from #T a where  exists(select 1 from #T where Name=a.Name and ID<a.ID)
 3  
 4 --方法2:
 5 delete a  from #T a left join (select min(ID)ID,Name from #T group by Name) b on a.Name=b.Name and a.ID=b.ID where b.Id is null
 6  
 7 --方法3:
 8 delete a from #T a where ID not in (select min(ID) from #T where Name=a.Name)
 9  
10 --方法4(注:ID为唯一时可用):
11 delete a from #T a where ID not in(select min(ID)from #T group by Name)
12  
13 --方法5:
14 delete a from #T a where (select count(1) from #T where Name=a.Name and ID<a.ID)>0
15  
16 --方法6:
17 delete a from #T a where ID<>(select top 1 ID from #T where Name=a.name order by ID)
18  
19 --方法7:
20 delete a from #T a where ID>any(select ID from #T where Name=a.Name)
21  
22  
23 select * from #T

 

2、Name相同ID保留最大的一条记录

 1 --方法1:
 2 delete a from #T a where  exists(select 1 from #T where Name=a.Name and ID>a.ID)
 3  
 4 --方法2:
 5 delete a  from #T a left join (select max(ID)ID,Name from #T group by Name) b on a.Name=b.Name and a.ID=b.ID where b.Id is null
 6  
 7 --方法3:
 8 delete a from #T a where ID not in (select max(ID) from #T where Name=a.Name)
 9  
10 --方法4(注:ID为唯一时可用):
11 delete a from #T a where ID not in(select max(ID)from #T group by Name)
12  
13 --方法5:
14 delete a from #T a where (select count(1) from #T where Name=a.Name and ID>a.ID)>0
15  
16 --方法6:
17 delete a from #T a where ID<>(select top 1 ID from #T where Name=a.name order by ID desc)
18  
19 --方法7:
20 delete a from #T a where ID<any(select ID from #T where Name=a.Name)
21  
22  
23 select * from #T

 


三、删除重复记录没有大小关系时,处理重复值

生成测试数据

 1 USE [tempdb]
 2 GO
 3 IF NOT OBJECT_ID('Tempdb..#T') IS NULL 
 4     DROP TABLE #T
 5 Go
 6 CREATE TABLE #T
 7 (
 8   [Num] INT ,
 9   [Name] NVARCHAR(1)
10 )
11 INSERT  #T
12         SELECT  1, N'A'
13         UNION ALL
14         SELECT  1, N'A'
15         UNION ALL
16         SELECT  1, N'A'
17         UNION ALL
18         SELECT  2, N'B'
19         UNION ALL
20         SELECT  2, N'B'
21 Go

方法1:

 1 if object_id('Tempdb..#') is not null
 2     drop table #
 3 Select distinct * into # from #T--排除重复记录结果集生成临时表#
 4  
 5 truncate table #T--清空表
 6  
 7 insert #T select * from #    --把临时表#插入到表#T中
 8  
 9 --查看结果
10 select * from #T

 

方法2:

-- Method 2: temporarily add an identity column so identical rows can be told apart.
ALTER TABLE #T
    ADD ID INT IDENTITY;
GO

-- Within each (Num, Name) group, delete every row that has a sibling with a
-- larger generated ID, leaving exactly one row per group.
DELETE a
FROM #T AS a
WHERE EXISTS (
    SELECT 1
    FROM #T AS b
    WHERE b.Num = a.Num
      AND b.Name = a.Name
      AND b.ID > a.ID
);
GO

-- Remove the helper identity column again.
ALTER TABLE #T
    DROP COLUMN ID;

-- Show the result.
SELECT *
FROM #T;

方法3:

 1 declare Roy_Cursor cursor local for
 2 select count(1)-1,Num,Name from #T group by Num,Name having count(1)>1
 3 declare @con int,@Num int,@Name nvarchar(1)
 4 open Roy_Cursor
 5 fetch next from Roy_Cursor into @con,@Num,@Name
 6 while @@Fetch_status=0
 7 begin 
 8     set rowcount @con;
 9     delete #T where Num=@Num and Name=@Name
10     set rowcount 0;
11     fetch next from Roy_Cursor into @con,@Num,@Name
12 end
13 close Roy_Cursor
14 deallocate Roy_Cursor
15  
16 --查看结果
17 select * from #T

转载自:http://bbs.csdn.net/topics/240034273


如有不对的地方,欢迎大家拍砖o(∩_∩)o

一、数据采集层:多源人脸数据获取 该层负责从不同设备 / 渠道采集人脸原始数据,为后续模型训练与识别提供基础样本,核心功能包括: 1. 多设备适配采集 实时摄像头采集: 调用计算机内置摄像头(或外接 USB 摄像头),通过OpenCV的VideoCapture接口实时捕获视频流,支持手动触发 “拍照”(按指定快捷键如Space)或自动定时采集(如每 2 秒采集 1 张),采集时自动框选人脸区域(通过Haar级联分类器初步定位),确保样本聚焦人脸。 支持采集参数配置:可设置采集分辨率(如 640×480、1280×720)、图像格式(JPG/PNG)、单用户采集数量(如默认采集 20 张,确保样本多样性),采集过程中实时显示 “已采集数量 / 目标数量”,避免样本不足。 本地图像 / 视频导入: 支持批量导入本地人脸图像文件(支持 JPG、PNG、BMP 格式),自动过滤非图像文件;导入视频文件(MP4、AVI 格式)时,可按 “固定帧间隔”(如每 10 帧提取 1 张图像)或 “手动选择帧” 提取人脸样本,适用于无实时摄像头场景。 数据集对接: 支持接入公开人脸数据集(如 LFW、ORL),通过预设脚本自动读取数据集目录结构(按 “用户 ID - 样本图像” 分类),快速构建训练样本库,无需手动采集,降低系统开发与测试成本。 2. 采集过程辅助功能 人脸有效性校验:采集时通过OpenCV的Haar级联分类器(或MTCNN轻量级模型)实时检测图像中是否包含人脸,若未检测到人脸(如遮挡、侧脸角度过),则弹窗提示 “未识别到人脸,请调整姿态”,避免无效样本存入。 样本标签管理:采集时需为每个样本绑定 “用户标签”(如姓名、ID 号),支持手动输入标签或从 Excel 名单批量导入标签(按 “标签 - 采集数量” 对应),采集完成后自动按 “标签 - 序号” 命名文件(如 “张三
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值