【sql】使用开窗函数求连续数据的分析思路
现有一张nba球队年度冠军表,需要把连续夺冠的球队以及连续范围内的开始年份、结束年份查询出来。
需要用到开窗函数,这里使用oracle数据库(mysql8以上版本也可使用开窗函数)
create table NBA
(
team varchar2(200),
y number(4)
);
insert into NBA (team, y) values ('活塞', 1990);
insert into NBA (team, y) values ('公牛', 1991);
insert into NBA (team, y) values ('公牛', 1992);
insert into NBA (team, y) values ('公牛', 1993);
insert into NBA (team, y) values ('火箭', 1994);
insert into NBA (team, y) values ('火箭', 1995);
insert into NBA (team, y) values ('公牛', 1996);
insert into NBA (team, y) values ('公牛', 1997);
insert into NBA (team, y) values ('公牛', 1998);
insert into NBA (team, y) values ('马刺', 1999);
insert into NBA (team, y) values ('湖人', 2000);
insert into NBA (team, y) values ('湖人', 2001);
insert into NBA (team, y) values ('湖人', 2002);
insert into NBA (team, y) values ('马刺', 2003);
insert into NBA (team, y) values ('活塞', 2004);
insert into NBA (team, y) values ('马刺', 2005);
insert into NBA (team, y) values ('热火', 2006);
insert into NBA (team, y) values ('马刺', 2007);
insert into NBA (team, y) values ('凯尔特人', 2008);
insert into NBA (team, y) values ('湖人', 2009);
insert into NBA (team, y) values ('湖人', 2010);
- 使用
ROW_NUMBER()
按球队分组,年份排序;用年份减去序号,如果是连续夺冠,则得到的YR
值相同。
SELECT n.*,Y-ROW_NUMBER() OVER(PARTITION BY TEAM ORDER BY Y) YR FROM NBA n
2. 统计YR
的数量,用第1个年份加上数量再减1就可以得到结束年份。
SELECT a.*,Y+COUNT(*)OVER(PARTITION BY TEAM,YR)-1 y2,ROW_NUMBER ()OVER(PARTITION BY team ,yr ORDER BY yr)rn FROM (
SELECT n.*,Y-ROW_NUMBER() OVER(PARTITION BY TEAM ORDER BY Y) YR FROM NBA n)a
3. 最后只需要筛选RN
为1并且起始年份与终止年份不相等的数据即可。
SELECT team,y 起始年份,y2 终止年份 from(
SELECT a.*,Y+COUNT(*)OVER(PARTITION BY TEAM,YR)-1 y2,ROW_NUMBER ()OVER(PARTITION BY team ,yr ORDER BY yr)rn FROM (
SELECT n.*,Y-ROW_NUMBER() OVER(PARTITION BY TEAM ORDER BY Y) YR FROM NBA n)a)WHERE rn=1 AND y <>y2 ;