a,2020-02-05,200
a,2020-02-06,300
a,2020-03-04,400
a,2020-03-05,600
b,2020-02-06,300
b,2020-02-08,200
b,2020-02-09,400
b,2020-02-10,600
c,2020-01-31,200
c,2020-02-01,300
a,2020-02-07,200
a,2020-02-08,400
a,2020-02-10,600
b,2020-02-05,200
a,2020-03-01,200
a,2020-03-02,300
a,2020-03-03,200
c,2020-02-02,200
c,2020-02-03,400
c,2020-02-10,600
1.创建MergeTree表
-- Table holding the sample rows (name, date, amount) used by the queries below.
-- IF NOT EXISTS keeps the script re-runnable instead of failing when the
-- table is already present.
-- NOTE(review): Float64 is kept to preserve the existing schema; a Decimal
-- type would be exact if `money` really is a currency amount -- confirm.
create table if not exists shop (
    name  String,
    cdate Date,
    money Float64
)
engine = MergeTree()
-- Sorting key: rows are stored ordered by name, then by date.
order by (name, cdate);
2.导入数据
# Load the CSV rows from /data/shop.txt into the `shop` table.
# The query must be wrapped in plain ASCII single quotes (typographic quotes
# are not shell quoting), and the target must be the table that was actually
# created (`shop`, not `tb_shop`). The column list pins the CSV field order.
clickhouse-client -q 'insert into shop (name, cdate, money) FORMAT CSV' < /data/shop.txt
-- 3. Check the imported rows.
-- Columns are listed explicitly and the output is ordered so the result is
-- stable from run to run.
select
    name,
    cdate,
    money
from shop
order by name, cdate;
4.按name分组,将日期收集为数组
-- Collect every name's dates into a single array.
-- The aggregate gives no guarantee about element order, and the later
-- "date minus index" step only works on ascending, distinct dates, so the
-- array is de-duplicated (groupUniqArray) and sorted (arraySort) here.
select
    name,
    arraySort(groupUniqArray(cdate)) as dates
from shop
group by name
order by name;
5.array join 日期 - 编号
-- Unfold each name's date array back into rows, pairing every date (dy)
-- with its 1-based position (idx) inside the array.
select
    name,
    dy,
    idx
from
(
    -- One row per name: ascending, distinct dates plus the matching
    -- positions [1, 2, ..., n] produced by arrayEnumerate.
    -- The explicit sort is required because the aggregate does not
    -- guarantee element order.
    select
        name,
        arraySort(groupUniqArray(cdate)) as dates,
        arrayEnumerate(dates) as idxs
    from shop
    group by name
)
-- Both arrays have the same length, so they are expanded in lockstep.
array join
    dates as dy,
    idxs as idx
-- idx is added to the sort so rows inside one name come out in date order.
order by name, idx;
6.日期做差
-- For each (name, date) compute sub = date - position.
-- Within a run of consecutive days the date and the position both grow by 1,
-- so every row of the same run gets the same `sub` value.
select
    name,
    dy,
    idx,
    subtractDays(dy, idx) as sub
from
(
    -- Dates must be ascending and distinct for the subtraction trick to
    -- hold; the aggregate does not guarantee order, so sort explicitly.
    select
        name,
        arraySort(groupUniqArray(cdate)) as dates,
        arrayEnumerate(dates) as idxs
    from shop
    group by name
)
array join
    dates as dy,
    idxs as idx
-- idx is added to the sort so rows inside one name come out in date order.
order by name, idx;
7.分组,聚合
-- For every name, report its longest run of consecutive days, keeping only
-- runs of at least 3 days.
--   sub : date minus position, identical for all days of one run
--   cnt : number of days in that run
select
    name,
    sub,
    count(*) as cnt
from
(
    -- One row per (name, date) tagged with its run key `sub`.
    -- No ORDER BY here: the outer GROUP BY does not depend on row order,
    -- so sorting inside the subquery would only waste work.
    select
        name,
        subtractDays(dy, idx) as sub
    from
    (
        -- Ascending, distinct dates per name. The aggregate does not
        -- guarantee order, and a repeated date would shift every later
        -- position and split a run, hence groupUniqArray + arraySort.
        select
            name,
            arraySort(groupUniqArray(cdate)) as dates,
            arrayEnumerate(dates) as idxs
        from shop
        group by name
    )
    array join
        dates as dy,
        idxs as idx
)
group by name, sub
having cnt >= 3
-- Longest run first; `sub` breaks ties so the row kept by LIMIT BY is
-- deterministic (the earliest of equally long runs).
order by name, cnt desc, sub
limit 1 by name;
这篇博客介绍了如何在ClickHouse中创建MergeTree表、导入数据并进行查询。通过示例展示了如何按name分组收集日期、为日期编号,并用“日期减去编号”的差值识别连续日期,最后分组聚合,找出每个name连续出现3天及以上的最长一段记录。
856

被折叠的 条评论
为什么被折叠?



