# -*- coding: UTF-8 -*-
import pandas as pd
from sqlalchemy import *
from string import Template
from pandahouse import *
import numpy as np
# Connection settings passed to read_clickhouse() by query(); only the
# server's HTTP endpoint is configured here.
connection = dict(host='http://192.168.30.190:8121')
def query(sqlstr, parameters=None):
    """Fill in a SQL template and run it, returning the result as a DataFrame.

    Args:
        sqlstr: SQL text using ``string.Template`` placeholders
            (``${name}`` / ``$name``).
        parameters: Mapping of placeholder name to replacement text.
            ``None`` is treated as "no parameters".

    Returns:
        Whatever ``read_clickhouse`` returns for the rendered SQL, or an
        empty ``pandas.DataFrame`` if rendering or execution raised.

    Note:
        Values are spliced into the SQL as plain text with no escaping, so
        only pass trusted values.
    """
    try:
        # Pass the mapping itself rather than unpacking it with ``**``:
        # unpacking fails on None and on non-string keys.
        sql_to_run = Template(sqlstr).substitute(parameters or {})
        # read_clickhouse is presumably provided by the file's
        # ``from pandahouse import *`` -- confirm if the imports change.
        return read_clickhouse(sql_to_run, connection=connection)
    except Exception as e:
        # Deliberate best-effort: report the problem and hand back an empty
        # frame so interactive callers can keep going.
        print(e)
        return pd.DataFrame()
# Template SQL: counts rows per (lower-cased user name, lower-cased database
# name) whose startTime lies in the half-open window (time1, time2].
sql = """
select
lower(srcUserName) as userKey,
lower(databaseName) as db,
count(*) as interactions
from some_database
where
startTime > '${time1}'
and startTime <= '${time2}'
group by
userKey,
db;
"""

# Values substituted for the ${time1} / ${time2} placeholders above.
parameters = {
    'time1': '2020-07-25 00:00:00',
    'time2': '2021-08-25 00:00:00',
}
# Render the SQL template with the configured time window and run it; the
# result has one row per (userKey, db) pair with its interaction count.
edges = query(sql, parameters)
# Bare expression: has no effect in a plain script; presumably left so a
# notebook/REPL cell displays the resulting DataFrame.
edges
Python 使用 pandahouse 访问 ClickHouse
最新推荐文章于 2025-10-09 20:28:01 发布
该博客通过 ClickHouse 查询,统计了 2020 年 7 月至 2021 年 8 月期间 'some_database' 中的用户活动量,即按用户 Key 和数据库分组的交互次数。
8000





