from impala.dbapi import connect as impala_connect
import csv
import datetime
ISO_FORMAT = '%Y-%m-%d %X'
impala_port = 21050
impala_host = '10.8.28.35'
def get_impala_conn():
return impala_connect(host=impala_host, port=impala_port)
def impala_query(curr_sql):
return conn_query(get_impala_conn(), curr_sql)
def conn_query(conn, curr_sql):
print curr_sql
cursor = conn.cursor()
cursor.execute(curr_sql)
result = cursor.fetchall()
cursor.close()
conn.close()
return result
normal_columns = ['user_id', 'client_id']
def csv_file(result, out_file_name):
with open(out_file_name, 'a') as csv_file:
writer = csv.writer(csv_file)
writer.writerows(result)
def dateRange(beginDate, endDate):
dates = []
dt = datetime.datetime.strptime(beginDate, "%Y-%m-%d")
date = beginDate[:]
while date <= endDate:
date_str = "'" + date + "' "
dates.append(date_str)
dt = dt + datetime.timedelta(1)
date = dt.strftime("%Y-%m-%d")
return dates
if __name__ == '__main__':
product_id=["6","9999","20006","20013","20014","20015","20021","20022","20031","20053","20066","20098","20113","20152","20183","20208",'20216']
for product in product_id:
sql = """select a.*,b.DAU from
(select cloud_id,product_id,day,count(distinct user_id)login from bicloud_log.scene_login
where day= '2018-12-19'
and cloud_id='28'
and product_id=%s
and event_id='11003'
group by cloud_id,product_id,day)a
inner join
(select cloud_id,product_id,day,count(distinct user_id)DAU from bicloud_log.scene_login
where day= '2018-12-19'
and cloud_id='28'
and product_id =%s
and event_id='11009'
group by cloud_id,product_id,day)b
on a.product_id=b.product_id and a.cloud_id=b.cloud_id and a.day=b.day
order by a.product_id
""" % (product,product)
tu = impala_query(sql)
tu = tu[0]
lint = list(tu)
lint[0] = int(lint[0])
lint[1] = int(lint[1])
lint[2] = str(lint[2])
lint[3] = int(lint[3])
lint[4] = int(lint[4])
print(lint)
```
impala数据的批量查询
最新推荐文章于 2024-01-04 18:32:46 发布