impala数据的批量查询

本文展示了一种使用Impala连接大数据集群并执行复杂SQL查询的方法,通过具体示例介绍了如何从特定产品ID中获取用户登录和DAU数据,同时演示了数据处理和CSV文件输出的过程。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

from impala.dbapi import connect as impala_connect
import csv
import datetime

ISO_FORMAT = '%Y-%m-%d %X'
impala_port = 21050
impala_host = '10.8.28.35'


def get_impala_conn():
    return impala_connect(host=impala_host, port=impala_port)


def impala_query(curr_sql):
    return conn_query(get_impala_conn(), curr_sql)


def conn_query(conn, curr_sql):
    print curr_sql
    cursor = conn.cursor()
    cursor.execute(curr_sql)
    result = cursor.fetchall()
    cursor.close()
    conn.close()
    return result


normal_columns = ['user_id', 'client_id']
def csv_file(result, out_file_name):
    with open(out_file_name, 'a') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerows(result)

def dateRange(beginDate, endDate):
    dates = []
    dt = datetime.datetime.strptime(beginDate, "%Y-%m-%d")
    date = beginDate[:]
    while date <= endDate:
        date_str = "'" + date + "' "
        dates.append(date_str)
        dt = dt + datetime.timedelta(1)
        date = dt.strftime("%Y-%m-%d")
    return dates

if __name__ == '__main__':
    product_id=["6","9999","20006","20013","20014","20015","20021","20022","20031","20053","20066","20098","20113","20152","20183","20208",'20216']
    for product in product_id:
        sql = """select a.*,b.DAU from
        (select cloud_id,product_id,day,count(distinct user_id)login from bicloud_log.scene_login
         where day= '2018-12-19'        
         and cloud_id='28'        
         and product_id=%s 
         and event_id='11003'
         group by cloud_id,product_id,day)a
        inner join
        (select cloud_id,product_id,day,count(distinct user_id)DAU from bicloud_log.scene_login
         where day= '2018-12-19'
         and cloud_id='28'
         and product_id =%s
         and event_id='11009'
         group by cloud_id,product_id,day)b
        on a.product_id=b.product_id and a.cloud_id=b.cloud_id and a.day=b.day
        order by a.product_id
        """ % (product,product)
        tu = impala_query(sql)
        tu = tu[0]
        lint = list(tu)
        lint[0] = int(lint[0])
        lint[1] = int(lint[1])
        lint[2] = str(lint[2])
        lint[3] = int(lint[3])
        lint[4] = int(lint[4])
        print(lint)
        ```
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值