从神策平台同步数据到hdfs示例:
可参考以下官方文档:
https://manual.sensorsdata.cn/sa/docs/tech_export_jdbc/v0204
1. 测试环境地址
jdbc:hive2://11.22.33.44:21050/rawdata;auth=noSasl
--测试连通性
ping 11.22.33.44
telnet 11.22.33.44 21050
2. jdbc方式连接
beeline -u "jdbc:hive2://11.22.33.44:21050/rawdata;auth=noSasl"
beeline -u "jdbc:hive2://11.22.33.44:21050/rawdata;auth=noSasl" -e "select 1"
常用 SQL 语句示例:
-- List every database visible through this connection.
SHOW DATABASES;
-- Make rawdata the current database for the statements that follow.
USE rawdata;
-- List the tables in the current database.
SHOW TABLES;
-- Show the column layout of the events table.
-- The trailing /*SA(ylfx)*/ annotation is kept verbatim; presumably it tells the
-- Sensors layer which project (ylfx) to query -- confirm against the Sensors JDBC doc.
DESCRIBE events /*SA(ylfx)*/;
-- Count the rows in rawdata.events.
-- The /*SA(ylfx)*/ annotation must stay at the end of the statement, as in the original.
SELECT COUNT(1)
  FROM rawdata.events
/*SA(ylfx)*/;
-- 上述查询结果: 1798483
-- Peek at three sample rows from rawdata.events.
-- There is no ORDER BY, so which three rows come back is not guaranteed.
SELECT *
  FROM rawdata.events
 LIMIT 3
/*SA(ylfx)*/;
-- Export event rows to the HDFS cluster that hosts Sensors
-- (the exported text files use '\001' as the default field delimiter).
--
-- The original run of this statement exported 0 rows. Its SA_BEGIN marker named no
-- project, unlike the /*SA(ylfx)*/ annotation on the other queries in these notes,
-- so the marker below now names the ylfx project explicitly.
-- NOTE(review): if the export is still empty, verify that events exist on or after
-- the date used in the WHERE filter.
-- NOTE(review): LOCATION '/tmp' places the table's data files directly in /tmp;
-- consider a dedicated subdirectory, and confirm what DROP TABLE would remove.
CREATE TABLE test0117
STORED AS TEXTFILE
LOCATION '/tmp'
AS
/*SA_BEGIN(ylfx)*/
SELECT
    event,
    user_id,
    day,
    event_id,
    month_id,
    week_id,
    distinct_id,
    date,
    time
FROM rawdata.events
WHERE date >= '2025-01-15 00:00:00'
/*SA_END*/;
3. impala-shell 方式连接
impala-shell -i 11.22.33.44
impal