1. Connection methods
1.1 Connecting to Hive with beeline
[bdp@host66 bdp]$ beeline
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/opt/cloudera/parcels/CDH-5.8.4-1.cdh5.8.4.p0.5/jars/slf4j-log4j12-1.7.5.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/home/hadoop/spark-test.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/home/hadoop/phoenix.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/home/hadoop/phoenix_CDH5.3.1.jar!/org/slf4j/impl/StaticLoggerBinder.class]
...........................................................................
beeline> !connect jdbc:hive2://host66:10000/default;principal=hive/host66@NBDP.COM
Connecting to jdbc:hive2://host66:10000/default;principal=hive/host66@NBDP.COM
Enter username for jdbc:hive2://host66:10000/default;principal=hive/host66@NBDP.COM: bdp
Enter password for jdbc:hive2://host66:10000/default;principal=hive/host66@NBDP.COM: **
You can also specify the database name directly in the connection string:
beeline> !connect jdbc:hive2://host66:10000/smart_test;principal=hive/host66@NBDP.COM
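The same connection can also be opened non-interactively with the -u flag (this mirrors the -u usage in section 2.1 below; host, database, and principal are the ones from the examples above):
beeline -u "jdbc:hive2://host66:10000/smart_test;principal=hive/host66@NBDP.COM" -n bdp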
1.2 Connecting with the hive CLI: simply run the hive command.
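For instance, a minimal session against the same test database used in the export examples below:
[bdp@host66 bdp]$ hive
hive> use smart_test;
hive> select * from prod_inst_injection_label_ext0 limit 10;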
2. Data export
2.1 Export with beeline
[bdp@host66 bdp]$/usr/bin/beeline -u "jdbc:hive2://host66:10000/default;principal=hive/host66@NBDP.COM" --silent=true --delimiterForDSV=$'\005' --outputformat=dsv --incremental=true --showHeader=true --nullemptystring=true -e " use smart_test; select * from prod_inst_injection_label_ext0 limit 10;" > /home/bdp/smarttest/test.txt;
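Because \005 is a non-printing delimiter, it is worth spot-checking the exported file; a quick sanity sketch against the output path above:
head -2 /home/bdp/smarttest/test.txt | cat -v                          # \005 is rendered visibly as ^E
awk -F $'\005' '{print NF}' /home/bdp/smarttest/test.txt | sort -u     # field count should be identical on every line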
2.2 Export via script
2.2.1 beeline script
#!/bin/bash
if [ $# -lt 9 ];then
echo "Argument check failed: one or more arguments are missing!"
echo "retCode:-1"
exit 1
fi
##FTP connection info
ftpIP=$1
ftpUser=$2
ftpPassword=$3
ftpPath=$4
##tableName: despite the name, this holds the full SQL to execute (e.g. "use db; select * from table;")
tableName=$5
echo ${tableName}
##path on the cluster host where the hive export file is written
filePath=$6
##name of the exported file
fileName=$7
##billing period (accounting month)
acct_time=$8
##beeline command-line options
beelineInfo=$9
knowledgeCode=${fileName}".txt"
##knowledgeCheckCode=${fileName}"_ch.txt"
wholePath=${filePath}${fileName}".txt"
##wholeCheckPath=${filePath}${fileName}"_ch.txt"
echo ${wholePath}
##echo ${wholeCheckPath}
##create the export directory if it does not exist
if [[ ! -d $filePath ]]; then
echo "Directory does not exist, creating it"
mkdir -p $filePath
else
echo "Directory exists"
fi
chmod 777 $filePath
#export HADOOP_HOME=/opt/cloudera/parcels/CDH
#export HADOOP_CONF_DIR=/etc/hadoop/conf
##run the export and write the result set to the target TXT file
/opt/cloudera/parcels/CDH/bin/beeline ${beelineInfo} --outputformat=dsv --incremental=true --showHeader=true --nullemptystring=true -e "set hive.resultset.use.unique.column.names=false; ${tableName} " > ${wholePath}
##count total lines (including the header)
totalNum=`cat ${wholePath}|wc -l`
echo ${totalNum}
##generate the check file -- no longer needed
##cat > ${wholeCheckPath} << END_TEXT
##total : $((totalNum-1));
##acct_time : ${acct_time}
##END_TEXT
##check whether a directory for the current billing period already exists on the remote FTP server (the function lists the remote path; the caller greps the listing for ${acct_time})
function checkfile()
{
ftp -n<<!
open $ftpIP
user $ftpUser $ftpPassword
ls $ftpPath
bye
!
}
##upload via FTP; if the billing-period directory already exists, skip creating it
if checkfile ${acct_time} | grep ${acct_time}
then
ftp -n<<!
open $ftpIP
user $ftpUser $ftpPassword
binary
cd $ftpPath${acct_time}
lcd $filePath
prompt
mput $knowledgeCode
close
bye
!
else
ftp -n<<!
open $ftpIP
user $ftpUser $ftpPassword
binary
mkdir $ftpPath${acct_time}
cd $ftpPath${acct_time}
lcd $filePath
prompt
mput $knowledgeCode
close
bye
!
fi
echo "totalNum:"$((totalNum-1))
echo "retCode:0"
echo "retMes:执行完毕!"
exit 0;
Example invocation with its arguments:
/home/hadoop/smart/upload/exportTargrp.sh 136.6.204.48 hadoop hadoop /home/hadoop/smart/upload/target/ "use testhive2; select * from ftp_hive;" /home/hadoop/smart/upload/testAcct/ tar_grp_362 20180821 "-u 'jdbc:hive2://hadoop28:10000/default;principal=hive/hadoop28@NBDP.COM' --hiveconf mapreduce.job.queuename=default"
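The script reports its result through the retCode/totalNum lines it prints, so a caller (e.g. a scheduler wrapper) can parse them; a minimal sketch, assuming GNU grep with -P and that the nine arguments above are forwarded via "$@":
#!/bin/bash
out=$(/home/hadoop/smart/upload/exportTargrp.sh "$@" 2>&1)      # forward the nine arguments shown above
echo "$out"
ret=$(echo "$out" | grep -oP 'retCode:\K-?[0-9]+' | tail -1)    # last retCode marker wins
num=$(echo "$out" | grep -oP 'totalNum:\K[0-9]+' | tail -1)
if [ "$ret" = "0" ]; then
echo "export succeeded with ${num} data rows"
else
echo "export failed (retCode=${ret})"
fi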
2.2.2 Direct export with the hive CLI
#!/bin/bash
#set hive.cli.print.header=true; // print column names
#set hive.cli.print.row.to.vertical=true; // enable vertical (row-to-column) output; requires header printing to be on
#set hive.cli.print.row.to.vertical.num=1; // number of columns shown per line
hive -e "set hive.cli.print.header=true;set hive.resultset.use.unique.column.names=false;use smart_test;select * from prod_inst_injection_label_ext0 "|tr "\t" "|" >> /home/bdp/smarttest/exportTargrp.txt;
#replace the literal string NULL with \N
sed -i 's/NULL/\\N/g' /home/bdp/smarttest/exportTargrp.txt;
#drop the first two lines of the file
sed -i '1,2d' /home/bdp/smarttest/exportTargrp.txt;
The sed command deletes the two extra lines shown in the red box of the original screenshot (not reproduced here); without this step those two rows would sit above the column header in the export.
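After the cleanup, the export can be sanity-checked; a quick sketch using the output path from the script above:
head -1 /home/bdp/smarttest/exportTargrp.txt           # first line should now be the column header
wc -l /home/bdp/smarttest/exportTargrp.txt             # data rows = total lines minus the header
grep -cF '\N' /home/bdp/smarttest/exportTargrp.txt     # count of rows carrying the \N null marker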