step 1: Download a Spark release, e.g. spark-1.6.1-bin-hadoop2.6.tgz
step 2: Unpack it:
tar zxvf spark-1.6.1-bin-hadoop2.6.tgz
step 3: Configure the Hive data source
Method 1:
Copy hive-site.xml from {hive_home}/conf into spark-1.6.1-bin-hadoop2.6/conf, then adjust the values of hive.server2.thrift.port (the listening port) and hive.server2.thrift.bind.host. For example, see the copy command below.
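A minimal sketch, assuming the Hive client configuration lives under /etc/hive/conf (substitute your actual {hive_home}/conf):
cp /etc/hive/conf/hive-site.xml spark-1.6.1-bin-hadoop2.6/conf/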
Method 2:
Create a file named hive-site.xml under spark-1.6.1-bin-hadoop2.6/conf with the following contents:
<?xml version="1.0" encoding="UTF-8"?>
<configuration>
  <property>
    <name>hive.metastore.uris</name>
    <value>thrift://Hive-Metastore-Server:9083</value>
  </property>
  <property>
    <name>hive.server2.thrift.port</name>
    <value>10001</value>
    <description>Port number of HiveServer2 Thrift interface.
    Can be overridden by setting $HIVE_SERVER2_THRIFT_PORT</description>
  </property>
  <property>
    <name>hive.server2.thrift.bind.host</name>
    <value>datanode2</value>
    <description>Bind host on which to run the HiveServer2 Thrift interface.
    Can be overridden by setting $HIVE_SERVER2_THRIFT_BIND_HOST</description>
  </property>
</configuration>
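Here hive.metastore.uris must point at a running Hive metastore; Hive-Metastore-Server above is a placeholder for that host, and datanode2 is the machine that will run the thriftserver. A quick sanity check that the metastore port is reachable from the Spark machine (assuming nc is available):
nc -vz Hive-Metastore-Server 9083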
step 4: Configure the environment
Create a file named spark-env.sh under spark-1.6.1-bin-hadoop2.6/conf (note: Spark sources conf/spark-env.sh, so the name matters) with the following; the HADOOP_LZO paths assume a Cloudera parcel layout, so adjust them to your cluster:
export HADOOP_CONF_DIR=/etc/hadoop/conf
export SPARK_LIBRARY_PATH=$SPARK_LIBRARY_PATH:/opt/cloudera/parcels/HADOOP_LZO/lib/hadoop/lib/native/*
export SPARK_CLASSPATH=$SPARK_CLASSPATH:/opt/cloudera/parcels/HADOOP_LZO/lib/hadoop/lib/*
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/cloudera/parcels/HADOOP_LZO/lib/hadoop/lib/native
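Rather than creating the file from scratch, you can start from the template that ships with Spark and append the lines above to it:
cd spark-1.6.1-bin-hadoop2.6
cp conf/spark-env.sh.template conf/spark-env.sh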
step 5: Start the thriftserver
From the Spark directory, run:
./sbin/start-thriftserver.sh --master yarn
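The port and bind host can also be overridden on the command line instead of in hive-site.xml; the script accepts --hiveconf options:
./sbin/start-thriftserver.sh --master yarn \
  --hiveconf hive.server2.thrift.port=10001 \
  --hiveconf hive.server2.thrift.bind.host=datanode2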
step 6: Test with beeline
Launch it:
./bin/beeline
Connect (datanode2 corresponds to hive.server2.thrift.bind.host; 10001 corresponds to hive.server2.thrift.port):
beeline> !connect jdbc:hive2://datanode2:10001
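beeline then prompts for a username and password; on a cluster with no HiveServer2 authentication configured (the assumption in this walkthrough), empty values are typically accepted.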
Run a test query:
0: jdbc:hive2://datanode2:10001> show databases;
If it returns a listing like the one below, the thriftserver is working and ready to use:
+-------------------+--+
| result |
+-------------------+--+
| default |
| test |
+-------------------+--+
step 7: Test from Java
Connect to Spark SQL from a Java program through JDBC, using the HiveServer2 driver (org.apache.hive.jdbc.HiveDriver):
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

public class Demo {
    public static void main(String[] args) throws Exception {
        // Register the HiveServer2 JDBC driver (shipped in the hive-jdbc jar)
        Class.forName("org.apache.hive.jdbc.HiveDriver");
        runSparkSql();
    }

    private static void runSparkSql() throws SQLException {
        // Host and port match hive.server2.thrift.bind.host / hive.server2.thrift.port;
        // "default" is the database to connect to
        String url = "jdbc:hive2://datanode2:10001/default";
        // try-with-resources closes the connection, statement and result set
        try (Connection con = DriverManager.getConnection(url);
             Statement stmt = con.createStatement();
             ResultSet rs = stmt.executeQuery("SELECT * FROM test")) {
            print(rs);
        }
    }

    private static void print(ResultSet rs) throws SQLException {
        // Print the first column of each row to the console
        while (rs.next()) {
            System.out.println(rs.getString(1));
        }
    }
}
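To run the test, the HiveServer2 JDBC driver and its dependencies must be on the classpath. A minimal sketch, assuming the standalone hive-jdbc jar and a matching hadoop-common jar have been copied into the working directory (jar versions will differ on your cluster):
javac Demo.java
java -cp .:hive-jdbc-1.2.1-standalone.jar:hadoop-common-2.6.0.jar Demo
If everything is wired up, the program prints the first column of each row in the test table.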