Install Docker
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
sudo add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"
sudo apt-get update
sudo apt-get install docker-ce
sudo apt-get install docker-compose
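To verify the installation (optional sanity check):
docker --version
docker-compose --version
sudo docker run --rm hello-world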
Install the stream-sql-demo test project (optional)
mkdir stream-sql-demo
cd stream-sql-demo
wget https://raw.githubusercontent.com/romainr/query-demo/master/stream-sql-demo/docker-compose.yml
docker-compose up -d
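Once the stack is up, check that the containers are running:
docker-compose ps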
Install Hue
docker run -it -p 8888:8888 gethue/hue:latest
# Alternatively, load the image from a local archive
docker load -i hue_docker.tar
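If you plan to customize hue.ini as in the steps below, one option is to mount the edited file into the container; the target path here assumes the image's default config directory, which can differ between Hue versions:
docker run -it -p 8888:8888 -v $PWD/hue.ini:/usr/share/hue/desktop/conf/z-hue.ini gethue/hue:latest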
Edit the configuration file hue.ini
1. Configure the database connection
[desktop]
[[database]]
engine=mysql
host=127.0.0.1
port=3306
user=root
password=secret
name=hue
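The hue database must exist before Hue can connect; a minimal sketch, assuming the root/secret credentials above and a local mysql client:
mysql -u root -p -e 'CREATE DATABASE hue DEFAULT CHARACTER SET utf8'
After updating hue.ini, apply Hue's schema with the migrate command (the path assumes a standard Hue install directory):
./build/env/bin/hue migrate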
2. Configure a MySQL interpreter
[notebook]
[[interpreters]]
[[[mysql]]]
name = MySQL
interface=sqlalchemy
options='{"url": "mysql://root:secret@database-host:3306/hue"}'
3. Configure HDFS
[hadoop]
# Configuration for HDFS NameNode
# ------------------------------------------------------------------------
[[hdfs_clusters]]
# HA support by using HttpFs
[[[default]]]
# Enter the filesystem uri
fs_defaultfs=hdfs://192.168.36.137:8020
webhdfs_url=http://192.168.36.138:14000/webhdfs/v1
hadoop_conf_dir=/etc/hadoop/conf
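Hue must also be whitelisted as a proxy user on the Hadoop side (hadoop.proxyuser.hue.hosts and hadoop.proxyuser.hue.groups in core-site.xml, plus the httpfs.proxyuser.hue.* equivalents for HttpFS). To confirm HttpFS answers at the address configured above:
curl "http://192.168.36.138:14000/webhdfs/v1/?op=LISTSTATUS&user.name=hue"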
4. Configure YARN
[[yarn_clusters]]
[[[default]]]
# Enter the host on which you are running the ResourceManager
resourcemanager_host=192.168.36.138
# The port where the ResourceManager IPC listens on
resourcemanager_port=8032
# Whether to submit jobs to this cluster
submit_to=True
# Resource Manager logical name (required for HA)
## logical_name=
# Change this if your YARN cluster is Kerberos-secured
## security_enabled=false
# URL of the ResourceManager API
resourcemanager_api_url=http://192.168.36.138:8088
# URL of the ProxyServer API
proxy_api_url=http://192.168.36.138:8088
# URL of the HistoryServer API
history_server_api_url=http://192.168.36.138:19888
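A quick check that the ResourceManager REST API is reachable:
curl http://192.168.36.138:8088/ws/v1/cluster/info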
5. Configure Hive
[beeswax]
# Thrift version to use when communicating with HiveServer2
thrift_version=7
# Host where HiveServer2 is running.
# If Kerberos security is enabled, use fully-qualified domain name (FQDN).
hive_server_host=192.168.36.139
# Binary thrift port for HiveServer2.
hive_server_port=10000
hive_conf_dir=/etc/hive/conf
[notebook]
[[interpreters]]
[[[hive]]]
name=Hive
interface=hiveserver2
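To confirm HiveServer2 accepts connections before testing from Hue (assuming beeline is available on the client machine):
beeline -u "jdbc:hive2://192.168.36.139:10000" -e "show databases;"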
6. Configure Flink
Note: before configuring this, install flink-sql-gateway first, and make sure a Flink cluster is running on the same machine as flink-sql-gateway; see:
https://blog.youkuaiyun.com/wqy1200/article/details/109746704
[notebook]
enable_notebook_2=true
[[interpreters]]
[[[flink]]]
name=Flink
interface=flink
options='{"url": "http://172.18.0.7:8083"}'
7. Configure Spark
7.1. Install Livy
Download it from https://livy.incubator.apache.org/
unzip apache-livy-0.7.0-incubating-bin.zip
Edit the configuration:
# Edit conf/livy-env.sh
export HADOOP_CONF_DIR=/etc/hadoop/conf
export SPARK_HOME=/opt/cloudera/parcels/CDH/lib/spark
export JAVA_HOME=/usr/local/java
# Edit conf/livy.conf
livy.server.port = 8998
livy.spark.master = yarn
livy.server.session.timeout = 1h
livy.impersonation.enabled = true
livy.repl.enable-hive-context = true
Start the server. Note: start it as the hdfs user.
sudo -u hdfs ./livy-server 1>&2 &
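Once started, Livy's REST API should respond; a fresh server returns an empty session list:
curl http://localhost:8998/sessions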
7.2. Edit hue.ini
[notebook]
[[interpreters]]
[[[sql]]]
name=SparkSql
interface=livy
[[[spark]]]
name=Scala
interface=livy
[[[pyspark]]]
name=PySpark
interface=livy
[spark]
# The Livy Server URL.
livy_server_url=http://192.168.36.138:8998
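As an end-to-end check, a PySpark session can be created against Livy's REST API directly:
curl -X POST -H 'Content-Type: application/json' -d '{"kind": "pyspark"}' http://192.168.36.138:8998/sessions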
8. Configure Oozie