0 Overview
Ansible is an automation and operations tool written in Python. It draws on the strengths of many earlier tools (Puppet, CFEngine, Chef, Func, Fabric) and provides batch system configuration, batch application deployment, and batch command execution.
Ansible itself works through modules and has no batch-deployment capability of its own; the real work is done by the modules it runs, while Ansible only provides the framework (a minimal ad-hoc example follows after the list). The main parts are:
- Connection plugins: handle communication with the managed nodes
- Host inventory: a configuration file that defines the hosts to be managed
- Modules: core modules, the command module, and custom modules
- Plugins: extend Ansible with logging, email notification, and similar features
- Playbooks: optional scripts for orchestrating several tasks, so a set of nodes can run them in a single pass
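For example, the split between framework, inventory, and modules is visible in a single ad-hoc call (a minimal sketch; the bigdata host group is defined later in this article):
ansible bigdata -i /etc/ansible/hosts -m ping # framework + inventory + the ping module
ansible bigdata -i /etc/ansible/hosts -m command -a 'uptime' # same framework and inventory, different module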
1 Installing Ansible
1.1 Installing the latest release via yum
yum install https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm
yum install ansible
1.2 Basic Ansible configuration
vim /etc/ansible/ansible.cfg
host_key_checking = False # disable host key checking
log_path = /var/log/ansible.log # Ansible log file
inventory = /etc/ansible/hosts # inventory file listing the managed hosts
library = /usr/share/my_modules/ # default search path for modules
module_utils = /usr/share/my_module_utils/ # default search path for module_utils
remote_tmp = ~/.ansible/tmp # temporary directory on the remote hosts
local_tmp = ~/.ansible/tmp # temporary directory on the control node
plugin_filters_cfg = /etc/ansible/plugin_filters.yml # module filter settings
module_lang = en_US.UTF-8 # character set used between modules and the system
deprecation_warnings = False # suppress deprecation warnings
gathering = implicit # controls default facts gathering; with the default 'implicit', facts are collected on every playbook run
ask_pass=True # prompt for the SSH password on every ansible run
ask_sudo_pass=True # prompt for the sudo password on every ansible run
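To confirm which of these values are actually in effect, the configuration can be inspected from the control node (a quick check, assuming Ansible 2.4 or later for the ansible-config tool):
ansible --version # shows which ansible.cfg is being read
ansible-config dump --only-changed # lists only the settings that differ from the defaults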
Note:
- All playbooks used in this walkthrough live under 192.168.137.129:/data/ansible/playbook
2 Ansible examples
2.1 Setting up passwordless SSH (mutual trust) with Ansible
- 1 Configure the inventory (a quick syntax check follows after the host list)
vim /etc/ansible/hosts
[hadoop-master]
centos.master ansible_ssh_host=192.168.137.129 ansible_connection=ssh ansible_ssh_user=root ansible_ssh_pass=jiang
[hive-master]
centos.master ansible_ssh_host=192.168.137.129 ansible_connection=ssh ansible_ssh_user=root ansible_ssh_pass=jiang
[spark]
centos.master ansible_ssh_host=192.168.137.129 ansible_connection=ssh ansible_ssh_user=root ansible_ssh_pass=jiang
[bigdata]
centos.master ansible_ssh_host=192.168.137.129 ansible_connection=ssh ansible_ssh_user=root ansible_ssh_pass=jiang
centos.agent1 ansible_ssh_host=192.168.137.130 ansible_connection=ssh ansible_ssh_user=root ansible_ssh_pass=jiang
centos.agent2 ansible_ssh_host=192.168.137.131 ansible_connection=ssh ansible_ssh_user=root ansible_ssh_pass=jiang
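Before writing any playbook, the inventory can be sanity-checked from the control node (a sketch, assuming Ansible 2.4 or later for the ansible-inventory command):
ansible-inventory -i /etc/ansible/hosts --list # dump the parsed inventory as JSON
ansible bigdata -i /etc/ansible/hosts --list-hosts # show which hosts the bigdata group expands to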
- 2 Write the playbook
Create the playbook file, using Ansible's authorized_key module:
mkdir -p /data/ansible/playbook/env
vim /data/ansible/playbook/env/nopasswd_LOGIN.yml
- hosts: spark
remote_user: root
gather_facts: no
tasks:
- name: create id_rsa
shell:
ssh-keygen -t rsa -f /root/.ssh/id_rsa -P ''
- hosts: bigdata
remote_user: root
gather_facts: no
tasks:
- name: ssh-copy
authorized_key: user=root key="{{ lookup('file', '/root/.ssh/id_rsa.pub') }}"
- name: copy-known_hosts
copy: src=/root/.ssh/known_hosts dest=/root/.ssh/known_hosts mode=0644
- name: copy-id_rsa
copy: src=/root/.ssh/id_rsa dest=/root/.ssh/id_rsa mode=0600
- name: copy-id_rsa.pub
copy: src=/root/.ssh/id_rsa.pub dest=/root/.ssh/id_rsa.pub mode=0644
- name: copy-authorized_keys
copy: src=/root/.ssh/authorized_keys dest=/root/.ssh/authorized_keys mode=0644
Note:
- copy compares checksums to decide whether a file needs to be transferred: if source and destination match, nothing is copied. With force=no (the default is yes), an existing destination file is left untouched (see the task sketch below).
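A hypothetical task illustrating the force=no behaviour (a sketch only, not part of the playbook above):
- name: copy id_rsa only when the remote file does not exist yet # force=no skips files already present
  copy: src=/root/.ssh/id_rsa dest=/root/.ssh/id_rsa mode=0600 force=no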
- 3 Run the playbook
ansible-playbook /data/ansible/playbook/env/nopasswd_LOGIN.yml # add -vvv for verbose output
Output:
PLAY [bigdata] ************************************************************
TASK [Gathering Facts] ****************************************************
ok: [192.168.137.129]
ok: [192.168.137.131]
ok: [192.168.137.130]
TASK [ssh-copy] ***********************************************************
changed: [192.168.137.131]
changed: [192.168.137.130]
changed: [192.168.137.129]
PLAY RECAP ****************************************************************
192.168.137.129 : ok=2 changed=1 unreachable=0 failed=0
192.168.137.130 : ok=2 changed=1 unreachable=0 failed=0
192.168.137.131 : ok=2 changed=1 unreachable=0 failed=0
- 4 Verify
ssh 192.168.137.129
ssh 192.168.137.130
ssh 192.168.137.131
Output:
[root@centos ~]# ssh 192.168.137.129
Last login: Thu Sep 27 01:02:44 2018 from 192.168.137.1
[root@centos ~]# exit
logout
Connection to 192.168.137.129 closed.
[root@centos ~]# ssh 192.168.137.131
Last login: Thu Sep 27 16:02:44 2018 from 192.168.137.1
[root@centos2 ~]# exit
logout
Connection to 192.168.137.131 closed.
[root@centos ~]# ssh 192.168.137.130
Last login: Thu Sep 27 16:02:44 2018 from 192.168.137.1
[root@centos1 ~]# exit
logout
Connection to 192.168.137.130 closed.
2.2 Ad-hoc command tests
- 1 List the files under /data
ansible bigdata -i /etc/ansible/hosts -m command -a 'ls /data'
Output:
[root@centos ~]# ansible bigdata -i /etc/ansible/hosts -m command -a 'ls /data'
192.168.137.130 | SUCCESS | rc=0 >>
ansible
redis
192.168.137.131 | SUCCESS | rc=0 >>
ansible
redis
192.168.137.129 | SUCCESS | rc=0 >>
ansible
Parameter reference:
- -i: inventory (host list) file
- -u: remote login user
- -m: Ansible module to run
- -a: arguments passed to the module
- -k: prompt for the remote user's SSH password
- -s: run through sudo
- -U: user to sudo to, root by default
- -T: SSH connection timeout, 10 seconds by default
- -t: write output to this directory, one log file per host
- -v: verbose output
- See ansible --help for the full list; a combined example follows below
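Putting several of these flags together (a sketch; with -k the SSH password is prompted for interactively instead of being read from ansible_ssh_pass in the inventory):
ansible bigdata -i /etc/ansible/hosts -u root -k -T 30 -m command -a 'df -h /data'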
- 2 Ping test
ansible bigdata -i /etc/ansible/hosts -m ping
Output:
192.168.137.131 | SUCCESS => {
"changed": false,
"ping": "pong"
}
192.168.137.130 | SUCCESS => {
"changed": false,
"ping": "pong"
}
192.168.137.129 | SUCCESS => {
"changed": false,
"ping": "pong"
}
2.3 Deploying a big data platform with Ansible
2.3.1 Installing the JDK
- 1 Write the external variables file
The external variables file is placed at /data/ansible/playbook/install/bigdata/vars/bigdata_VARS.yml:
mkdir /data/ansible/playbook/install/bigdata/vars
vim /data/ansible/playbook/install/bigdata/vars/bigdata_VARS.yml
---
# external variables referenced from the playbooks via vars_files
## PACKAGES_DIR
JDK_PACKAGE_DIR: /data/ansible/playbook/install/bigdata/packages/jdk-8u172-linux-x64.rpm
- 2 Write the playbook
Create the playbook file, using Ansible's yum module:
- hosts: bigdata
remote_user: root
gather_facts: no
vars_files:
- vars/bigdata_VARS.yml
tasks:
- name: remove java-*
yum:
name: java-*
state: removed
- name: remove jdk
yum:
name: jdk
state: removed
- name: copy-jdk
copy:
src: "{{ JDK_PACKAGE_DIR }}"
dest: /tmp/jdk-8u172-linux-x64.rpm
- name: install jdk
#shell: yum install -y /tmp/jdk-8u172-linux-x64.rpm
yum:
name: /tmp/jdk-8u172-linux-x64.rpm
state: present
- name: jdk variable configuration
lineinfile:
path: /etc/profile
regexp: "{{ item.regexp }}"
line: "{{ item.line }}"
with_items:
- { regexp: '^export JAVA_HOME=', line: 'export JAVA_HOME=/usr/java/jdk1.8.0_172-amd64' }
- { regexp: '^export JRE_HOME=', line: 'export JRE_HOME=$JAVA_HOME/jre' }
- { regexp: '^export CLASSPATH=.*JAVA_HOME', line: 'export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar:$JAVA_HOME/lib:$JAVA_HOME/jre/lib' }
- { regexp: '^export PATH=.*JAVA_HOME', line: 'export PATH=$PATH:$JAVA_HOME/bin' }
- 3 Run the playbook
ansible-playbook /data/ansible/playbook/install/bigdata/install_JDK1.8.yml
- 4 Verify
java -version
java version "1.8.0_172"
Java(TM) SE Runtime Environment (build 1.8.0_172-b11)
Java HotSpot(TM) 64-Bit Server VM (build 25.172-b11, mixed mode)
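The lineinfile changes to /etc/profile only take effect in a new login shell; to check the variables on all nodes right away, the file can be sourced explicitly (a sketch using the shell module):
ansible bigdata -m shell -a 'source /etc/profile && echo $JAVA_HOME'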
2.3.2 Installing Hadoop
- 1 Add external variables
Append the following variables to /data/ansible/playbook/install/bigdata/vars/bigdata_VARS.yml:
## PACKAGES_DIR
HADOOP_PACKAGE_DIR: /data/ansible/playbook/install/bigdata/packages/hadoop-2.7.3.tar.gz
## HADOOP_CONFIG
# hadoop-env.sh
JAVA_HOME: /usr/java/jdk1.8.0_172-amd64
# slaves
HADOOP_SLAVES: centos.agent1;centos.agent2
# core-site.xml
HADOOP_TMP_DIR: /data/hadoop/hdfs/tmp
FS_DEFAULT_NAME: centos.master
# hdfs-site.xml
DFS_REPLICATION: 3
DFS_NAMENODE_DIR: /data/hadoop/hdfs/name
DFS_DATANODE_ADDRESS: 0.0.0.0:8999
DFS_DATANODE_HTTP_ADDRESS: 0.0.0.0:8998
DFS_DATANODE_IPC_ADDRESS: 0.0.0.0:8997
DFS_NAMENODE_HTTP_ADDRESS: 0.0.0.0:8996
DFS_DATANODE_DIR: /data/hadoop/hdfs/data
DFS_NAMENODE_SECONDARY_HTTP_ADDRESS: centos.agent1:9001
DFS_WEBHDFS_ENABLED: 'true'
DFS_PERMISSIONS: 'false'
# yarn-site.xml
YARN_RESOURCEMANAGER_ADDRESS: centos.master:9002
YARN_RESOURCEMANAGER_SCHEDULER_ADDRESS: centos.master:9003
YARN_RESOURCEMANAGER_WEBAPP_ADDRESS: centos.master:9004
YARN_RESOURCEMANAGER_RESOURCE_TRACKER_ADDRESS: centos.master:9005
YARN_RESOURCEMANAGER_ADMIN_ADDRESS: centos.master:9006
YARN_NODEMANAGER_AUX_SERVICES: mapreduce_shuffle
# mapred-site.xml
MAPREDUCE_FRAMEWORK_NAME: yarn
- 2 Write the template files
The Hadoop templates are placed under /data/ansible/playbook/install/bigdata/templates/hadoop_etc:
mkdir -p /data/ansible/playbook/install/bigdata/templates/hadoop_etc
vim /data/ansible/playbook/install/bigdata/templates/hadoop_etc/hadoop-env.sh.j2
vim /data/ansible/playbook/install/bigdata/templates/hadoop_etc/core-site.xml.j2
vim /data/ansible/playbook/install/bigdata/templates/hadoop_etc/hdfs-site.xml.j2
vim /data/ansible/playbook/install/bigdata/templates/hadoop_etc/mapred-site.xml.j2
vim /data/ansible/playbook/install/bigdata/templates/hadoop_etc/slaves.j2
vim /data/ansible/playbook/install/bigdata/templates/hadoop_etc/yarn-site.xml.j2
hadoop-env.sh.j2
············
export JAVA_HOME={{JAVA_HOME}}
············
core-site.xml.j2
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>hadoop.tmp.dir</name>
<value>file:{{ HADOOP_TMP_DIR }}</value>
<description>A base for other temporary directories.</description>
</property>
<property>
<name>io.file.buffer.size</name>
<value>131072</value>
</property>
<property>
<name>fs.default.name</name>
<value>hdfs://{{ FS_DEFAULT_NAME }}:9000</value>
</property>
<property>
<name>hadoop.proxyuser.root.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.root.groups</name>
<value>*</value>
</property>
</configuration>
hdfs-site.xml.j2
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>dfs.replication</name>
<value>{{ DFS_REPLICATION }}</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:{{ DFS_NAMENODE_DIR }}</value>
<final>true</final>
</property>
<property>
<name>dfs.datanode.address</name>
<value>{{ DFS_DATANODE_ADDRESS }}</value>
<description>datanode port</description>
</property>
<property>
<name>dfs.datanode.http.address</name>
<value>{{ DFS_DATANODE_HTTP_ADDRESS }}</value>
<description>datanode http server</description>
</property>
<property>
<name>dfs.datanode.ipc.address</name>
<value>{{ DFS_DATANODE_IPC_ADDRESS }}</value>
<description> datanode ipc server</description>
</property>
<property>
<name>dfs.namenode.http-address</name>
<value>{{ DFS_NAMENODE_HTTP_ADDRESS }}</value>
<description>namenode http port </description>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:{{ DFS_DATANODE_DIR }}</value>
<final>true</final>
</property>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>{{ DFS_NAMENODE_SECONDARY_HTTP_ADDRESS }}</value>
</property>
<property>
<name>dfs.webhdfs.enabled</name>
<value>{{ DFS_WEBHDFS_ENABLED }}</value>
</property>
<property>
<name>dfs.permissions</name>
<value>{{ DFS_PERMISSIONS }}</value>
</property>
</configuration>
mapred-site.xml.j2
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>{{ MAPREDUCE_FRAMEWORK_NAME }}</value>
</property>
</configuration>
yarn-site.xml.j2
<?xml version="1.0"?>
<configuration>
<property>
<name>yarn.resourcemanager.address</name>
<value>{{ YARN_RESOURCEMANAGER_ADDRESS }}</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>{{ YARN_RESOURCEMANAGER_SCHEDULER_ADDRESS }}</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>{{ YARN_RESOURCEMANAGER_WEBAPP_ADDRESS }}</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>{{ YARN_RESOURCEMANAGER_RESOURCE_TRACKER_ADDRESS }}</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address</name>
<value>{{ YARN_RESOURCEMANAGER_ADMIN_ADDRESS }}</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>{{ YARN_NODEMANAGER_AUX_SERVICES }}</value>
</property>
</configuration>
slaves.j2
{{ HADOOP_SLAVES | regex_replace(';', '\n') }}
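With HADOOP_SLAVES set as above, the regex_replace filter turns the semicolon-separated string into one hostname per line, so the rendered slaves file should read:
centos.agent1
centos.agent2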
- 3 Write the playbook
- hosts: bigdata
remote_user: root
gather_facts: no
vars_files:
- vars/bigdata_VARS.yml
tasks:
- name: create directory
file:
path: /usr/local/hadoop
state: directory
- name: install hadoop2.7.3
unarchive:
src: "{{ HADOOP_PACKAGE_DIR }}"
dest: /usr/local/hadoop
- name: hadoop variable configuration
lineinfile:
path: /etc/profile
regexp: "{{ item.regexp }}"
line: "{{ item.line }}"
with_items:
- { regexp: '^export HADOOP_HOME=', line: 'export HADOOP_HOME=/usr/local/hadoop/hadoop-2.7.3' }
- { regexp: '^export PATH=.*HADOOP_HOME', line: 'export PATH=$PATH:$HADOOP_HOME/bin' }
- name: create HADOOP_TMP_DIR
file:
path: "{{ HADOOP_TMP_DIR }}"
state: directory
mode: 0755
- name: create DFS_NAMENODE_DIR
file:
path: "{{ DFS_NAMENODE_DIR }}"
state: directory
mode: 0755
- name: create DFS_DATANODE_DIR
file:
path: "{{ DFS_DATANODE_DIR }}"
state: directory
mode: 0755
- name: hadoop hadoop-env.sh template
template:
src: templates/hadoop_etc/hadoop-env.sh.j2
dest: /usr/local/hadoop/hadoop-2.7.3/etc/hadoop/hadoop-env.sh
mode: 0755
- name: hadoop slaves template
template:
src: templates/hadoop_etc/slaves.j2
dest: /usr/local/hadoop/hadoop-2.7.3/etc/hadoop/slaves
- name: hadoop core-site.xml template
template:
src: templates/hadoop_etc/core-site.xml.j2
dest: /usr/local/hadoop/hadoop-2.7.3/etc/hadoop/core-site.xml
- name: hadoop hdfs-site.xml template
template:
src: templates/hadoop_etc/hdfs-site.xml.j2
dest: /usr/local/hadoop/hadoop-2.7.3/etc/hadoop/hdfs-site.xml
- name: hadoop yarn-site.xml template
template:
src: templates/hadoop_etc/yarn-site.xml.j2
dest: /usr/local/hadoop/hadoop-2.7.3/etc/hadoop/yarn-site.xml
- name: hadoop mapred-site.xml template
template:
src: templates/hadoop_etc/mapred-site.xml.j2
dest: /usr/local/hadoop/hadoop-2.7.3/etc/hadoop/mapred-site.xml
- name: set mesos configuration
lineinfile:
path: /opt/mesosphere/etc/mesos-slave
regexp: 'MESOS_HADOOP_HOME='
line: 'MESOS_HADOOP_HOME=/usr/local/hadoop/hadoop-2.7.3'
- hosts: hadoop-master
remote_user: root
gather_facts: no
vars_files:
- vars/bigdata_VARS.yml
tasks:
- name: format & start hdfs
shell: |
hadoop namenode -format
source /etc/profile
$HADOOP_HOME/sbin/start-all.sh
- 4 Run the playbook
ansible-playbook /data/ansible/playbook/install/bigdata/install_HADOOP2.7.3.yml -i /data/ansible/playbook/install/bigdata/hosts
- 5 Verify
hadoop version
jps
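The daemons can also be checked on every node in one shot with an ad-hoc call (a sketch; jps is on the PATH once /etc/profile has been sourced):
ansible bigdata -m shell -a 'source /etc/profile && jps'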
2.3.3 Installing Hive
- 1 Set up the MariaDB database
docker pull mariadb:10.3.8
Create the application under the DC/OS spark tenant:
{
"id": "/hive-mariadb",
"cmd": null,
"cpus": 0.1,
"mem": 3600,
"disk": 0,
"instances": 1,
"constraints": [
[
"hostname",
"CLUSTER",
"192.168.137.129"
]
],
"acceptedResourceRoles": [
"spark"
],
"container": {
"type": "DOCKER",
"volumes": [
{
"containerPath": "/var/lib/mysql",
"hostPath": "/home/mariadb/hive/db",
"mode": "RW"
}
],
"docker": {
"image": "docker.io/mariadb:10.3.8",
"network": "BRIDGE",
"portMappings": [
{
"containerPort": 3306,
"hostPort": 0,
"servicePort": 3306,
"protocol": "tcp",
"labels": {}
}
],
"privileged": false,
"parameters": [],
"forcePullImage": false
}
},
"env": {
"MYSQL_ROOT_PASSWORD": "root",
"TIMEZONE": "Asis/Shanghai",
"SERVER_ID": "1"
},
"labels": {
"HAPROXY_GROUP": "spark"
},
"portDefinitions": [
{
"port": 3306,
"protocol": "tcp",
"name": "default",
"labels": {}
}
],
"args": [
"--character-set-server",
"utf8mb4",
"--collation-server",
"utf8mb4_unicode_ci"
]
}
Enter the container and create the hive database and user:
create database hive character set utf8mb4 COLLATE utf8mb4_unicode_ci;
CREATE USER 'hive'@'%'IDENTIFIED BY 'bigdata';
GRANT ALL PRIVILEGES ON *.* TO 'hive'@'%';
FLUSH PRIVILEGES;
Note:
- In a later iteration the database will be created with Ansible as well; a possible sketch follows below.
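For that later iteration, the SQL above could be expressed as Ansible tasks roughly like this (a sketch only: it assumes the mysql_db and mysql_user modules plus a PyMySQL/MySQLdb library on the host executing the tasks, and the login_host value is a placeholder for wherever the containerized MariaDB port is reachable):
- name: create hive database
  mysql_db:
    login_host: 192.168.137.129   # placeholder: host/port where MariaDB is exposed
    login_user: root
    login_password: root
    name: hive
    encoding: utf8mb4
    collation: utf8mb4_unicode_ci
    state: present
- name: create hive user with full privileges
  mysql_user:
    login_host: 192.168.137.129   # placeholder, same as above
    login_user: root
    login_password: root
    name: hive
    host: '%'
    password: bigdata
    priv: '*.*:ALL'
    state: present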
- 2 Add external variables
Append the following variables to /data/ansible/playbook/install/bigdata/vars/bigdata_VARS.yml:
## PACKAGES_DIR
HIVE_PACKAGE_DIR: /data/ansible/playbook/install/bigdata/packages/apache-hive-1.2.1-bin.tar.gz
MYSQL_CONNECTOR_JAVA_DIR: /data/ansible/playbook/install/bigdata/packages/mysql-connector-java-8.0.11.jar
## HIVE_CONFIG
# hive-site.xml
HIVE_EXEC_SCRATCHDIR: /data/hive
HIVE_EXEC_LOCAL_SCRATCHDIR: /data/hive/iotmp
HIVE_DOWNLOADED_RESOURCES_DIR: /data/hive/downloaded_resources
JAVAX_JDO_OPTION_CONNECTIONURL: 'jdbc:mysql://centos.master:3306/hive?createDatabaseIfNotExist=true'
JAVAX_JDO_OPTION_CONNECTIONDRIVERNAME: 'com.mysql.cj.jdbc.Driver'
JAVAX_JDO_OPTION_CONNECTIONUSERNAME: hive
JAVAX_JDO_OPTION_CONNECTIONPASSWORD: bigdata
HIVE_METASTORE_WAREHOUSE_DIR: /data/hive/warehouse
HIVE_METASTORE_URIS: 192.168.137.129:9083
HIVE_QUERYLOG_LOCATION: /data/hive/query_logs
HIVE_SERVER2_LOGGING_OPERATION_LOG_LOCATION: /data/hive/operation_logs
- 3 Write the template files
The Hive templates are placed under /data/ansible/playbook/install/bigdata/templates/hive_etc:
mkdir -p /data/ansible/playbook/install/bigdata/templates/hive_etc
vim /data/ansible/playbook/install/bigdata/templates/hive_etc/hive-site.xml.j2
hive-site.xml.j2
············
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>{{ JAVAX_JDO_OPTION_CONNECTIONURL }}</value>
<description>JDBC connect string for a JDBC metastore</description>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>{{ JAVAX_JDO_OPTION_CONNECTIONDRIVERNAME }}</value> <!--com.mysql.jdbc.Driver-->
<description>Driver class name for a JDBC metastore</description>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>{{ JAVAX_JDO_OPTION_CONNECTIONUSERNAME }}</value>
<description>username to use against metastore database</description>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>{{ JAVAX_JDO_OPTION_CONNECTIONPASSWORD }}</value>
<description>password to use against metastore database</description>
</property>
<property>
<name>hive.metastore.warehouse.dir</name>
<value>{{ HIVE_METASTORE_WAREHOUSE_DIR }}</value>
<description>location of default database for the warehouse</description>
</property>
<property>
<name>hive.exec.scratchdir</name>
<value>{{ HIVE_EXEC_SCRATCHDIR }}</value>
<description>HDFS root scratch dir for Hive jobs which gets created with write all (733) permission. For each connecting user, an HDFS scratch dir: ${hive.exec.scratchdir}/<username> is created, with ${hive.scratch.dir.permission}.</description>
</property>
<property>
<name>hive.metastore.uris</name>
<value>thrift://{{ HIVE_METASTORE_URIS }}</value>
<description>Thrift URI for the remote metastore. Used by metastore client to connect to remote metastore.</description>
</property>
<property>
<name>hive.querylog.location</name>
<value>{{ HIVE_QUERYLOG_LOCATION }}</value>
<description>Location of Hive run time structured log file</description>
</property>
<property>
<name>hive.server2.logging.operation.log.location</name>
<value>{{ HIVE_SERVER2_LOGGING_OPERATION_LOG_LOCATION }}</value>
<description>Top level directory where operation logs are stored if logging functionality is enabled</description>
</property>
<property>
<name>hive.exec.local.scratchdir</name>
<value>{{ HIVE_EXEC_LOCAL_SCRATCHDIR }}</value>
<description>Local scratch space for Hive jobs</description>
</property>
<property>
<name>hive.downloaded.resources.dir</name>
<value>{{ HIVE_DOWNLOADED_RESOURCES_DIR }}</value>
<description>Temporary local directory for added resources in the remote file system.</description>
</property>
············
- 4 Write the playbook
- hosts: bigdata
remote_user: root
gather_facts: no
vars_files:
- vars/bigdata_VARS.yml
tasks:
- name: create directory
file:
path: /usr/local/hive
state: directory
- name: install hive1.2.1
unarchive:
src: "{{ HIVE_PACKAGE_DIR }}"
dest: /usr/local/hive
- name: hive variable configuration
lineinfile:
path: /etc/profile
regexp: "{{ item.regexp }}"
line: "{{ item.line }}"
with_items:
- { regexp: '^export HIVE_HOME=', line: 'export HIVE_HOME=/usr/local/hive/apache-hive-1.2.1-bin' }
- { regexp: '^export HIVE_CONF_DIR=', line: 'export HIVE_CONF_DIR=$HIVE_HOME/conf' }
- { regexp: '^export CLASSPATH=.*HIVE_HOME', line: 'export CLASSPATH=$CLASSPATH:$HIVE_HOME/lib' }
- { regexp: '^export PATH=.*HIVE_HOME', line: 'export PATH=$PATH:$HIVE_HOME/bin' }
- name: create HADOOP_TMP_DIR
file:
path: "{{ HADOOP_TMP_DIR }}"
state: directory
mode: 0777
owner: spark
group: spark
- name: create HIVE_EXEC_LOCAL_SCRATCHDIR
file:
path: "{{ HIVE_EXEC_LOCAL_SCRATCHDIR }}"
state: directory
mode: 0777
owner: spark
group: spark
- name: create HIVE_DOWNLOADED_RESOURCES_DIR
file:
path: "{{ HIVE_DOWNLOADED_RESOURCES_DIR }}"
state: directory
mode: 0777
owner: spark
group: spark
- name: create HIVE_METASTORE_WAREHOUSE_DIR
file:
path: "{{ HIVE_METASTORE_WAREHOUSE_DIR }}"
state: directory
mode: 0777
owner: spark
group: spark
- name: create HIVE_QUERYLOG_LOCATION
file:
path: "{{ HIVE_QUERYLOG_LOCATION }}"
state: directory
mode: 0777
owner: spark
group: spark
- name: create HIVE_SERVER2_LOGGING_OPERATION_LOG_LOCATION
file:
path: "{{ HIVE_SERVER2_LOGGING_OPERATION_LOG_LOCATION }}"
state: directory
mode: 0777
owner: spark
group: spark
- name: hive hive-site.xml template
template:
src: templates/hive_etc/hive-site.xml.j2
dest: /usr/local/hive/apache-hive-1.2.1-bin/conf/hive-site.xml
- name: configure hive-env.sh
lineinfile:
path: /usr/local/hive/apache-hive-1.2.1-bin/conf/hive-env.sh
create: yes
mode: 0755
regexp: "{{ item.regexp }}"
line: "{{ item.line }}"
with_items:
- { regexp: '^export JAVA_HOME=', line: 'export JAVA_HOME=/usr/java/jdk1.8.0_172-amd64' }
- { regexp: '^export HADOOP_HOME=', line: 'export HADOOP_HOME=/usr/local/hadoop/hadoop-2.7.3' }
- { regexp: '^export HIVE_HOME=', line: 'export HIVE_HOME=/usr/local/hive/apache-hive-1.2.1-bin' }
- { regexp: '^export HIVE_CONF_DIR=', line: 'export HIVE_CONF_DIR=$HIVE_HOME/conf' }
- { regexp: '^export HIVE_AUX_JARS_PATH=', line: 'export HIVE_AUX_JARS_PATH=$HIVE_HOME/lib/*' }
- { regexp: '^export CLASSPATH=.*HIVE_HOME', line: 'export CLASSPATH=$CLASSPATH:$JAVA_HOME/lib:$HADOOP_HOME/lib:$HIVE_HOME/lib' }
- name: configure hive-log4j.properties
copy:
src: /usr/local/hive/apache-hive-1.2.1-bin/conf/hive-log4j.properties.template
dest: /usr/local/hive/apache-hive-1.2.1-bin/conf/hive-log4j.properties
- name: configure hive-start script
lineinfile:
path: /usr/local/hive/apache-hive-1.2.1-bin/bin/hive
regexp: '^sparkAssemblyPath='
line: 'sparkAssemblyPath=`ls ${SPARK_HOME}/jars/*.jar`'
- name: copy-mysql-connector
copy:
src: "{{ MYSQL_CONNECTOR_JAVA_DIR }}"
dest: /usr/local/hive/apache-hive-1.2.1-bin/lib
- hosts: hive-master
remote_user: root
gather_facts: no
vars_files:
- vars/bigdata_VARS.yml
tasks:
- name: start hive metastore
shell: |
source /etc/profile
nohup hive --service metastore > /data/hive/hive_metastore.log 2>&1 &
- 5 Run the playbook
ansible-playbook /data/ansible/playbook/install/bigdata/install_HIVE1.2.1.yml -i /data/ansible/playbook/install/bigdata/hosts
- 6 Verify
hive
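Beyond opening the interactive CLI, a quick metastore round-trip can be run non-interactively (a sketch; the table name is arbitrary):
hive -e "show databases;"
hive -e "create table ansible_smoke_test(id int); show tables; drop table ansible_smoke_test;"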
2.3.4 Installing Spark
- 1 Add external variables
Append the following variables to /data/ansible/playbook/install/bigdata/vars/bigdata_VARS.yml:
## USER_GROUP
SPARK_GROUP_NAME: spark
SPARK_GROUP_ID: 3030
SPARK_USER_NAME: spark
SPARK_USER_ID: 3030
# openssl passwd -1 "spark"
SPARK_USER_PASSWD: $1$9yN27ndz$8bpze1lOmyq3k1KJhxzwD0
SPARK_USER_HOME: /home/spark
SPARK_USER_SHELL: /bin/bash
## PACKAGES_DIR
SCALA_PACKAGE_DIR: /data/ansible/playbook/install/bigdata/packages/scala-2.11.8.tgz
SPARK_PACKAGE_DIR: /data/ansible/playbook/install/bigdata/packages/spark-2.2.0-bin-hadoop2.7.tgz
## SPARK_CONFIG
# spark-env.sh
MESOS_NATIVE_JAVA_LIBRARY: /opt/mesosphere/lib/libmesos.so
SPARK_EXECUTOR_URI: 'hdfs://192.168.137.129:9000/spark-2.2.0-bin-hadoop2.7/spark-2.2.0-bin-hadoop2.7.tgz'
# spark-defaults.conf
SPARK_IO_COMPRESSION_CODEC: lzf
SPARK_MESOS_ROLE: spark
SPARK_MESOS_TASK_LABELS: 'rack_id:spark'
SPARK_CORES_MAX: 8
SPARK_EXECUTOR_CORES: 2
SPARK_EXECUTOR_MEMORY: 4g
SPARK_DRIVER_CORES: 4
SPARK_DRIVER_MEMORY: 20g
SPARK_MEMORY_OFFHEAP_SIZE: 384
- 2 Write the template files
The Spark templates are placed under /data/ansible/playbook/install/bigdata/templates/spark_etc:
mkdir -p /data/ansible/playbook/install/bigdata/templates/spark_etc
vim /data/ansible/playbook/install/bigdata/templates/spark_etc/spark-env.sh.j2
vim /data/ansible/playbook/install/bigdata/templates/spark_etc/spark-defaults.conf.j2
spark-env.sh.j2
export MESOS_NATIVE_JAVA_LIBRARY={{ MESOS_NATIVE_JAVA_LIBRARY }}
export SPARK_EXECUTOR_URI={{ SPARK_EXECUTOR_URI }}
spark-defaults.conf.j2
spark.io.compression.codec {{ SPARK_IO_COMPRESSION_CODEC }}
spark.mesos.role {{ SPARK_MESOS_ROLE }}
spark.mesos.task.labels {{ SPARK_MESOS_TASK_LABELS }}
spark.cores.max {{ SPARK_CORES_MAX }}
spark.executor.cores {{ SPARK_EXECUTOR_CORES }}
spark.executor.memory {{ SPARK_EXECUTOR_MEMORY }}
spark.driver.cores {{ SPARK_DRIVER_CORES }}
spark.driver.memory {{ SPARK_DRIVER_MEMORY }}
spark.memory.offHeap.size {{ SPARK_MEMORY_OFFHEAP_SIZE }}
- 3 Write the playbook
- hosts: bigdata
remote_user: root
gather_facts: no
vars_files:
- vars/bigdata_VARS.yml
tasks:
- name: create group
group:
name: "{{ SPARK_GROUP_NAME }}"
gid: "{{ SPARK_GROUP_ID }}"
state: present
- name: create user
user:
name: spark
password: "{{ SPARK_USER_PASSWD }}"
uid: "{{ SPARK_USER_ID }}"
group: "{{ SPARK_GROUP_NAME }}"
home: "{{ SPARK_USER_HOME }}"
shell: "{{ SPARK_USER_SHELL }}"
state: present
- name: reserve mesos static resources
shell: |
systemctl stop dcos-mesos-slave
echo "MESOS_DEFAULT_ROLE=spark" >> /opt/mesosphere/etc/mesos-slave
rm -rf /var/lib/mesos/*
rm -f /var/lib/dcos/mesos-resources
systemctl start dcos-mesos-slave.service
- hosts: spark
remote_user: root
gather_facts: no
vars_files:
- vars/bigdata_VARS.yml
tasks:
- name: create scala directory
file:
path: /usr/local/scala
state: directory
- name: install scala2.11.8
unarchive:
src: "{{ SCALA_PACKAGE_DIR }}"
dest: /usr/local/scala
- name: scala variable configuration
lineinfile:
path: /etc/profile
regexp: "{{ item.regexp }}"
line: "{{ item.line }}"
with_items:
- { regexp: '^export SCALA_HOME=', line: 'export SCALA_HOME=/usr/local/scala/scala-2.11.8' }
- { regexp: '^export PATH=.*SCALA_HOME', line: 'export PATH=$PATH:$SCALA_HOME/bin' }
# - name: install scala
# shell: |
# mkdir -p /usr/local/scala
# tar -zxf "{{ SCALA_PACKAGE_DIR }}" -C /usr/local/scala
# echo 'export SCALA_HOME=/usr/local/scala/scala-2.11.8' >> /etc/profile
# echo 'export PATH=$PATH:$SCALA_HOME/bin' >> /etc/profile
- name: create spark directory
file:
path: /usr/local/spark
state: directory
- name: install spark2.2.0
unarchive:
src: "{{ SPARK_PACKAGE_DIR }}"
dest: /usr/local/spark
- name: spark variable configuration
lineinfile:
path: /etc/profile
regexp: "{{ item.regexp }}"
line: "{{ item.line }}"
with_items:
- { regexp: '^export SPARK_HOME=', line: 'export SPARK_HOME=/usr/local/spark/spark-2.2.0-bin-hadoop2.7' }
- { regexp: '^export PATH=.*SPARK_HOME', line: 'export PATH=$PATH:$SPARK_HOME/bin' }
- name: spark spark-env.sh template
template:
src: templates/spark_etc/spark-env.sh.j2
dest: /usr/local/spark/spark-2.2.0-bin-hadoop2.7/conf/spark-env.sh
mode: 0755
- name: spark spark-defaults.conf template
template:
src: templates/spark_etc/spark-defaults.conf.j2
dest: /usr/local/spark/spark-2.2.0-bin-hadoop2.7/conf/spark-defaults.conf
- name: create spark log4j.properties
copy:
src: /usr/local/spark/spark-2.2.0-bin-hadoop2.7/conf/log4j.properties.template
dest: /usr/local/spark/spark-2.2.0-bin-hadoop2.7/conf/log4j.properties
- name: copy hadoop core-site.xml
copy:
src: /usr/local/hadoop/hadoop-2.7.3/etc/hadoop/core-site.xml
dest: /usr/local/spark/spark-2.2.0-bin-hadoop2.7/conf/core-site.xml
- name: copy hadoop hdfs-site.xml
copy:
src: /usr/local/hadoop/hadoop-2.7.3/etc/hadoop/hdfs-site.xml
dest: /usr/local/spark/spark-2.2.0-bin-hadoop2.7/conf/hdfs-site.xml
- name: copy hive hive-site.xml
copy:
src: /usr/local/hive/apache-hive-1.2.1-bin/conf/hive-site.xml
dest: /usr/local/spark/spark-2.2.0-bin-hadoop2.7/conf/hive-site.xml
- name: tar new spark.tgz & dispatch to hdfs
shell: |
tar -zPcf /tmp/spark-2.2.0-bin-hadoop2.7.tgz /usr/local/spark/spark-2.2.0-bin-hadoop2.7
hdfs dfs -mkdir /spark-2.2.0-bin-hadoop2.7
hdfs dfs -copyFromLocal /tmp/spark-2.2.0-bin-hadoop2.7.tgz /spark-2.2.0-bin-hadoop2.7
- 4 Run the playbook
ansible-playbook /data/ansible/playbook/install/bigdata/install_SPARK2.2.0.yml -i /data/ansible/playbook/install/bigdata/hosts
- 5 Verify
5.1 Client mode (spark-shell on Mesos)
Run the following on host 192.168.137.129:
su - spark
spark-shell --master mesos://192.168.137.129:5050
5.2 Cluster mode (Mesos dispatcher)
Run the following on host 192.168.137.129:
su - spark
$SPARK_HOME/sbin/start-mesos-dispatcher.sh --master mesos://zk://192.168.137.129:2181,192.168.137.130:2181,192.168.137.131:2181/mesos
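Once the dispatcher is running, a job can be submitted to it in cluster mode (a sketch: the dispatcher listens on port 7077 by default, the example jar ships with the Spark 2.2.0 distribution, and in cluster mode the jar must be reachable from the Mesos agents, e.g. via a local path present on every node or an hdfs:// URL):
su - spark
spark-submit \
  --class org.apache.spark.examples.SparkPi \
  --master mesos://192.168.137.129:7077 \
  --deploy-mode cluster \
  $SPARK_HOME/examples/jars/spark-examples_2.11-2.2.0.jar 100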