离线数仓一波流安装
1、集群规划
https://yellow520.blog.youkuaiyun.com/article/details/115536550
2、网络配置+免密登录
https://yellow520.blog.youkuaiyun.com/article/details/113073636
3、环境变量
https://yellow520.blog.youkuaiyun.com/article/details/112692486
4、MySQL
https://blog.youkuaiyun.com/Yellow_python/article/details/113036158
5、解压,改名,改属主
tar -zxf jdk-8u212-linux-x64.tar.gz -C /opt/
tar -zxf hadoop-3.1.3.tar.gz -C /opt/
tar -zxf apache-hive-3.1.2-bin.tar.gz -C /opt/
cd /opt
mv hadoop-3.1.3 hadoop
mv jdk1.8.0_212 jdk
mv apache-hive-3.1.2-bin hive
chown -R root:root /opt
ll
6、Hadoop
6.1、基础配置
Hadoop核心配置,加入configuration
vim $HADOOP_HOME/etc/hadoop/core-site.xml
<!-- NameNode服务地址 -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://hadoop100:8020</value>
</property>
<!-- Hadoop数据总目录 -->
<property>
<name>hadoop.tmp.dir</name>
<value>/opt/hadoop/data</value>
</property>
<!-- 配置HDFS网页登录使用的静态用户为root -->
<property>
<name>hadoop.http.staticuser.user</name>
<value>root</value>
</property>
HDFS配置,加入configuration
vim $HADOOP_HOME/etc/hadoop/hdfs-site.xml
<!-- SecondaryNameNode服务地址 -->
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>hadoop101:9868</value>
</property>
YARN配置,加入configuration
vim $HADOOP_HOME/etc/hadoop/yarn-site.xml
<!-- 配置成 mapreduce_shuffle 才可运行 MapReduce -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<!-- ResourceManager主机名 -->
<property>
<name>yarn.resourcemanager.hostname</name>
<value>hadoop102</value>
</property>
<!-- https://yellow520.blog.youkuaiyun.com/article/details/115724120 -->
<property>
<name>yarn.nodemanager.env-whitelist</name>
<value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
</property>
<!-- 不 检查物理内存 -->
<property>
<name>yarn.nodemanager.pmem-check-enabled</name>
<value>false</value>
</property>
<!-- 不 检查虚拟内存 -->
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>
<!-- 开启日志聚集 -->
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<!-- 浏览器访问的URL -->
<property>
<name>yarn.log.server.url</name>
<value>http://hadoop100:19888/jobhistory/logs</value>
</property>
<!-- 保存的时间7天(3600*24*7) -->
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>604800</value>
</property>
MapReduce配置,加入configuration
vim $HADOOP_HOME/etc/hadoop/mapred-site.xml
<!-- 让 MapReduce 运行在 YARN 上 -->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
从机配置(workers 文件)。echo 的 -e 选项(enable interpretation of backslash escapes)启用反斜杠转义的解释,使 \n 被解释为换行
echo -e "hadoop100\nhadoop101\nhadoop102" > $HADOOP_HOME/etc/hadoop/workers
cat $HADOOP_HOME/etc/hadoop/workers
6.2、YARN调度器配置
https://yellow520.blog.youkuaiyun.com/article/details/116021592
6.3、文件分发
rsync -a $JAVA_HOME/ hadoop101:$JAVA_HOME/
rsync -a $JAVA_HOME/ hadoop102:$JAVA_HOME/
rsync -a $HADOOP_HOME/ hadoop101:$HADOOP_HOME/
rsync -a $HADOOP_HOME/ hadoop102:$HADOOP_HOME/
6.4、Hadoop集群启动
1、要格式化NameNode(在hadoop100),只格1次
hdfs namenode -format
2、集群启停命令
start-dfs.sh
ssh hadoop102 "start-yarn.sh"
stop-dfs.sh
ssh hadoop102 "stop-yarn.sh"
3、YARN历史服务器 启停命令
mapred --daemon start historyserver
mapred --daemon stop historyserver
7、HIVE
1、上传MySQL的JDBC驱动jar包到HIVE的 lib 目录下
cp mysql-connector-java-5.1.49.jar $HIVE_HOME/lib/
2、删除HIVE日志jar冲突
cd $HIVE_HOME/lib
mv log4j-slf4j-impl-2.10.0.jar log4j-slf4j-impl-2.10.0.jar.bak
3、元数据库配到MySQL
vim $HIVE_HOME/conf/hive-site.xml
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<!-- 元数据配到MySQL -->
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://hadoop100:3306/hive?createDatabaseIfNotExist=true&amp;useUnicode=true&amp;characterEncoding=UTF-8&amp;useSSL=false</value>
<description>元数据配到MySQL的库名(hive),自动建库,UTF-8字符集,不用SSL</description>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
<description>MySQL的JDBC驱动</description>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>root</value>
<description>mysql用户名</description>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>123456</value>
<description>mysql密码</description>
</property>
<!-- 不 开启HIVE元数据验证 -->
<property>
<name>hive.metastore.schema.verification</name>
<value>false</value>
</property>
</configuration>
4、元数据初始化
$HIVE_HOME/bin/schematool -initSchema -dbType mysql -verbose
5、解决元数据中文乱码
mysql -uroot -p123456
USE hive;
ALTER TABLE COLUMNS_V2 MODIFY COLUMN `COMMENT` VARCHAR(256) CHARACTER SET utf8;
ALTER TABLE TABLE_PARAMS MODIFY COLUMN PARAM_VALUE VARCHAR(4000) CHARACTER SET utf8;
ALTER TABLE PARTITION_PARAMS MODIFY COLUMN PARAM_VALUE VARCHAR(4000) CHARACTER SET utf8;
ALTER TABLE PARTITION_KEYS MODIFY COLUMN PKEY_COMMENT VARCHAR(4000) CHARACTER SET utf8;
ALTER TABLE INDEX_PARAMS MODIFY COLUMN PARAM_VALUE VARCHAR(4000) CHARACTER SET utf8;
quit;
6、测试HIVE
hive -e 'CREATE TABLE t(f STRING COMMENT "中")COMMENT "文";
SHOW CREATE TABLE t;
INSERT INTO TABLE t VALUES("汉语");
SELECT * FROM t;
DROP TABLE t;
SHOW TABLES;'