DORIS数据备份(HDFS)

DORIS数据备份(HDFS)

安装HADOOP

提前安装HADOOP,搭建一个单节点的HADOOP临时使用。
参考:https://blog.youkuaiyun.com/taoruicheng1/article/details/135114606

相关镜像:

docker pull apache/hadoop:3.3.5

安装docker compose

wget https://github.com/docker/compose/releases/download/v2.16.0/docker-compose-linux-x86_64
cp docker-compose-linux-x86_64 /usr/local/bin/docker-compose
chmod +x /usr/local/bin/docker-compose
docker-compose --version


配置 cat docker-compose.yaml  

(本样例中network使用的是本地 network_mode: host)

version: "3"
services:
   namenode:
      image: your_harbor/apache/hadoop:3.3.5
      hostname: namenode
      command: ["hdfs", "namenode"]
      user: "root:root"
      ports:
        - 9870:9870
        - 8020:8020
      volumes:
        - namenode:/tmp/hadoop-root/dfs
      env_file:
        - ./config.env
      privileged: true
      environment:
          ENSURE_NAMENODE_DIR: "/tmp/hadoop-root/dfs/name"
      network_mode: host
   datanode:
      image: your_harbor/apache/hadoop:3.3.5
      hostname: datanode
      command: ["hdfs", "datanode"]
      user: "root:root"
      env_file:
        - ./config.env
      privileged: true
      ports:
        - 9864:9864
        - 9866:9866
      volumes:
        - datanode:/tmp/hadoop-root/dfs
      network_mode: host
   resourcemanager:
      image: your_harbor/apache/hadoop:3.3.5
      hostname: resourcemanager
      command: ["yarn", "resourcemanager"]
      user: "root:root"
      ports:
         - 8088:8088
         - 8030:8030
         - 8031:8031
         - 8032:8032
         - 8033:8033
      env_file:
        - ./config.env
      volumes:
        - ./test.sh:/opt/test.sh
      network_mode: host
   nodemanager:
      image: your_harbor/apache/hadoop:3.3.5
      command: ["yarn", "nodemanager"]
      user: "root:root"
      env_file:
        - ./config.env
      ports:
         - 8042:8042
      network_mode: host
volumes:
  datanode:
  namenode:

配置config.env


注意 fs.default.name 和 fs.defaultFS 这两个配置项,Doris创建备份仓库的时候要用到

CORE-SITE.XML_fs.default.name=hdfs://namenode
CORE-SITE.XML_fs.defaultFS=hdfs://namenode
CORE-SITE.XML_hadoop.http.staticuser.user=root
CORE-SITE.XML_hadoop.tmp.dir=/tmp/hadoop-root
HDFS-SITE.XML_dfs.namenode.rpc-address=namenode:8020
HDFS-SITE.XML_dfs.replication=1
MAPRED-SITE.XML_mapreduce.framework.name=yarn
MAPRED-SITE.XML_yarn.app.mapreduce.am.env=HADOOP_MAPRED_HOME=${HADOOP_HOME}
MAPRED-SITE.XML_mapreduce.map.env=HADOOP_MAPRED_HOME=${HADOOP_HOME}
MAPRED-SITE.XML_mapreduce.reduce.env=HADOOP_MAPRED_HOME=${HADOOP_HOME}
MAPRED-SITE.XML_mapreduce.jobhistory.address=0.0.0.0:10020
MAPRED-SITE.XML_mapreduce.jobhistory.webapp.address=0.0.0.0:19888
YARN-SITE.XML_yarn.resourcemanager.hostname=resourcemanager
YARN-SITE.XML_yarn.nodemanager.pmem-check-enabled=true
YARN-SITE.XML_yarn.nodemanager.delete.debug-delay-sec=600
YARN-SITE.XML_yarn.nodemanager.vmem-check-enabled=true
YARN-SITE.XML_yarn.nodemanager.aux-services=mapreduce_shuffle
YARN-SITE.XML_yarn.nodemanager.resource.cpu-vcores=4
YARN-SITE.XML_yarn.application.classpath=/opt/hadoop/etc/hadoop:/opt/hadoop/share/hadoop/common/lib/*:/opt/hadoop/share/hadoop/common/*:/opt/hadoop/share/hadoop/hdfs:/opt/hadoop/share/hadoop/hdfs/lib/*:/opt/hadoop/share/hadoop/hdfs/*:/opt/hadoop/share/hadoop/mapreduce/*:/opt/hadoop/share/hadoop/yarn:/opt/hadoop/share/hadoop/yarn/lib/*:/opt/hadoop/share/hadoop/yarn/*
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.maximum-applications=10000
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.maximum-am-resource-percent=0.1
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.resource-calculator=org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.queues=default
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.capacity=100
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.user-limit-factor=1
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.maximum-capacity=100
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.state=RUNNING
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.acl_submit_applications=*
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.acl_administer_queue=*
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.node-locality-delay=40
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.queue-mappings=
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.queue-mappings-override.enable=false

touch一个测试文件

touch test.sh

启动HADOOP

# 启动
docker-compose up -d
docker-compose ps
# 关闭(追加 --rmi all 可在停止容器的同时删除相关镜像)
docker-compose down
docker-compose ps

查看日志
docker logs -f hadoop-namenode-1
docker logs -f hadoop-datanode-1


也可以测试一下功能是否好用

#-put命令:从本地文件系统拷贝到HDFS,其中/xxx/xxx/为hdfs中的路径
hdfs dfs -put bootstrap.sh /doris
#-copyFromLocal命令:从本地文件系统拷贝到HDFS,效果与-put命令等同
hdfs dfs -copyFromLocal b.txt /usr/opt/data
#-moveFromLocal命令:从本地文件系统剪切到HDFS,命令执行完后本地文件就没有了
hdfs dfs -moveFromLocal c.txt /usr/opt/data


配置hosts


指向你的HADOOP机器,确保DORIS中各fe/be节点能识别这些域名

cat /etc/hosts
echo "

192.168.12.123 datanode
192.168.12.123 namenode
192.168.12.123 resourcemanager

" >> /etc/hosts
cat /etc/hosts

访问web界面
9870:Namenode 的web界面端口


http://192.168.12.123:9870

配置DORIS仓库


使用root登录mysql客户端

mysql -uroot -P9030 -h 127.0.0.1


创建备份仓库

  注意fs.defaultFS和fs.default.name配置
  HADOOP集群目前只是单节点,所以复本数先配置为1,避免异常发生
  could only be written to 0 of the 1 minReplication nodes. There are 3 datanode(s) running and 3 node(s) are excluded in this operation.

CREATE REPOSITORY hdfs_repo
WITH hdfs
ON LOCATION "hdfs://namenode/doris/hdfs_repo/"
PROPERTIES
(   
    "fs.defaultFS" = "hdfs://namenode",
    "fs.default.name" = "hdfs://namenode",
    "hadoop.username" = "root",
    "dfs.replication" = "1",
    "dfs.client.use.datanode.hostname" = "true",
    "dfs.client.use.namenode.hostname" = "true"
);

SHOW CREATE REPOSITORY for hdfs_repo;

备份数据库


  your_db_name 全库备份

BACKUP SNAPSHOT your_db_name.tag_date_202501
TO hdfs_repo;

SHOW BACKUP\G

数据有异常重新删除再来

CANCEL BACKUP FROM your_db_name;
DROP REPOSITORY hdfs_repo;
SHOW REPOSITORIES;

查看备份的快照

SHOW SNAPSHOT ON hdfs_repo;

恢复数据

(可以A库备份,B库恢复;在B库上创建同样的仓库hdfs_repo,即可查询到A库备份的数据)
 

RESTORE SNAPSHOT your_db_name.tag_date_202501
FROM hdfs_repo
PROPERTIES
(
    "backup_timestamp"="2025-01-25-06-31-09",  //通过查询快照,可以看到这个时间戳
    "replication_num" = "1"
);

SHOW RESTORE FROM your_db_name\G


DONE!
 

评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值