Skipping the conceptual background; straight to the steps.
Lab setup:
The root password on every node is 123123. Only the mon, mgr, and osd roles are planned here; add nodes for other components as needed.
| Hostname | Public network | Cluster network | Role |
| --- | --- | --- | --- |
| ceph-deploy | 192.168.19.10 | - | Admin node; drives the deployment of the whole cluster |
| ceph-mon1 | 192.168.19.11 | 192.168.60.5 | mon1 |
| ceph-mon2 | 192.168.19.12 | 192.168.60.6 | mon2 |
| ceph-mon3 | 192.168.19.13 | 192.168.60.7 | mon3 |
| ceph-mgr1 | 192.168.19.14 | 192.168.60.8 | mgr1 |
| ceph-mgr2 | 192.168.19.15 | 192.168.60.9 | mgr2 |
| ceph-osd1 | 192.168.19.16 | 192.168.60.10 | osd1 (/dev/sdb, /dev/sdc, /dev/sdd) |
| ceph-osd2 | 192.168.19.17 | 192.168.60.11 | osd2 (/dev/sdb, /dev/sdc, /dev/sdd) |
| ceph-osd3 | 192.168.19.18 | 192.168.60.12 | osd3 (/dev/sdb, /dev/sdc, /dev/sdd) |
1. Initialize the admin node
1.1 Set the admin node hostname
[root@localhost ~]# hostnamectl set-hostname ceph-deploy
[root@localhost ~]# exec bash
[root@ceph-deploy ~]#
1.2 Add hosts entries on the admin node
[root@ceph-deploy ~]# cat <<EOT >> /etc/hosts
> #Ceph deploy
> 192.168.19.10 ceph-deploy
>
> #Ceph Public Network
> 192.168.19.11 ceph-mon1
> 192.168.19.12 ceph-mon2
> 192.168.19.13 ceph-mon3
> 192.168.19.14 ceph-mgr1
> 192.168.19.15 ceph-mgr2
> 192.168.19.16 ceph-osd1
> 192.168.19.17 ceph-osd2
> 192.168.19.18 ceph-osd3
>
> #Ceph Cluster Network
> 192.168.60.5 ceph-mon1
> 192.168.60.6 ceph-mon2
> 192.168.60.7 ceph-mon3
> 192.168.60.8 ceph-mgr1
> 192.168.60.9 ceph-mgr2
> 192.168.60.10 ceph-osd1
> 192.168.60.11 ceph-osd2
> 192.168.60.12 ceph-osd3
> EOT
1.3 Configure the yum repositories
#CentOS base repo
[root@ceph-deploy ~]# mv /etc/yum.repos.d/CentOS-Base.repo /etc/yum.repos.d/CentOS-Base.repo.backup
[root@ceph-deploy ~]# curl -o /etc/yum.repos.d/CentOS-Base.repo https://mirrors.aliyun.com/repo/Centos-7.repo
#Install the EPEL repository
[root@ceph-deploy ~]# yum install -y epel-release
[root@ceph-deploy ~]# yum clean all
[root@ceph-deploy ~]# yum makecache
1.4 Disable firewalld and SELinux
#Disable the firewall and SELinux
systemctl disable --now firewalld
setenforce 0
sed -i 's/^SELINUX=.*/SELINUX=disabled/' /etc/selinux/config
1.5 Configure time synchronization
#Configure time synchronization
yum install -y chrony
systemctl enable --now chronyd
1.6 Create the cephadm user
[root@ceph-deploy ~]# useradd cephadm
[root@ceph-deploy ~]# echo "cephadm" | sudo passwd --stdin cephadm
[root@ceph-deploy ~]# vim /etc/sudoers
#Add the following line at the indicated location in the file
## Same thing without a password
# %wheel ALL=(ALL) NOPASSWD: ALL
cephadm ALL=(ALL) NOPASSWD: ALL
2. Ceph has many components, so install Ansible on the admin node and use it to initialize all nodes
[root@ceph-deploy ~]# yum install -y ansible
2.1 Create the inventory file
[root@ceph-deploy ~]# vim inventory
[all]
ceph-mon1 ansible_host=192.168.19.11
ceph-mon2 ansible_host=192.168.19.12
ceph-mon3 ansible_host=192.168.19.13
ceph-mgr1 ansible_host=192.168.19.14
ceph-mgr2 ansible_host=192.168.19.15
ceph-osd1 ansible_host=192.168.19.16
ceph-osd2 ansible_host=192.168.19.17
ceph-osd3 ansible_host=192.168.19.18
2.2 Generate and push the SSH keys
[root@ceph-deploy ~]# ssh-keygen
Generating public/private rsa key pair.
Enter file in which to save the key (/root/.ssh/id_rsa):
Created directory '/root/.ssh'.
Enter passphrase (empty for no passphrase):
Enter same passphrase again:
Your identification has been saved in /root/.ssh/id_rsa.
Your public key has been saved in /root/.ssh/id_rsa.pub.
The key fingerprint is:
SHA256:4xiz+lyCPbsSS6eFmO9+RvcGV7CBXDdrbs0H2GFHlAI root@ceph-deploy
The key's randomart image is:
+---[RSA 2048]----+
| . o.E+ oo=|
| o o. B + |
| ++ + |
| .o.o . |
| o .o S .o o .|
| o +o+*o... . |
| o.B*.o+ |
| =oo= o |
| o+==. . |
+----[SHA256]-----+
#sshpass (from EPEL) is needed for the non-interactive key copy below
[root@ceph-deploy ~]# yum install -y sshpass
[root@ceph-deploy ~]# for ip in {11..18}; do sshpass -p 123123 ssh-copy-id -o StrictHostKeyChecking=no root@192.168.19.$ip; done
[root@ceph-deploy ~]# ssh root@192.168.19.11
Last login: Fri Mar 21 19:02:04 2025 from 192.168.19.1
[root@localhost ~]# exit
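The directory listing in step 3.4 includes a set_hostname.yml that is not reproduced in this post; at this point the other nodes still report themselves as "localhost", so their hostnames need to be set to match the inventory names. A minimal sketch of such a playbook (an assumption, not the original file) could be:

---
- name: Set hostname on all nodes
  hosts: all
  become: yes
  tasks:
    - name: Set each node's hostname to its inventory name
      ansible.builtin.hostname:
        name: "{{ inventory_hostname }}"

Run it the same way as the other playbooks: ansible-playbook -i inventory set_hostname.yml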
2.3 Create a playbook to distribute the hosts file
[root@ceph-deploy ~]# cat copy_hosts.yml
---
- name: Copy local /etc/hosts to all nodes
  hosts: all
  become: yes
  tasks:
    - name: Copy /etc/hosts from control node to all nodes
      copy:
        src: /etc/hosts
        dest: /etc/hosts
        owner: root
        group: root
        mode: '0644'
        backup: yes
[root@ceph-deploy ~]# ansible-playbook -i inventory copy_hosts.yml
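To confirm that Ansible can reach every node and that the hosts file landed, two optional ad-hoc checks (not part of the original run) can be used:

ansible -i inventory all -m ping
ansible -i inventory all -m command -a 'tail -n 5 /etc/hosts'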
2.4 Disable firewalld and SELinux on all nodes
[root@ceph-deploy ~]# cat disable_firewall_selinux.yml
---
- name: Disable firewall and SELinux on all nodes
  hosts: all
  become: yes
  tasks:
    - name: Disable firewalld
      ansible.builtin.service:
        name: firewalld
        state: stopped
        enabled: no
    - name: Disable SELinux (immediate)
      ansible.builtin.command: /usr/sbin/setenforce 0
    - name: Disable SELinux (permanently)
      ansible.builtin.lineinfile:
        path: /etc/selinux/config
        regexp: '^SELINUX='
        line: 'SELINUX=disabled'
[root@ceph-deploy ~]# ansible-playbook -i inventory disable_firewall_selinux.yml
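Note that setenforce 0 only covers the running system; the SELINUX=disabled line takes full effect after the next reboot. An optional spot check across the nodes:

ansible -i inventory all -m command -a 'getenforce'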
2.5 Configure the yum repos on all nodes: install EPEL and add the Aliyun Ceph repo, which pins the Ceph release (Nautilus)
[root@ceph-deploy ~]# cat update_yum_repo.yml
---
- name: Update YUM repository to Alibaba Cloud mirror and install EPEL for all nodes
  hosts: all
  become: yes
  tasks:
    - name: Backup the original CentOS-Base.repo
      ansible.builtin.command:
        cmd: mv /etc/yum.repos.d/CentOS-Base.repo /etc/yum.repos.d/CentOS-Base.repo.backup
        creates: /etc/yum.repos.d/CentOS-Base.repo.backup
    - name: Download Alibaba Cloud CentOS 7 repo file
      ansible.builtin.get_url:
        url: https://mirrors.aliyun.com/repo/Centos-7.repo
        dest: /etc/yum.repos.d/CentOS-Base.repo
        mode: '0644'
    - name: Install EPEL repository
      ansible.builtin.yum:
        name: epel-release
        state: present
    - name: Add Alibaba Cloud Ceph repository
      ansible.builtin.copy:
        dest: /etc/yum.repos.d/ceph.repo
        content: |
          [Ceph]
          name=Ceph packages for $basearch
          baseurl=http://mirrors.aliyun.com/ceph/rpm-nautilus/el7/$basearch
          enabled=1
          gpgcheck=1
          type=rpm-md
          gpgkey=https://download.ceph.com/keys/release.asc
          [Ceph-noarch]
          name=Ceph noarch packages
          baseurl=http://mirrors.aliyun.com/ceph/rpm-nautilus/el7/noarch
          enabled=1
          gpgcheck=1
          type=rpm-md
          gpgkey=https://download.ceph.com/keys/release.asc
          [ceph-source]
          name=Ceph source packages
          baseurl=http://mirrors.aliyun.com/ceph/rpm-nautilus/el7/SRPMS
          enabled=1
          gpgcheck=1
          type=rpm-md
          gpgkey=https://download.ceph.com/keys/release.asc
        mode: '0644'
    - name: Clean and update YUM cache
      ansible.builtin.yum:
        update_cache: yes
[root@ceph-deploy ~]# ansible-playbook -i inventory update_yum_repo.yml
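An optional check that the Aliyun base, EPEL, and Ceph repos are now active on every node:

ansible -i inventory all -m command -a 'yum repolist enabled'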
2.6 Install time synchronization on all nodes
[root@ceph-deploy ~]# cat install_chrony.yml
---
- name: Configure time synchronization with chrony on all nodes
  hosts: all
  become: yes
  tasks:
    - name: Install chrony
      ansible.builtin.yum:
        name: chrony
        state: present
    - name: Enable and start chronyd service
      ansible.builtin.systemd:
        name: chronyd
        enabled: yes
        state: started
[root@ceph-deploy ~]# ansible-playbook -i inventory install_chrony.yml
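Optionally confirm that chronyd is syncing on every node:

ansible -i inventory all -m command -a 'chronyc sources'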
2.7 Create the cephadm user on all nodes and grant it sudo privileges
[root@ceph-deploy ~]# cat create_user.yml
---
- name: Create cephadm user on all nodes
  hosts: all
  become: yes
  tasks:
    - name: Create user cephadm
      user:
        name: cephadm
        password: "{{ 'cephadm' | password_hash('sha512') }}"
        shell: /bin/bash
        state: present
    - name: Ensure cephadm user has sudo privileges
      lineinfile:
        path: /etc/sudoers
        line: 'cephadm ALL=(ALL) NOPASSWD:ALL'
        state: present
        validate: 'visudo -cf %s'
[root@ceph-deploy ~]# ansible-playbook -i inventory create_user.yml
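Optional checks that the user exists and that the passwordless sudo rule is present on every node:

ansible -i inventory all -m command -a 'id cephadm'
ansible -i inventory all -m command -a 'grep cephadm /etc/sudoers'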
2.8 Install Python 3 on all nodes
[root@ceph-deploy ~]# cat install_python3.yml
---
- name: Install Python 3 on all CentOS nodes
  hosts: all
  become: yes
  tasks:
    - name: Install Python 3
      yum:
        name: python3
        state: present
    - name: Ensure Python 3 is the default
      raw: |
        if [ ! -e /usr/bin/python ]; then
          ln -s /usr/bin/python3 /usr/bin/python
        fi
      when: >
        ansible_python_version is not defined or
        (ansible_python_version is defined and not ansible_python_version.startswith('3'))
[root@ceph-deploy ~]# ansible-playbook -i inventory install_python3.yml
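An optional check of the installed interpreter:

ansible -i inventory all -m command -a 'python3 --version'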
2.9 Set up SSH trust for the cephadm user
[root@ceph-deploy ~]# su - cephadm
Last login: Fri Mar 21 22:04:21 CST 2025 from 192.168.19.1 on pts/3
[cephadm@ceph-deploy ~]$ ssh-keygen
[cephadm@ceph-deploy ~]$ exit
logout
[root@ceph-deploy ~]# cat push_cephadm_ssh_key.yml
---
- name: Push SSH public key to all nodes for cephadm user
  hosts: all
  become: yes
  tasks:
    - name: Ensure ~/.ssh directory exists for cephadm
      file:
        path: /home/cephadm/.ssh
        state: directory
        mode: '0700'
        owner: cephadm
        group: cephadm
    - name: Add SSH public key to authorized_keys for cephadm
      authorized_key:
        user: cephadm
        key: "{{ lookup('file', '/home/cephadm/.ssh/id_rsa.pub') }}"
        state: present
[root@ceph-deploy ~]# ansible-playbook -i inventory push_cephadm_ssh_key.yml
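A quick way to confirm that key-based login works before handing the deployment to cephadm (ceph-osd1 is just an example target):

su - cephadm -c 'ssh -o StrictHostKeyChecking=no ceph-osd1 hostname'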
3. Start deploying Ceph
3.1 Install ceph-deploy on the admin node
[root@ceph-deploy ~]# yum update -y
[root@ceph-deploy ~]# yum install -y python-setuptools python3-pip
[root@ceph-deploy ~]# pip3 install ceph-deploy==2.0.1
[root@ceph-deploy ~]# ceph-deploy --version
2.0.1
3.2 If initialization fails, run the following cleanup (admin node only)
ceph-deploy purge {ceph-node} [{ceph-node}]
ceph-deploy purgedata {ceph-node} [{ceph-node}]
ceph-deploy forgetkeys
rm ceph.*
#If you run purge, Ceph must be reinstalled afterwards. The final rm removes all files that ceph-deploy wrote locally during the previous installation.
3.3 Initialize the Ceph cluster
#Initialize the Ceph cluster configuration file; switch to the cephadm user for the deployment
[root@ceph-deploy ~]# su - cephadm
Last login: Fri Mar 21 23:21:29 CST 2025 on pts/0
[cephadm@ceph-deploy ~]$ pwd
/home/cephadm
#Create the my-cluster directory to hold the Ceph configuration files and keyrings
[cephadm@ceph-deploy ~]$ mkdir my-cluster
[cephadm@ceph-deploy ~]$ cd my-cluster/
[cephadm@ceph-deploy my-cluster]$ pwd
/home/cephadm/my-cluster
[cephadm@ceph-deploy my-cluster]$ ceph-deploy new --public-network 192.168.19.0/24 --cluster-network 192.168.60.0/24 ceph-mon1 ceph-mon2 ceph-mon3
#Verify the ceph.conf file
[cephadm@ceph-deploy my-cluster]$ ll
total 16
-rw-rw-r--. 1 cephadm cephadm 317 Mar 21 23:26 ceph.conf
-rw-rw-r--. 1 cephadm cephadm 5601 Mar 21 23:26 ceph-deploy-ceph.log
-rw-------. 1 cephadm cephadm 73 Mar 21 23:26 ceph.mon.keyring
[cephadm@ceph-deploy my-cluster]$ cat ceph.conf
[global]
fsid = 0f0abd0c-00d1-4a51-8517-31cb99f76cb3
public_network = 192.168.19.0/24
cluster_network = 192.168.60.0/24
mon_initial_members = ceph-mon1, ceph-mon2, ceph-mon3
mon_host = 192.168.19.11,192.168.19.12,192.168.19.13
auth_cluster_required = cephx
auth_service_required = cephx
auth_client_required = cephx
[cephadm@ceph-deploy my-cluster]$
3.4 ceph-deploy install kept pulling packages from the upstream (official) repos and timing out, so install the packages on every node with Ansible instead
[cephadm@ceph-deploy my-cluster]$ exit
logout
[root@ceph-deploy ~]# pwd
/root
[root@ceph-deploy ~]# ll
total 40
-rw-------. 1 root root 1797 Mar 21 18:55 anaconda-ks.cfg
-rw-r--r--. 1 root root  284 Mar 21 21:05 copy_hosts.yml
-rw-r--r--. 1 root root  459 Mar 21 22:12 create_user.yml
-rw-r--r--. 1 root root  485 Mar 21 21:16 disable_firewall_selinux.yml
-rw-r--r--. 1 root root  345 Mar 21 21:43 install_chrony.yml
-rw-r--r--. 1 root root  483 Mar 21 23:20 install_python3.yml
-rw-r--r--. 1 root root  302 Mar 21 21:10 inventory
-rw-r--r--. 1 root root  489 Mar 21 22:27 push_cephadm_ssh_key.yml
-rw-r--r--. 1 root root  192 Mar 21 21:11 set_hostname.yml
-rw-r--r--. 1 root root 1705 Mar 22 00:03 update_yum_repo.yml
[root@ceph-deploy ~]# vim install_ceph.yml
[root@ceph-deploy ~]# cat install_ceph.yml
---
- hosts: all
  become: yes
  tasks:
    - name: Install Ceph and Ceph RadosGW
      yum:
        name:
          - ceph
          - ceph-radosgw
        state: present
[root@ceph-deploy ~]# ansible-playbook -i inventory install_ceph.yml
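An optional check that the Nautilus packages landed on every node:

ansible -i inventory all -m command -a 'ceph --version'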
3.5 Continue the deployment as the cephadm user
[root@ceph-deploy ~]# su cephadm
[cephadm@ceph-deploy root]$ cd
[cephadm@ceph-deploy ~]$ cd my-cluster/
[cephadm@ceph-deploy my-cluster]$ pwd
/home/cephadm/my-cluster
[cephadm@ceph-deploy my-cluster]$ ll
total 52
-rw-rw-r--. 1 cephadm cephadm   317 Mar 21 23:26 ceph.conf
-rw-rw-r--. 1 cephadm cephadm 44199 Mar 22 00:10 ceph-deploy-ceph.log
-rw-------. 1 cephadm cephadm    73 Mar 21 23:26 ceph.mon.keyring
[cephadm@ceph-deploy my-cluster]$ ceph-deploy mon create-initial
#After initialization the following keyring files are generated automatically; they are used for later authenticated interaction with Ceph
[cephadm@ceph-deploy my-cluster]$ ll
total 156
-rw-------. 1 cephadm cephadm   113 Mar 22 00:27 ceph.bootstrap-mds.keyring
-rw-------. 1 cephadm cephadm   113 Mar 22 00:27 ceph.bootstrap-mgr.keyring
-rw-------. 1 cephadm cephadm   113 Mar 22 00:27 ceph.bootstrap-osd.keyring
-rw-------. 1 cephadm cephadm   113 Mar 22 00:27 ceph.bootstrap-rgw.keyring
-rw-------. 1 cephadm cephadm   151 Mar 22 00:27 ceph.client.admin.keyring
-rw-rw-r--. 1 cephadm cephadm   317 Mar 21 23:26 ceph.conf
-rw-rw-r--. 1 cephadm cephadm 81744 Mar 22 00:27 ceph-deploy-ceph.log
-rw-------. 1 cephadm cephadm    73 Mar 21 23:26 ceph.mon.keyring
[cephadm@ceph-deploy my-cluster]$ ceph-deploy mgr create ceph-mgr1 ceph-mgr2
#Each OSD node has three disks to add (sdb, sdc, sdd); ceph-osd1 is shown as an example
[root@ceph-osd1 ~]# lsblk
NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINT
sda 8:0 0 20G 0 disk
├─sda1 8:1 0 1G 0 part /boot
└─sda2 8:2 0 19G 0 part
├─centos-root 253:0 0 17G 0 lvm /
└─centos-swap 253:1 0 2G 0 lvm [SWAP]
sdb 8:16 0 50G 0 disk
sdc 8:32 0 50G 0 disk
sdd 8:48 0 50G 0 disk
[cephadm@ceph-deploy my-cluster]$ ceph-deploy osd create --data /dev/sdb ceph-osd1
[cephadm@ceph-deploy my-cluster]$ ceph-deploy osd create --data /dev/sdb ceph-osd2
[cephadm@ceph-deploy my-cluster]$ ceph-deploy osd create --data /dev/sdb ceph-osd3
[cephadm@ceph-deploy my-cluster]$ ceph-deploy osd create --data /dev/sdc ceph-osd1
[cephadm@ceph-deploy my-cluster]$ ceph-deploy osd create --data /dev/sdc ceph-osd2
[cephadm@ceph-deploy my-cluster]$ ceph-deploy osd create --data /dev/sdc ceph-osd3
[cephadm@ceph-deploy my-cluster]$ ceph-deploy osd create --data /dev/sdd ceph-osd1
[cephadm@ceph-deploy my-cluster]$ ceph-deploy osd create --data /dev/sdd ceph-osd2
[cephadm@ceph-deploy my-cluster]$ ceph-deploy osd create --data /dev/sdd ceph-osd3
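The nine commands above differ only in host and device, so an equivalent shell loop, run as cephadm from ~/my-cluster, does the same job:

for host in ceph-osd1 ceph-osd2 ceph-osd3; do
  for dev in /dev/sdb /dev/sdc /dev/sdd; do
    ceph-deploy osd create --data "$dev" "$host"
  done
done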
#On the OSD nodes you can see that Ceph has turned each disk into an LVM physical volume and added it to the cluster
[root@ceph-osd1 ~]# pvs |grep ceph
/dev/sdb ceph-6b3c8e6a-d2c9-4902-8a0c-7d1987cb0f97 lvm2 a-- <50.00g 0
/dev/sdc ceph-832a6c4d-4cbb-4d9f-9dfe-e4e186b575d7 lvm2 a-- <50.00g 0
/dev/sdd ceph-97b80c80-b520-4073-9cda-544c7ce46c94 lvm2 a-- <50.00g 0
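#ceph-deploy admin pushes ceph.conf and the client.admin keyring into /etc/ceph on each listed node, so the ceph CLI can be used there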
[cephadm@ceph-deploy my-cluster]$ ceph-deploy admin ceph-mon1 ceph-mon2 ceph-mon3 ceph-mgr1 ceph-mgr2 ceph-osd1 ceph-osd2 ceph-osd3
4. Check the Ceph status after deployment
#Check the OSD tree
[root@ceph-mon1 ~]# ceph osd tree
ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF
-1 0.43918 root default
-3 0.14639 host ceph-osd1
0 hdd 0.04880 osd.0 up 1.00000 1.00000
3 hdd 0.04880 osd.3 up 1.00000 1.00000
4 hdd 0.04880 osd.4 up 1.00000 1.00000
-5 0.14639 host ceph-osd2
1 hdd 0.04880 osd.1 up 1.00000 1.00000
5 hdd 0.04880 osd.5 up 1.00000 1.00000
7 hdd 0.04880 osd.7 up 1.00000 1.00000
-7 0.14639 host ceph-osd3
2 hdd 0.04880 osd.2 up 1.00000 1.00000
6 hdd 0.04880 osd.6 up 1.00000 1.00000
8 hdd 0.04880 osd.8 up 1.00000 1.00000
#Check the cluster health
[root@ceph-osd1 ~]# ceph -s
cluster:
id: 0f0abd0c-00d1-4a51-8517-31cb99f76cb3
health: HEALTH_OK
services:
mon: 3 daemons, quorum ceph-mon1,ceph-mon2,ceph-mon3 (age 59s)
mgr: ceph-mgr1(active, since 25m), standbys: ceph-mgr2
osd: 9 osds: 9 up (since 18m), 9 in (since 18m)
task status:
data:
pools: 0 pools, 0 pgs
objects: 0 objects, 0 B
usage: 9.0 GiB used, 441 GiB / 450 GiB avail
pgs:
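A few more read-only commands are useful as a final check; run them on any node that received the admin keyring (ceph-mon1 here is just an example):

ceph health detail
ceph df
ceph osd df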