一、准备工作
-
前置条件ubuntu或者centos7
-
单台虚拟机上安装docker,docker-compose
-
拉取镜像secretflow/ubuntu-base-ci(ubuntu)或者secretflow/secretflow-anolis8:latest(centos7)
-
这里以 Ubuntu 系统为例;Docker 与 docker-compose 的安装等前置步骤可以参考 Docker 官方安装文档。
二、单机部署
1.创建目录
# Directory that holds the Dockerfile.
# ('#' starts a shell comment; '//' does not and would be passed to mkdir as another path.)
mkdir -p /opt/SecretFlow/dockerfile
# Host directories that are bind-mounted into the container's /workspace.
mkdir -p /opt/SecretFlow/workspace/{scripts,data,configs,logs}
2.编写dockerfile
cd /opt/SecretFlow/dockerfile
vim dockerfile-ssh
3.dockerfile-ssh内容如下
# SecretFlow image with an SSH server added, so the container can be reached
# over SSH. The container's main process is sshd, started by /workspace/start.sh.
FROM secretflow/secretflow-anolis8:latest

# Install the SSH server and client, then make sure sshd's privilege-separation
# directory exists with root ownership and mode 0711.
RUN yum install -y openssh-server openssh-clients && \
    yum clean all && \
    mkdir -p /var/empty/sshd && \
    chown 0:0 /var/empty/sshd && \
    chmod 0711 /var/empty/sshd

# Generate host keys and allow root login with a password.
# The patterns match the directive whether or not it is commented out and
# whatever its current value is (e.g. "#PermitRootLogin prohibit-password").
RUN mkdir -p /var/run/sshd && \
    ssh-keygen -A && \
    sed -i -E 's/^#?PermitRootLogin[[:space:]].*/PermitRootLogin yes/' /etc/ssh/sshd_config && \
    sed -i -E 's/^#?PasswordAuthentication[[:space:]].*/PasswordAuthentication yes/' /etc/ssh/sshd_config

# Set the root password.
# WARNING: hard-coded weak password baked into the image; acceptable only for
# an isolated test environment. Change it before exposing the SSH port.
RUN echo 'root:123456' | chpasswd

# Create the start-up script.
# sshd runs with -D (stay in the foreground, serve any number of connections)
# instead of -d (debug mode: handles a single connection and then exits, which
# would stop the container). -e keeps sshd's log on stderr for `docker logs`.
RUN touch /start.sh && \
    echo '#!/bin/bash' >> /start.sh && \
    echo '# 强制修复/var/empty/sshd权限' >> /start.sh && \
    echo 'mkdir -p /var/empty/sshd' >> /start.sh && \
    echo 'chown -R root:root /var/empty/sshd' >> /start.sh && \
    echo 'chmod 0711 /var/empty/sshd' >> /start.sh && \
    echo '' >> /start.sh && \
    echo '# 启动sshd并保持容器运行' >> /start.sh && \
    echo 'exec /usr/sbin/sshd -D -e' >> /start.sh

# Make the script executable.
RUN chmod +x /start.sh

WORKDIR /workspace
ENV PYTHONPATH=/workspace
ENV PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin

# Move the start-up script into the working directory.
RUN mv /start.sh /workspace/start.sh && \
    chmod +x /workspace/start.sh

EXPOSE 22 10001 20001 20002 20003 8888

# Relative path: resolved against WORKDIR (/workspace).
ENTRYPOINT ["./start.sh"]
4.通过这个dockerfile创建一个可以ssh连接的新镜像
# Build from the Dockerfile created above (dockerfile-ssh) in the current directory.
docker build -t secretflow-ssh:v1.0 -f dockerfile-ssh .
三、多节点部署
1.克隆
关闭A虚拟机并克隆出B,C虚拟机
2.修改配置
(1)修改B,C虚拟机ip地址以及主机名
开启B虚拟机
root@A:~# cd /etc/netplan
root@A:/etc/netplan# ls
01-network-manager-all.yaml
root@A:/etc/netplan# vim 01-network-manager-all.yaml

# 应用网络配置
sudo netplan apply
# 修改主机名
sudo hostnamectl set-hostname B
节点C也是一样的操作
先修改ip地址再修改主机名
(2)编辑docker-compose文件
driver节点
# docker-compose file for the driver (scheduler) node.
version: '3.8'
services:
  secretflow-driver:
    image: secretflow-ssh:v1.0
    container_name: sf_driver
    # NOTE(review): only SSH is published here. The Ray port used by
    # `ray start` in this guide (10001) is not reachable from other hosts
    # unless it is published too or host networking is used (as on nodes
    # b and c) - confirm.
    ports:
      - "2222:22"
    environment:
      - NODE_NAME=Driver
      - NODE_IP=192.168.127.130
      - NODE_ROLE=driver
      - PARTY_NAME=driver
    volumes:
      - ./scripts:/workspace/scripts
      - ./data:/workspace/data
      - ./data/driver:/workspace/data/driver
      - ./configs:/workspace/configs
      - ./logs:/workspace/logs
    working_dir: /workspace
    # NOTE: the image sets ENTRYPOINT ["./start.sh"], so this command is
    # passed to start.sh as arguments. start.sh does not read its arguments,
    # so these echo/sleep lines are not executed; the container lives as long
    # as start.sh (sshd) runs.
    command: >
      sh -c "
      echo '🚀 SecretFlow Driver节点启动成功' &&
      echo 'IP: 192.168.127.130' &&
      echo '角色: 调度节点' &&
      sleep infinity
      "
    restart: unless-stopped
    stdin_open: true
    tty: true
a节点
# docker-compose file for data node a (party "alice").
version: '3.8'
services:
  secretflow-node-a:
    image: secretflow-ssh:v1.0
    container_name: sf_a
    # NOTE(review): only SSH is published here. Ray traffic to and from
    # other hosts needs published ports or host networking (as on nodes
    # b and c) - confirm.
    ports:
      - "2222:22"
    environment:
      - NODE_NAME=A
      - NODE_IP=192.168.127.131
      - NODE_ROLE=party
      - PARTY_NAME=alice
    volumes:
      - ./data:/workspace/data
      - ./data/a:/workspace/data/a
      - ./logs:/workspace/logs
    working_dir: /workspace
    # NOTE: the image sets ENTRYPOINT ["./start.sh"], so this command is
    # passed to start.sh as arguments. start.sh does not read its arguments,
    # so these echo/sleep lines are not executed; the container lives as long
    # as start.sh (sshd) runs.
    command: >
      sh -c "
      echo '🏛️ Museum A节点启动成功' &&
      echo 'IP: 192.168.127.131' &&
      echo '角色: 数据节点 (alice)' &&
      sleep infinity
      "
    restart: unless-stopped
    stdin_open: true
    tty: true
b节点
# docker-compose file for data node b (party "bob").
version: '3.8'
services:
  secretflow-node-b:
    image: secretflow-ssh:v1.0
    container_name: sf_b
    # Host networking: the container shares the host's network stack.
    # NOTE(review): the container's sshd then listens on the host's port 22
    # directly - confirm it does not clash with an sshd running on the host.
    network_mode: "host"
    environment:
      - NODE_NAME=B
      - NODE_IP=192.168.127.132
      - NODE_ROLE=party
      - PARTY_NAME=bob
    volumes:
      - ./data:/workspace/data
      - ./data/b:/workspace/data/b
      - ./logs:/workspace/logs
    working_dir: /workspace
    # NOTE: the image sets ENTRYPOINT ["./start.sh"], so this command is
    # passed to start.sh as arguments. start.sh does not read its arguments,
    # so these echo/sleep lines are not executed; the container lives as long
    # as start.sh (sshd) runs.
    command: >
      sh -c "
      echo '🏛️ Museum B节点启动成功' &&
      echo 'IP: 192.168.127.132' &&
      echo '角色: 数据节点 (bob)' &&
      sleep infinity
      "
    restart: unless-stopped
    stdin_open: true
    tty: true
c节点
# docker-compose file for data node c (party "carol").
# The party name is "carol" to match the name used by the Python test
# scripts in this guide (sf.PYU('carol')).
version: '3.8'
services:
  secretflow-node-c:
    image: secretflow-ssh:v1.0
    container_name: sf_c
    # Host networking: the container shares the host's network stack.
    # NOTE(review): the container's sshd then listens on the host's port 22
    # directly - confirm it does not clash with an sshd running on the host.
    network_mode: "host"
    environment:
      - NODE_NAME=C
      - NODE_IP=192.168.127.133
      - NODE_ROLE=party
      - PARTY_NAME=carol
    volumes:
      - ./data:/workspace/data
      - ./data/c:/workspace/data/c
      - ./logs:/workspace/logs
    working_dir: /workspace
    # NOTE: the image sets ENTRYPOINT ["./start.sh"], so this command is
    # passed to start.sh as arguments. start.sh does not read its arguments,
    # so these echo/sleep lines are not executed; the container lives as long
    # as start.sh (sshd) runs.
    command: >
      sh -c "
      echo '🏛️ Museum C节点启动成功' &&
      echo 'IP: 192.168.127.133' &&
      echo '角色: 数据节点 (carol)' &&
      sleep infinity
      "
    restart: unless-stopped
    stdin_open: true
    tty: true
3.启动容器
docker-compose up -d
4.启动ray集群
# Start the Ray head node (driver, 192.168.127.130).
ray start --head --node-ip-address=192.168.127.130 --port=10001 --include-dashboard=true --dashboard-port=8265
# Node a joins the Ray cluster.
ray start --address=192.168.127.130:10001 --node-ip-address=192.168.127.131
# Node b joins the Ray cluster.
ray start --address=192.168.127.130:10001 --node-ip-address=192.168.127.132
# Node c joins the Ray cluster.
ray start --address=192.168.127.130:10001 --node-ip-address=192.168.127.133
# Check the cluster from the driver host. The container name must match
# container_name in the driver's docker-compose file (sf_driver).
docker exec sf_driver ray status
# Expected: 4 nodes - 192.168.127.130, 192.168.127.131, 192.168.127.132, 192.168.127.133
运行分布式计算测试
driver创建分布式计算测试脚本 /root/distributed_test.py:
"""Distributed smoke test: create per-party data and sum it on the SPU."""
import secretflow as sf
import json
import numpy as np
from secretflow.data.vertical import VDataFrame
from secretflow.device import SPU

# Load the cluster configuration.
with open('/root/cluster_config.json', 'r') as f:
    cluster_config = json.load(f)

# Initialise SecretFlow against the Ray head listed in the configuration.
sf.init(
    address=cluster_config['ray_head_addr'],
    cluster_config=cluster_config
)
print("🚀 开始分布式计算测试...")


def create_test_data(party):
    """Return a random 100-row matrix for `party`.

    alice gets 3 columns, every other party gets 2. The party name is passed
    in explicitly: a check such as sf.PYU('alice').party == 'alice' is always
    true, so it cannot tell the parties apart.
    """
    n_cols = 3 if party == 'alice' else 2
    return np.random.rand(100, n_cols)


# Create the data on each party's PYU device.
alice_data = sf.PYU('alice')(create_test_data)('alice')
bob_data = sf.PYU('bob')(create_test_data)('bob')
carol_data = sf.PYU('carol')(create_test_data)('carol')

# Build the vertical data frame.
# NOTE(review): VDataFrame is documented as mapping PYU devices to partition
# objects; plain party-name keys with raw PYU objects are kept from the
# original here - confirm against the installed SecretFlow version.
vdf = VDataFrame(
    partitions={
        'alice': alice_data,
        'bob': bob_data,
        'carol': carol_data
    }
)
print("✅ 垂直数据框创建成功")
print(f"数据分区: {list(vdf.partitions.keys())}")

# SPU secure-computation test.
print("\n🔒 测试SPU安全计算...")
# NOTE(review): SecretFlow documents the SPU constructor as taking a
# `cluster_def` (nodes + runtime_config); confirm that this keyword and the
# shape of cluster_config['parties'] are accepted by the installed version.
spu = SPU(cluster_config=cluster_config['parties'])


def secure_aggregate(data1, data2, data3):
    """Sum all elements of the three inputs."""
    # NOTE(review): SecretFlow examples use jax.numpy inside SPU functions;
    # confirm that plain numpy reductions are traced correctly.
    import numpy as np
    sum1 = np.sum(data1)
    sum2 = np.sum(data2)
    sum3 = np.sum(data3)
    return sum1 + sum2 + sum3


# Run the function on the SPU. A device is applied as device(fn)(*args);
# `sf.device` is a module and cannot be used as a decorator.
result = spu(secure_aggregate)(
    alice_data.to(spu),
    bob_data.to(spu),
    carol_data.to(spu)
)
# sf.reveal turns the SPU object into its plaintext value for printing.
print(f"安全聚合结果: {sf.reveal(result)}")
print("\n🎊 分布式计算测试完成!")
在driver容器中运行:
# Use the full path of the script created above; the container's working
# directory is /workspace, so a bare file name would not resolve to it.
python /root/distributed_test.py
测试SecretFlow初始化
python
# Run this check on node A.
import secretflow as sf
import json

# Read the cluster configuration.
with open('/root/secretflow/data/cluster_config.json', 'r') as f:
    cluster_config = json.load(f)
print(f"启动节点: {cluster_config['self_party']}")

# Connect using the head node's node-ip-address and port.
sf.init(
    address=cluster_config['ray_head_addr'],  # head node started above: 192.168.127.130:10001
    cluster_config=cluster_config
)
print("SecretFlow分布式集群初始化成功!")

# Create a PYU device handle for every party.
alice = sf.PYU('alice')
bob = sf.PYU('bob')
carol = sf.PYU('carol')
print("所有参与方创建成功!")
420

被折叠的 条评论
为什么被折叠?



