SecretFlow集群搭建

一、准备工作

  1. 前置条件:操作系统为 Ubuntu 或者 CentOS 7

  2. 单台虚拟机上安装docker,docker-compose

  3. 拉取镜像secretflow/ubuntu-base-ci(ubuntu)或者secretflow/secretflow-anolis8:latest(centos7)

  4. 这里以ubuntu系统为例,前置步骤可以参考

1.ubuntu系统安装:https://blog.youkuaiyun.com/fluency_11/article/details/152731001?fromshare=blogdetail&sharetype=blogdetail&sharerId=152731001&sharerefer=PC&sharesource=fluency_11&sharefrom=from_link

2.ubuntu系统中安装docker,docker-compose:https://blog.youkuaiyun.com/fluency_11/article/details/152799954?fromshare=blogdetail&sharetype=blogdetail&sharerId=152799954&sharerefer=PC&sharesource=fluency_11&sharefrom=from_link

二、单机部署

1.创建目录

# Directory that holds the Dockerfile used to build the SSH-enabled image.
# (Shell comments start with '#'; a trailing '//text' would be passed to mkdir
# as an extra path and create a stray directory under '/'.)
mkdir -p /opt/SecretFlow/dockerfile
# Host-side workspace whose subdirectories are mapped into the container's
# /workspace by the docker-compose files.
mkdir -p /opt/SecretFlow/workspace/{scripts,data,configs,logs}

2.编写dockerfile

# Enter the Dockerfile directory and create/edit the Dockerfile named dockerfile-ssh.
cd /opt/SecretFlow/dockerfile
vim dockerfile-ssh

3.dockerfile-ssh内容如下

# SecretFlow image extended with an OpenSSH server so that each node container
# can be reached over SSH.
FROM secretflow/secretflow-anolis8:latest

# Install the OpenSSH server and client, then make sure /var/empty/sshd exists,
# is owned by root (0:0) and has mode 0711 (the original author's fix for an
# sshd permission problem on this directory).
RUN yum install -y openssh-server openssh-clients && \
    yum clean all && \
    mkdir -p /var/empty/sshd && \
    chown 0:0 /var/empty/sshd && \
    chmod 0711 /var/empty/sshd

# Configure sshd: create its runtime directory, generate the host keys, and
# un-comment PermitRootLogin / PasswordAuthentication if they are commented out.
# Keep every trailing '\' free of trailing whitespace.
RUN mkdir -p /var/run/sshd && \
    ssh-keygen -A && \
    sed -i 's/^#PermitRootLogin yes/PermitRootLogin yes/' /etc/ssh/sshd_config && \
    sed -i 's/^#PasswordAuthentication yes/PasswordAuthentication yes/' /etc/ssh/sshd_config

# Set the root password.
# NOTE(review): hard-coded weak password; acceptable only in an isolated lab.
# Replace it (or switch to key-based login) before exposing the SSH port.
RUN echo 'root:123456' | chpasswd

# Generate the container start script. It re-applies the /var/empty/sshd
# ownership and mode at every start and then runs sshd in the foreground.
# sshd is started with -D (do not detach) instead of -d: debug mode serves a
# single connection and then exits, which would stop the container after the
# first SSH session. -e sends the sshd log to stderr so `docker logs` shows it,
# and `exec` lets sshd replace the shell so it receives stop signals directly.
RUN touch /start.sh && \
    echo '#!/bin/bash' >> /start.sh && \
    echo '# 强制修复/var/empty/sshd权限' >> /start.sh && \
    echo 'mkdir -p /var/empty/sshd' >> /start.sh && \
    echo 'chown -R root:root /var/empty/sshd' >> /start.sh && \
    echo 'chmod 0711 /var/empty/sshd' >> /start.sh && \
    echo '' >>/start.sh && \
    echo '# 启动sshd并保持容器运行' >> /start.sh && \
    echo 'exec /usr/sbin/sshd -D -e' >> /start.sh

WORKDIR /workspace
ENV PYTHONPATH=/workspace
# NOTE(review): this replaces the PATH inherited from the base image; confirm
# that python and ray are still found on these directories.
ENV PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin

# Move the start script into the workspace and make it executable.
RUN mv /start.sh /workspace/start.sh && \
    chmod +x /workspace/start.sh

EXPOSE 22 10001 20001 20002 20003 8888

# Absolute path, so the entrypoint does not depend on the working directory
# the container is started with.
ENTRYPOINT ["/workspace/start.sh"]

4.通过这个dockerfile创建一个可以ssh连接的新镜像

# Build the SSH-enabled image from the Dockerfile created above (dockerfile-ssh);
# run this from /opt/SecretFlow/dockerfile so that '.' is the build context.
docker build -t secretflow-ssh:v1.0 -f dockerfile-ssh .

三、多节点部署

1.克隆

关闭A虚拟机并克隆出B,C虚拟机

2.修改配置

(1)修改B,C虚拟机ip地址以及主机名

开启B虚拟机

root@A:~# cd /etc/netplan
root@A:/etc/netplan# ls
01-network-manager-all.yaml
root@A:/etc/netplan# vim 01-network-manager-all.yaml 

# Apply the network configuration edited in the netplan YAML file
sudo netplan apply
# Change this VM's hostname to B
sudo hostnamectl set-hostname B

节点C也是一样的操作

先修改ip地址再修改主机名

(2)编辑docker-compose文件

driver节点
# Compose file for the driver (scheduling) node, IP 192.168.127.130.
version: '3.8'

services:
  secretflow-driver:
    image: secretflow-ssh:v1.0
    container_name: sf_driver
    # Publishes only the container's SSH port 22 as host port 2222.
    # NOTE(review): the Ray head port 10001 used by `ray start --head` later in
    # this guide is not published, while nodes b and c use network_mode "host";
    # confirm which networking mode the driver should use.
    ports:
      - "2222:22"
    # Descriptive variables for this node.
    # NOTE(review): nothing visible in this guide reads them - confirm they are needed.
    environment:
      - NODE_NAME=Driver
      - NODE_IP=192.168.127.130
      - NODE_ROLE=driver
      - PARTY_NAME=driver
    # Bind-mount host directories (relative to this compose file) into /workspace.
    volumes:
      - ./scripts:/workspace/scripts
      - ./data:/workspace/data
      - ./data/driver:/workspace/data/driver
      - ./configs:/workspace/configs
      - ./logs:/workspace/logs
    working_dir: /workspace
    # NOTE(review): the image declares ENTRYPOINT ["./start.sh"], so Compose
    # passes this command to start.sh as arguments, and start.sh does not read
    # them; the echo/sleep below presumably never runs - confirm.
    command: >
      sh -c "
        echo '🚀 SecretFlow Driver节点启动成功' &&
        echo 'IP: 192.168.127.130' &&
        echo '角色: 调度节点' &&
        sleep infinity
      "
    restart: unless-stopped
    stdin_open: true
    tty: true
a节点
# Compose file for data node a (party alice), IP 192.168.127.131.
version: '3.8'

services:
  secretflow-node-a:
    image: secretflow-ssh:v1.0
    container_name: sf_a
    # Publishes only the container's SSH port 22 as host port 2222.
    # NOTE(review): nodes b and c use network_mode "host" instead; confirm
    # whether this node's Ray ports must be reachable from the other hosts.
    ports:
      - "2222:22"
    # Descriptive variables for this node.
    # NOTE(review): nothing visible in this guide reads them - confirm they are needed.
    environment:
      - NODE_NAME=A
      - NODE_IP=192.168.127.131
      - NODE_ROLE=party
      - PARTY_NAME=alice
    # Bind-mount host directories (relative to this compose file) into /workspace.
    volumes:
      - ./data:/workspace/data
      - ./data/a:/workspace/data/a
      - ./logs:/workspace/logs
    working_dir: /workspace
    # NOTE(review): the image declares ENTRYPOINT ["./start.sh"], so Compose
    # passes this command to start.sh as arguments, and start.sh does not read
    # them; the echo/sleep below presumably never runs - confirm.
    command: >
      sh -c "
        echo '🏛️  Museum A节点启动成功' &&
        echo 'IP: 192.168.127.131' &&
        echo '角色: 数据节点 (alice)' &&
        sleep infinity
      "
    restart: unless-stopped
    stdin_open: true
    tty: true
b节点
# Compose file for data node b (party bob), IP 192.168.127.132.
version: '3.8'

services:
  secretflow-node-b:
    image: secretflow-ssh:v1.0
    container_name: sf_b
    # The container shares the host's network stack, so no ports are mapped.
    # NOTE(review): the container's sshd then listens on the host's port 22 and
    # may conflict with an sshd already running on the host - confirm.
    network_mode: "host"
    # Descriptive variables for this node.
    # NOTE(review): nothing visible in this guide reads them - confirm they are needed.
    environment:
      - NODE_NAME=B
      - NODE_IP=192.168.127.132
      - NODE_ROLE=party
      - PARTY_NAME=bob
    # Bind-mount host directories (relative to this compose file) into /workspace.
    volumes:
      - ./data:/workspace/data
      - ./data/b:/workspace/data/b
      - ./logs:/workspace/logs
    working_dir: /workspace
    # NOTE(review): the image declares ENTRYPOINT ["./start.sh"], so Compose
    # passes this command to start.sh as arguments, and start.sh does not read
    # them; the echo/sleep below presumably never runs - confirm.
    command: >
      sh -c "
        echo '🏛️  Museum B节点启动成功' &&
        echo 'IP: 192.168.127.132' &&
        echo '角色: 数据节点 (bob)' &&
        sleep infinity
      "
    restart: unless-stopped
    stdin_open: true
    tty: true
c节点
# Compose file for data node c (party charlie), IP 192.168.127.133.
version: '3.8'

services:
  secretflow-node-c:
    image: secretflow-ssh:v1.0
    container_name: sf_c
    # The container shares the host's network stack, so no ports are mapped.
    # NOTE(review): the container's sshd then listens on the host's port 22 and
    # may conflict with an sshd already running on the host - confirm.
    network_mode: "host"
    # Descriptive variables for this node.
    # NOTE(review): the Python test scripts in this guide call this party
    # 'carol', not 'charlie' - confirm which name cluster_config.json uses.
    environment:
      - NODE_NAME=C
      - NODE_IP=192.168.127.133
      - NODE_ROLE=party
      - PARTY_NAME=charlie
    # Bind-mount host directories (relative to this compose file) into /workspace.
    volumes:
      - ./data:/workspace/data
      - ./data/c:/workspace/data/c
      - ./logs:/workspace/logs
    working_dir: /workspace
    # NOTE(review): the image declares ENTRYPOINT ["./start.sh"], so Compose
    # passes this command to start.sh as arguments, and start.sh does not read
    # them; the echo/sleep below presumably never runs - confirm.
    command: >
      sh -c "
        echo '🏛️  Museum C节点启动成功' &&
        echo 'IP: 192.168.127.133' &&
        echo '角色: 数据节点 (charlie)' &&
        sleep infinity
      "
    restart: unless-stopped
    stdin_open: true
    tty: true

3.启动容器

# Create and start this node's container in the background (-d = detached).
docker-compose up -d

4.启动ray集群

# Start the Ray head node (presumably run inside the driver container).
ray start --head --node-ip-address=192.168.127.130 --port=10001 --include-dashboard=true --dashboard-port=8265
# Node a joins the Ray cluster
ray start --address=192.168.127.130:10001 --node-ip-address=192.168.127.131
# Node b joins the Ray cluster
ray start --address=192.168.127.130:10001 --node-ip-address=192.168.127.132
# Node c joins the Ray cluster
ray start --address=192.168.127.130:10001 --node-ip-address=192.168.127.133
# Check the cluster from the driver host. The container name must match
# container_name in the driver's compose file, which is sf_driver (underscore).
docker exec sf_driver ray status
# Expect 4 nodes: 192.168.127.130, 192.168.127.131, 192.168.127.132, 192.168.127.133

运行分布式计算测试

driver创建分布式计算测试脚本 /root/distributed_test.py

import secretflow as sf
import json
import numpy as np
from secretflow.data.vertical import VDataFrame
from secretflow.device import SPU

# Load the cluster configuration.
# NOTE(review): the interactive init test later in this guide reads
# /root/secretflow/data/cluster_config.json instead - confirm the real path.
with open('/root/cluster_config.json', 'r') as f:
    cluster_config = json.load(f)

# Initialise SecretFlow with the Ray head address stored in the config and the
# full configuration dict.
sf.init(
    address=cluster_config['ray_head_addr'],
    cluster_config=cluster_config
)

print("🚀 开始分布式计算测试...")

# Build random test data for one party.
def create_test_data(party='alice'):
    """Return a random 100-row matrix for ``party``.

    Args:
        party: Party name. 'alice' gets 3 columns; every other party gets 2.
            Defaults to 'alice', which matches what a no-argument call
            produced before.

    Returns:
        numpy.ndarray of shape (100, 3) for 'alice', otherwise (100, 2).
    """
    n_cols = 3 if party == 'alice' else 2
    return np.random.rand(100, n_cols)

# Create each party's data on that party's PYU device. The party name is passed
# explicitly: the previous check `sf.PYU('alice').party == 'alice'` was always
# true, so every party received the alice-shaped (100, 3) matrix.
# NOTE(review): node c's compose file sets PARTY_NAME=charlie while this script
# uses 'carol' - confirm the party name used in cluster_config.json.
alice_data = sf.PYU('alice')(create_test_data)('alice')
bob_data = sf.PYU('bob')(create_test_data)('bob')
carol_data = sf.PYU('carol')(create_test_data)('carol')

# Build the vertical data frame from the three parties' data.
# NOTE(review): VDataFrame presumably expects PYU devices as keys and Partition
# objects as values, not party-name strings and raw PYU objects - confirm
# against the installed SecretFlow version.
vdf = VDataFrame(
    partitions={
        'alice': alice_data,
        'bob': bob_data, 
        'carol': carol_data
    }
)

print("✅ 垂直数据框创建成功")
print(f"数据分区: {list(vdf.partitions.keys())}")

# Exercise SPU secure computation.
print("\n🔒 测试SPU安全计算...")
# NOTE(review): SPU presumably takes an SPU cluster definition (nodes plus
# runtime_config) via `cluster_def`, not the parties map via `cluster_config` -
# confirm against the SPU device documentation.
spu = SPU(cluster_config=cluster_config['parties'])

# Simple secure-aggregation test.
def secure_aggregate(data1, data2, data3):
    """Return the sum of all elements of the three inputs.

    Args:
        data1, data2, data3: Array-like inputs, one per party.

    Returns:
        The scalar total of the three per-input sums.
    """
    import numpy as np
    sum1 = np.sum(data1)
    sum2 = np.sum(data2)
    sum3 = np.sum(data3)
    return sum1 + sum2 + sum3

# Run the aggregation on the SPU device. `sf.device` is the secretflow.device
# package, not a decorator, so the function is scheduled by calling the device
# object itself: spu(fn)(*args). Each party's data is moved to the SPU first.
result = spu(secure_aggregate)(
    alice_data.to(spu),
    bob_data.to(spu),
    carol_data.to(spu)
)

# The SPU call returns a device object; reveal it to print the plaintext value.
print(f"安全聚合结果: {sf.reveal(result)}")

print("\n🎊 分布式计算测试完成!")

在driver容器中运行:

# Use the absolute path: the script was created at /root/distributed_test.py,
# while the container's working directory is /workspace.
python /root/distributed_test.py

测试SecretFlow初始化

python

# Run this test on node A
import secretflow as sf
import json

# Read the cluster configuration.
# NOTE(review): distributed_test.py reads /root/cluster_config.json instead -
# confirm the real path.
with open('/root/secretflow/data/cluster_config.json', 'r') as f:
    cluster_config = json.load(f)

print(f"启动节点: {cluster_config['self_party']}")

# Connect using the Ray head node's node-ip-address and port
sf.init(
    address=cluster_config['ray_head_addr'],  # head was started on 192.168.127.130:10001
    cluster_config=cluster_config
)

print("SecretFlow分布式集群初始化成功!")

# Create a PYU device for each party.
# NOTE(review): node c's compose file sets PARTY_NAME=charlie while 'carol' is
# used here - confirm the party name used in cluster_config.json.
alice = sf.PYU('alice')
bob = sf.PYU('bob') 
carol = sf.PYU('carol')

print("所有参与方创建成功!")
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值