DeepID Generation I

This post describes a reimplementation of a DeepID-I face recognition system, covering the network structure, training details, and test results. The model is trained on the CASIA WebFace dataset and reaches 91.9% accuracy on LFW.


Original Source: http://blog.youkuaiyun.com/a_1937/article/details/50334919

This is a modified version of the setup from the original blog; the original author's results were not fully reproduced.


GitHub Source: https://github.com/HolmesShuan/DeepID-I-Reimplement


LFW Accuracy: 91.9%

Val DataSet: LFW, 128x128, grayscale

Train DataSet: CASIA WebFace (washed), 144x144, grayscale, cropped to 128x128

Train Details: 60 epochs

Train Accuracy: ~80% (identification over the 10,574 WebFace classes)

Test Accuracy: ~74% (identification on the held-out WebFace split)


Both the training and validation sets were preprocessed with face detection and cropping.
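
The post does not specify which detector or alignment was used for the crop. As a rough, hypothetical sketch of such a step (detect the largest face, convert to grayscale, resize to 144x144; the data layers below then take 128x128 crops), one could use OpenCV:

import cv2

# Hypothetical preprocessing sketch; the detector/alignment actually used
# by the author is not specified in the post.
detector = cv2.CascadeClassifier(
    cv2.data.haarcascades + "haarcascade_frontalface_default.xml")

def crop_face(image_path):
    img = cv2.imread(image_path)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    faces = detector.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5)
    if len(faces) == 0:
        return None                                     # no detection: skip image
    x, y, w, h = max(faces, key=lambda f: f[2] * f[3])  # keep the largest face
    return cv2.resize(gray[y:y + h, x:x + w], (144, 144))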


Network Structure (train_val.prototxt):

name: "DEEPFACE"  
layer {
  name: "input_data"
  top: "data"
  top: "label"
  type: "Data"
  data_param {
    source: "/dataset/WebFace_train_lmdb"  
    backend: LMDB
    batch_size: 400
  }
  transform_param {
    mirror: true
    crop_size: 128
    mean_file: "/dataset/mean.binaryproto"  
  }
  include {
    phase: TRAIN
  }
}
layer {
  name: "input_data"
  top: "data"
  top: "label"
  type: "Data"
  data_param {
    source: "/dataset/WebFace_val_lmdb"  
    backend: LMDB
    batch_size: 200
  }
  transform_param {
    mirror: false
    crop_size: 128
    mean_file: "/dataset/mean.binaryproto"  
  }
  include {
    phase: TEST
  }
}

layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  param {
    name: "conv1_w"
    lr_mult: 1
    decay_mult: 0
  }
  param {
    name: "conv1_b"
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 20
    kernel_size: 4
    stride: 1
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
    }
  }
}

layer {
  name: "relu1"
  type: "ReLU"
  bottom: "conv1"
  top: "conv1"
}
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2"
  param {
    name: "conv2_w"
    lr_mult: 1
    decay_mult: 0
  }
  param {
    name: "conv2_b"
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 40
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
    }
  }
}

layer {
  name: "relu2"
  type: "ReLU"
  bottom: "conv2"
  top: "conv2"
}
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 1
  }
}
layer {
  name: "conv3"
  type: "Convolution"
  bottom: "pool2"
  top: "conv3"
  param {
    name: "conv3_w"
    lr_mult: 1
    decay_mult: 0
  }
  param {
    name: "conv3_b"
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 60
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
    }
  }
}

layer {
  name: "relu3"
  type: "ReLU"
  bottom: "conv3"
  top: "conv3"
}
layer {
  name: "pool3"
  type: "Pooling"
  bottom: "conv3"
  top: "pool3"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "conv4"
  type: "Convolution"
  bottom: "pool3"
  top: "conv4"
  param {
    name: "conv4_w"
    lr_mult: 1
    decay_mult: 0
  }
  param {
    name: "conv4_b"
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 80
    kernel_size: 2
    stride: 1
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "relu4"
  type: "ReLU"
  bottom: "conv4"
  top: "conv4"
}
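
# DeepID-style multi-scale feature: fc160_1 projects pool3 and fc160_2 projects
# conv4; their element-wise sum (fc160) forms the 256-d feature layer.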
layer {
  name: "fc160_1"
  type: "InnerProduct"
  bottom: "pool3"
  top: "fc160_1"
  param {
    name: "fc160_1_w"
    lr_mult: 1
    decay_mult: 1
  }
  param {
    name: "fc160_1_b"
    lr_mult: 2
    decay_mult: 1
  }
  inner_product_param {
    num_output: 256
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
    }
  }
}

layer {
  name: "fc160_2"
  type: "InnerProduct"
  bottom: "conv4"
  top: "fc160_2"
  param {
    name: "fc160_2_w"
    lr_mult: 1
    decay_mult: 1
  }
  param {
    name: "fc160_2_b"
    lr_mult: 2
    decay_mult: 1
  }
  inner_product_param {
    num_output: 256
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
    }
  }
}

layer {
  name: "fc160"
  type: "Eltwise"
  bottom: "fc160_1"
  bottom: "fc160_2"
  top: "fc160"
  eltwise_param {
    operation: SUM
  }
}
layer {
  name: "dropout"
  type: "Dropout"
  bottom: "fc160"
  top: "fc160"
  dropout_param {
    dropout_ratio: 0.4
  }
}
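
# Identity classifier: one output per CASIA WebFace subject.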

layer {
  name: "fc_class"
  type: "InnerProduct"
  bottom: "fc160"
  top: "fc_class"
  param {
    name: "fc_class_w"
    lr_mult: 1
    decay_mult: 1
  }
  param {
    name: "fc_class_b"
    lr_mult: 2
    decay_mult: 1
  }
  inner_product_param {
    num_output: 10574
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "loss"
  type: "SoftmaxWithLoss"
  bottom: "fc_class"
  bottom: "label"
  top: "loss"
}

layer {
  name: "accuracy_TEST"
  type: "Accuracy"
  bottom: "fc_class"
  bottom: "label"
  top: "accuracy_TEST"
  include: { phase: TEST }
}

layer {
  name: "accuracy_TRAIN"
  type: "Accuracy"
  bottom: "fc_class"
  bottom: "label"
  top: "accuracy_TRAIN"
  include: { phase: TRAIN }
}
Features are taken from the "fc160" layer; note that the feature dimension has been changed from the original DeepID's 160 to 256 (the layer names keep the historical "160").
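
A minimal pycaffe sketch of extracting that feature (the deploy prototxt and snapshot file names here are placeholders; substitute your own):

import numpy as np
import caffe

# Placeholder file names; adjust to your deploy definition and snapshot.
net = caffe.Net("models/deepface/deploy.prototxt",
                "models/deepface/WebFace_iter_138992.caffemodel",
                caffe.TEST)

def extract_feature(gray_128):
    # gray_128: a 128x128 single-channel image, already mean-subtracted.
    net.blobs["data"].reshape(1, 1, 128, 128)
    net.blobs["data"].data[0, 0] = gray_128
    net.forward()
    feat = net.blobs["fc160"].data[0].copy()  # the 256-d feature
    return feat / np.linalg.norm(feat)        # L2-normalize for cosine scoring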


Solver.prototxt:

net: "models/deepface/train_val.prototxt"  

test_iter: 227
test_interval: 2044    
  
base_lr: 0.01  
lr_policy: "multistep"  
gamma: 0.5 
stepvalue: 12264  
stepvalue: 20440 
stepvalue: 28616
stepvalue: 36792
stepvalue: 51100
stepvalue: 75628
stepvalue: 89936
stepvalue: 102200
stepvalue: 114464
stepvalue: 126728
stepvalue: 138992
momentum: 0.9  
weight_decay: 0.005  
display: 100
max_iter: 138992
snapshot: 8176
snapshot_prefix: "models/deepface/WebFace"  
solver_mode: GPU  
device_id: 0  
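
Training can be launched from pycaffe as well as from the caffe CLI; a minimal sketch, assuming the solver above is saved as models/deepface/solver.prototxt:

import caffe

caffe.set_device(0)   # matches device_id: 0
caffe.set_mode_gpu()  # matches solver_mode: GPU

solver = caffe.get_solver("models/deepface/solver.prototxt")
solver.solve()        # runs to max_iter (138992), snapshotting every 8176 iterations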

Related Files:

If you are not familiar with how accuracy is measured on the LFW dataset, see:

http://blog.youkuaiyun.com/baidu_24281959/article/details/53218825
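
In short, LFW verification accuracy comes from scoring face pairs and thresholding the score. A minimal sketch, assuming the L2-normalized fc160 features from the extraction snippet above, with a simple global threshold sweep (the official protocol instead selects the threshold per fold over 10 folds):

import numpy as np

def lfw_accuracy(pairs, features):
    # pairs: list of (name_a, name_b, is_same); features: dict name -> unit vector
    scores = np.array([features[a].dot(features[b]) for a, b, _ in pairs])
    labels = np.array([same for _, _, same in pairs], dtype=bool)
    # Sweep every observed score as a candidate threshold and keep the best.
    return max(np.mean((scores >= t) == labels) for t in np.unique(scores))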

The following files are provided:

The val.txt and train.txt used to build the training set.

A .txt file of positive/negative sample pairs for measuring accuracy on LFW.

The .py script used for testing.

The .sh script for generating the LMDB files (an equivalent Python sketch follows below).

They can be found Here.
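
The .sh file presumably wraps Caffe's convert_imageset tool; as an equivalent, hypothetical Python sketch that writes 144x144 grayscale images into an LMDB (assuming train.txt holds "path label" lines):

import cv2
import lmdb
from caffe.proto import caffe_pb2

def build_lmdb(list_file, lmdb_path):
    env = lmdb.open(lmdb_path, map_size=1 << 40)  # generous map size
    with env.begin(write=True) as txn, open(list_file) as f:
        for i, line in enumerate(f):
            path, label = line.rsplit(" ", 1)
            img = cv2.resize(cv2.imread(path, cv2.IMREAD_GRAYSCALE), (144, 144))
            datum = caffe_pb2.Datum()
            datum.channels, datum.height, datum.width = 1, 144, 144
            datum.data = img.tobytes()
            datum.label = int(label)
            txn.put("{:08d}".format(i).encode(), datum.SerializeToString())

build_lmdb("train.txt", "/dataset/WebFace_train_lmdb")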



