Multi-Samples AA

# Single-cell 5' gene expression + BCR (V(D)J-B) pipeline for two samples
# (Control, Self-DNA): environment setup, MD5 verification, FastQC/MultiQC,
# FASTQ organisation, Cell Ranger install, reference download, one
# `cellranger multi` run per sample, and a short result summary.
#
# Run with bash from the directory that should contain `sc_project/`.
# On the first run the directory tree is created; place all *.fastq.gz and
# *.md5 files into sc_project/raw_data/rna and sc_project/raw_data/bcr and
# run the script again.
#
# Optional environment overrides:
#   PROJECT_DIR     project root            (default: $PWD/sc_project)
#   CELLRANGER_URL  Cell Ranger tarball URL (see install_cellranger)
#   LOCAL_CORES     cores for cellranger    (default: 16)
#   LOCAL_MEM_GB    memory (GB) for cellranger (default: 58)
#   EXPECT_CELLS    expected cells per sample  (default: 10000)

set -euo pipefail

readonly ENV_NAME="sc_cellrangeranalysis"
readonly PROJECT_DIR="${PROJECT_DIR:-${PWD}/sc_project}"
readonly RAW_DIR="${PROJECT_DIR}/raw_data"
readonly QC_DIR="${PROJECT_DIR}/qc_reports"
readonly REF_DIR="${PROJECT_DIR}/ref_genome"
readonly SOFTWARE_DIR="${PROJECT_DIR}/software"
readonly RESULTS_DIR="${PROJECT_DIR}/results"
readonly -a SAMPLES=("Control" "Self-DNA")
readonly -a LIBRARY_KINDS=("rna" "bcr")

readonly QC_THREADS=12
# Defaults are the values the original author chose for an i7-12700KF with
# 64 GB RAM; lower LOCAL_MEM_GB if the machine runs out of memory.
readonly LOCAL_CORES="${LOCAL_CORES:-16}"
readonly LOCAL_MEM_GB="${LOCAL_MEM_GB:-58}"
readonly EXPECT_CELLS="${EXPECT_CELLS:-10000}"

readonly CELLRANGER_VERSION="9.0.1"
# NOTE(review): 10x usually hands out signed, time-limited download links from
# its downloads page; if this plain URL is rejected, export CELLRANGER_URL with
# the link copied from that page.
readonly CELLRANGER_URL="${CELLRANGER_URL:-https://cf.10xgenomics.com/releases/cell-exp/cellranger-${CELLRANGER_VERSION}.tar.xz}"
# NOTE(review): checksums below are carried over from the original script and
# have not been independently verified.
readonly CELLRANGER_MD5="540fe4256c8d12d7f701303983ed3d48"

readonly GEX_REF_URL="https://cf.10xgenomics.com/supp/cell-exp/refdata-gex-GRCm39-2024-A.tar.gz"
readonly GEX_REF_MD5="37c51137ccaeabd4d151f80dc86ce0b3"
readonly GEX_REF_NAME="refdata-gex-GRCm39-2024-A"
readonly VDJ_REF_URL="https://cf.10xgenomics.com/supp/cell-vdj/refdata-cellranger-vdj-GRCm38-alts-ensembl-7.0.0.tar.gz"
readonly VDJ_REF_MD5="c6f41db8f67aa83b04d64ba1ae96e681"
readonly VDJ_REF_NAME="refdata-cellranger-vdj-GRCm38-alts-ensembl-7.0.0"

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# === 0. Conda environment and directory layout ===
setup_environment() {
  command -v conda >/dev/null || die "conda not found in PATH"

  # `conda activate` only works in a script after the shell hook is sourced.
  local conda_base
  conda_base=$(conda info --base)
  # shellcheck source=/dev/null
  source "${conda_base}/etc/profile.d/conda.sh"

  # Create the environment only once so re-runs do not rebuild it.
  if ! conda env list \
    | awk -v name="${ENV_NAME}" '$1 == name { found = 1 } END { exit !found }'; then
    conda create -n "${ENV_NAME}" python=3.10 -y
  fi

  # Activation scripts may reference unset variables, which trips `set -u`.
  set +u
  conda activate "${ENV_NAME}"
  set -u

  conda install -c conda-forge -c bioconda \
    fastqc multiqc pigz csvtk wget samtools -y

  mkdir -p \
    "${RAW_DIR}/rna" "${RAW_DIR}/bcr" \
    "${REF_DIR}" "${SOFTWARE_DIR}" "${RESULTS_DIR}" "${QC_DIR}"
  local kind sample
  for kind in "${LIBRARY_KINDS[@]}"; do
    for sample in "${SAMPLES[@]}"; do
      mkdir -p "${PROJECT_DIR}/${kind}_samples/${sample}"
    done
  done
}

# Abort with instructions if the sequencing data has not been put in place yet.
require_raw_data() {
  local kind
  local -a found
  for kind in "${LIBRARY_KINDS[@]}"; do
    found=("${RAW_DIR}/${kind}"/*.fastq.gz)
    [[ -e "${found[0]}" ]] \
      || die "未找到测序数据: 请将 fastq.gz 和 md5 文件放入 ${RAW_DIR}/${kind} 后重新运行"
  done
}

# === 1. File integrity (MD5) ===
verify_checksums() {
  echo "=== 步骤1/7: 文件校验 ==="
  local kind log
  local failed=0
  for kind in "${LIBRARY_KINDS[@]}"; do
    log="${QC_DIR}/${kind}_md5.log"
    # .md5 files list names relative to their own directory, so check from
    # there; the subshell keeps the cwd change local. The exit status of
    # md5sum (not a grep over the log) decides success, so missing files and
    # missing .md5 lists are caught as well as mismatches.
    if ! (cd "${RAW_DIR}/${kind}" && md5sum -c -- *.md5) >"${log}" 2>&1; then
      failed=1
    fi
  done

  if ((failed)); then
    echo "❌ 文件校验失败!请检查日志:" >&2
    # Best-effort display of the offending lines; grep finding nothing is fine.
    grep -H -E 'FAILED|No such file' "${QC_DIR}"/*_md5.log >&2 || true
    exit 1
  fi
  echo "✅ 所有文件校验通过"
}

# === 2. Raw-read QC (FastQC + MultiQC) ===
run_raw_qc() {
  echo "=== 步骤2/7: 原始数据质控 ==="
  local kind out_dir
  for kind in "${LIBRARY_KINDS[@]}"; do
    out_dir="${QC_DIR}/${kind}_fastqc"
    # FastQC does not create its output directory.
    mkdir -p "${out_dir}"
    fastqc "${RAW_DIR}/${kind}"/*.fastq.gz -t "${QC_THREADS}" -o "${out_dir}"
    multiqc "${out_dir}" -n "${kind}_multiqc_report" -o "${QC_DIR}" --force
  done

  echo "质控报告生成:"
  echo "转录组:${QC_DIR}/rna_multiqc_report.html"
  echo "BCR:${QC_DIR}/bcr_multiqc_report.html"
}

# === 3. Sort FASTQs into per-sample directories ===
organize_fastqs() {
  echo "=== 步骤3/7: 文件组织 ==="
  local kind sample
  local -a reads
  for kind in "${LIBRARY_KINDS[@]}"; do
    for sample in "${SAMPLES[@]}"; do
      # Match by sample prefix instead of hard-coding S/lane numbers.
      reads=("${RAW_DIR}/${kind}/${sample}"_S*_L*_R[12]_001.fastq.gz)
      [[ -e "${reads[0]}" ]] \
        || die "未找到 ${sample} 的 ${kind} FASTQ 文件: ${RAW_DIR}/${kind}"
      mv -- "${reads[@]}" "${PROJECT_DIR}/${kind}_samples/${sample}/"
    done
  done
  # File names are intentionally left untouched: Cell Ranger locates reads by
  # the bcl2fastq pattern <sample>_S<n>_L00<lane>_<read>_001.fastq.gz, so
  # stripping the _S/_L fields would make the FASTQs undiscoverable.
}

# === 4. Install Cell Ranger ===
install_cellranger() {
  echo "=== 步骤4/7: 安装Cell Ranger ${CELLRANGER_VERSION} ==="
  local tarball="cellranger-${CELLRANGER_VERSION}.tar.xz"
  local install_dir="${SOFTWARE_DIR}/cellranger-${CELLRANGER_VERSION}"

  if [[ ! -x "${install_dir}/cellranger" ]]; then
    wget -O "${SOFTWARE_DIR}/${tarball}" "${CELLRANGER_URL}"
    (
      cd "${SOFTWARE_DIR}" \
        && printf '%s  %s\n' "${CELLRANGER_MD5}" "${tarball}" | md5sum -c - \
        && tar -xf "${tarball}"
    )
  fi

  export PATH="${install_dir}:${PATH}"
  cellranger --version
}

# Download, verify and unpack one reference tarball into REF_DIR.
# Arguments: $1 - URL, $2 - expected MD5, $3 - directory name after extraction
fetch_reference() {
  local url=$1 md5=$2 name=$3
  local tarball=${url##*/}

  [[ -d "${REF_DIR}/${name}" ]] && return 0

  wget -O "${REF_DIR}/${tarball}" "${url}"
  (
    cd "${REF_DIR}" \
      && printf '%s  %s\n' "${md5}" "${tarball}" | md5sum -c - \
      && tar -xzf "${tarball}"
  )
}

# === 5. Reference data ===
download_references() {
  echo "=== 步骤5/7: 下载参考基因组 ==="
  # Gene-expression reference (mouse GRCm39, 2024-A).
  fetch_reference "${GEX_REF_URL}" "${GEX_REF_MD5}" "${GEX_REF_NAME}"
  # V(D)J reference (10x mouse GRCm38 build 7.0.0). The config references it
  # by its real name; a symlink relabelling it as "GRCm39" would not change
  # its contents and would only mislead.
  fetch_reference "${VDJ_REF_URL}" "${VDJ_REF_MD5}" "${VDJ_REF_NAME}"
}

# Write the `cellranger multi` config for one sample.
# Arguments: $1 - sample name (FASTQ prefix), $2 - output CSV path
write_multi_config() {
  local sample=$1 config=$2
  # The config is comma-separated (key,value) and uses absolute paths.
  # create-bam is mandatory since Cell Ranger 8; BAMs are kept because the
  # original author wants intronic reads for RNA velocity.
  # expect-cells / no-secondary are config options, not command-line flags.
  cat >"${config}" <<CFG_EOF
[gene-expression]
reference,${REF_DIR}/${GEX_REF_NAME}
chemistry,auto
create-bam,true
include-introns,true
expect-cells,${EXPECT_CELLS}
no-secondary,true

[vdj]
reference,${REF_DIR}/${VDJ_REF_NAME}

[libraries]
fastq_id,fastqs,feature_types
${sample},${PROJECT_DIR}/rna_samples/${sample},Gene Expression
${sample},${PROJECT_DIR}/bcr_samples/${sample},VDJ-B
CFG_EOF
}

# === 6. Cell Ranger multi (one run per sample) ===
run_cellranger_multi() {
  echo "=== 步骤6/7: 运行Cell Ranger整合分析 ==="
  cd "${RESULTS_DIR}" || die "cannot cd to ${RESULTS_DIR}"

  local sample config
  for sample in "${SAMPLES[@]}"; do
    config="${RESULTS_DIR}/multi_config_${sample}.csv"
    write_multi_config "${sample}" "${config}"

    echo "启动整合分析: ${sample}(预计4-8小时)..."
    # Samples that are not multiplexed are processed with one multi run each
    # (GEX + BCR libraries of the same sample). No comments may follow the
    # line-continuation backslashes.
    cellranger multi \
      --id="${sample}" \
      --csv="${config}" \
      --localcores="${LOCAL_CORES}" \
      --localmem="${LOCAL_MEM_GB}" \
      --disable-ui
  done
  echo "✅ 分析完成!结果保存在: ${RESULTS_DIR}"
}

# === 7. Result summary ===
summarize_results() {
  echo "=== 步骤7/7: 结果解读 ==="
  local sample outs metrics
  local -a out_dirs=()
  for sample in "${SAMPLES[@]}"; do
    out_dirs+=("${RESULTS_DIR}/${sample}/outs")
  done

  # The aggregate report is a convenience; do not fail the pipeline over it.
  multiqc "${out_dirs[@]}" -n final_report -o "${RESULTS_DIR}" --force \
    || echo "⚠️ MultiQC 汇总报告生成失败" >&2

  echo ""
  echo "================ 分析结果 ================"
  echo "整合报告: ${RESULTS_DIR}/final_report.html"
  for sample in "${SAMPLES[@]}"; do
    outs="${RESULTS_DIR}/${sample}/outs/per_sample_outs/${sample}"
    echo "${sample} 报告(转录组+BCR): ${outs}/web_summary.html"
  done

  echo ""
  echo "================ 关键指标 ================"
  for sample in "${SAMPLES[@]}"; do
    metrics="${RESULTS_DIR}/${sample}/outs/per_sample_outs/${sample}/metrics_summary.csv"
    echo "[${sample}] 转录组质量:"
    grep -i "Median genes per cell" "${metrics}" || echo "(未找到该指标)"
    echo "[${sample}] BCR质量:"
    grep -i "Cells with productive V-J spanning pair" "${metrics}" \
      || echo "(未找到该指标)"
  done
  echo "=========================================="

  echo "后续分析建议:"
  echo "1. 使用Loupe Browser打开.cloupe文件: ${RESULTS_DIR}/*/outs/per_sample_outs/*/count/sample_cloupe.cloupe"
  echo "2. 克隆型分析: ${RESULTS_DIR}/*/outs/per_sample_outs/*/vdj_b/clonotypes.csv"
  echo "3. 基因表达矩阵: ${RESULTS_DIR}/*/outs/per_sample_outs/*/count/sample_filtered_feature_bc_matrix.h5"
}

main() {
  setup_environment
  require_raw_data
  verify_checksums
  run_raw_qc
  organize_fastqs
  install_cellranger
  download_references
  run_cellranger_multi
  summarize_results
}

main "$@"

# Question posted with the original script: are the cellranger steps above feasible?
06-26
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值