YT ATAC数据整理

ls *1.fq.gz > ../config/1
ls *2.fq.gz > ../config/2
ls *1.fq.gz| cut -d'_' -f 1 > ../config/0
cd ../config/
paste 0 1 2 > raw.config

 Raw data triming 

aligning to genome 

rmdup

filtering to lastbam

以上四部分操作整合到一个脚本中,直接可以从raw fq.gz转换到lastbam

#整体于rnaseq环境下执行
#用于trim接头和低质量序列

#/home/xintong/ATAC_Project/yt_ATAC 脚本在此文件夹下
mkdir -p clean
mkdir -p align
mkdir -p rmdup
mkdir -p lastbam
#./config/raw.config已构建

cat ./config/raw.config  |while read id;
do echo $id
arr=($id)
fq2=${arr[2]}
fq1=${arr[1]}
sample=${arr[0]}
echo Now I am processing the raw data $sample
echo $fq1
echo $fq2
trim_galore -q 25 --phred33 --length 35 -e 0.1 --stringency 4 --paired -o ./clean  ./raw/$fq1   ./raw/$fq2

cd /home/xintong/ATAC_Project/yt_ATAC/clean
echo ${fq1%.fq.gz}_val_1.fq.gz
echo ${fq2%.fq.gz}_val_2.fq.gz
#bowtie2.sh
bowtie2 -p 25 --very-sensitive -X 2000 -x /home/ruiqing/reference/ricegenome -1 ${fq1%.fq.gz}_val_1.fq.gz -2 ${fq2%.fq.gz}_val_2.fq.gz | samtools sort -T ./ -O bam -@ 25 -o - > /home/xintong/ATAC_Project/yt_ATAC/align/${sample}.bam
samtools index /home/xintong/ATAC_Project/yt_ATAC/align/${sample}.bam
samtools flagstat /home/xintong/ATAC_Project/yt_ATAC/align/${sample}.bam > /home/xintong/ATAC_Project/yt_ATAC/align/${sample}.stat
#flagstat需要写绝对路径


sambamba markdup --overflow-list-size 600000 -t 20 --tmpdir='./' -r /home/xintong/ATAC_Project/yt_ATAC/align/${sample}.bam  /home/xintong/ATAC_Project/yt_ATAC/rmdup/${sample}.rmdup.bam
samtools index /home/xintong/ATAC_Project/yt_ATAC/rmdup/${sample}.rmdup.bam
samtools flagstat /home/xintong/ATAC_Project/yt_ATAC/rmdup/${sample}.rmdup.bam > /home/xintong/ATAC_Project/yt_ATAC/rmdup/${sample}.rmdup.stat

samtools view -h -f 2 -q 30 /home/xintong/ATAC_Project/yt_ATAC/rmdup/${sample}.rmdup.bam |grep -v ChrUn|grep -v ChrSy|samtools sort -T ./ -O bam -@ 20 -o - > /home/xintong/ATAC_Project/yt_ATAC/lastbam/${sample}.last.bam
samtools index /home/xintong/ATAC_Project/yt_ATAC/lastbam/${sample}.last.bam
samtools flagstat /home/xintong/ATAC_Project/yt_ATAC/lastbam/${sample}.last.bam > /home/xintong/ATAC_Project/yt_ATAC/lastbam/${sample}.last.stat
bedtools bamtobed -i /home/xintong/ATAC_Project/yt_ATAC/lastbam/${sample}.last.bam  > /home/xintong/ATAC_Project/yt_ATAC/lastbam/${sample}.last.bed
done

 bam to bw and ploting

###atac_seq虚拟环境
Projpath=/home/xintong/ATAC_Project/yt_ATAC
mkdir -p ${Projpath}/bw
cd ${Projpath}/lastbam

ls *.bam |while read id;do
echo $id
bamCoverage -p 30 -bs 10  --normalizeUsing RPKM --smoothLength 20 -b $id -o ${Projpath}/bw/${id%%.*}.bw 
done

cd ${Projpath}/bw
computeMatrix scale-regions  -p 30  \
-b 10000 -a 10000    \
-R /home/ruiqing/reference/all.gtf  \
-S ${Projpath}/bw/*.bw  \
--skipZeros  -o matrix1_test_body.gz  \
--outFileSortedRegions matrix1_test.bed


plotProfile -m matrix1_test_body.gz  -out test_body_Profile.png
plotHeatmap -m matrix1_test_body.gz  -out test_body_Heatmap.png


mkdir -p ${Projpath}/peaks
###########如果atac_seq虚拟环境下有macs2,可执行下列脚本
cd ${Projpath}/lastbam
#callpeak.sh
ls *.bed | while read id ;do (macs2 callpeak -t $id  -g 4e8 --nomodel --shift  -100 --extsize 200  -n ${id%%.*} --outdir ${Projpath}/peaks) ;done

  peaks calling

#################rnaseq环境下
Projpath=/home/xintong/ATAC_Project/yt_ATAC
cd ${Projpath}/lastbam
#callpeak.sh
ls *.bed | while read id ;do (macs2 callpeak -t $id  -g 4e8 --nomodel --shift  -100 --extsize 200  -n ${id%%.*} --outdir ${Projpath}/peaks) ;done


 bedtools intersect -a 370-1-1_peaks.narrowPeak -b 370-1-2_peaks.narrowPeak 370-1-3_peaks.narrowPeak -wo > 370_merge.bed

 bedtools intersect -a ZH11-1-1_peaks.narrowPeak -b ZH11-1-2_peaks.narrowPeak ZH11-1-3_peaks.narrowPeak -wo >ZH11_merge.bed

 

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值