ls *1.fq.gz > ../config/1
ls *2.fq.gz > ../config/2
ls *1.fq.gz| cut -d'_' -f 1 > ../config/0
cd ../config/
paste 0 1 2 > raw.config
Raw data triming
aligning to genome
rmdup
filtering to lastbam
以上四部分操作整合到一个脚本中,直接可以从raw fq.gz转换到lastbam
#整体于rnaseq环境下执行
#用于trim接头和低质量序列
#/home/xintong/ATAC_Project/yt_ATAC 脚本在此文件夹下
mkdir -p clean
mkdir -p align
mkdir -p rmdup
mkdir -p lastbam
#./config/raw.config已构建
cat ./config/raw.config |while read id;
do echo $id
arr=($id)
fq2=${arr[2]}
fq1=${arr[1]}
sample=${arr[0]}
echo Now I am processing the raw data $sample
echo $fq1
echo $fq2
trim_galore -q 25 --phred33 --length 35 -e 0.1 --stringency 4 --paired -o ./clean ./raw/$fq1 ./raw/$fq2
cd /home/xintong/ATAC_Project/yt_ATAC/clean
echo ${fq1%.fq.gz}_val_1.fq.gz
echo ${fq2%.fq.gz}_val_2.fq.gz
#bowtie2.sh
bowtie2 -p 25 --very-sensitive -X 2000 -x /home/ruiqing/reference/ricegenome -1 ${fq1%.fq.gz}_val_1.fq.gz -2 ${fq2%.fq.gz}_val_2.fq.gz | samtools sort -T ./ -O bam -@ 25 -o - > /home/xintong/ATAC_Project/yt_ATAC/align/${sample}.bam
samtools index /home/xintong/ATAC_Project/yt_ATAC/align/${sample}.bam
samtools flagstat /home/xintong/ATAC_Project/yt_ATAC/align/${sample}.bam > /home/xintong/ATAC_Project/yt_ATAC/align/${sample}.stat
#flagstat需要写绝对路径
sambamba markdup --overflow-list-size 600000 -t 20 --tmpdir='./' -r /home/xintong/ATAC_Project/yt_ATAC/align/${sample}.bam /home/xintong/ATAC_Project/yt_ATAC/rmdup/${sample}.rmdup.bam
samtools index /home/xintong/ATAC_Project/yt_ATAC/rmdup/${sample}.rmdup.bam
samtools flagstat /home/xintong/ATAC_Project/yt_ATAC/rmdup/${sample}.rmdup.bam > /home/xintong/ATAC_Project/yt_ATAC/rmdup/${sample}.rmdup.stat
samtools view -h -f 2 -q 30 /home/xintong/ATAC_Project/yt_ATAC/rmdup/${sample}.rmdup.bam |grep -v ChrUn|grep -v ChrSy|samtools sort -T ./ -O bam -@ 20 -o - > /home/xintong/ATAC_Project/yt_ATAC/lastbam/${sample}.last.bam
samtools index /home/xintong/ATAC_Project/yt_ATAC/lastbam/${sample}.last.bam
samtools flagstat /home/xintong/ATAC_Project/yt_ATAC/lastbam/${sample}.last.bam > /home/xintong/ATAC_Project/yt_ATAC/lastbam/${sample}.last.stat
bedtools bamtobed -i /home/xintong/ATAC_Project/yt_ATAC/lastbam/${sample}.last.bam > /home/xintong/ATAC_Project/yt_ATAC/lastbam/${sample}.last.bed
done
bam to bw and ploting
###atac_seq虚拟环境
Projpath=/home/xintong/ATAC_Project/yt_ATAC
mkdir -p ${Projpath}/bw
cd ${Projpath}/lastbam
ls *.bam |while read id;do
echo $id
bamCoverage -p 30 -bs 10 --normalizeUsing RPKM --smoothLength 20 -b $id -o ${Projpath}/bw/${id%%.*}.bw
done
cd ${Projpath}/bw
computeMatrix scale-regions -p 30 \
-b 10000 -a 10000 \
-R /home/ruiqing/reference/all.gtf \
-S ${Projpath}/bw/*.bw \
--skipZeros -o matrix1_test_body.gz \
--outFileSortedRegions matrix1_test.bed
plotProfile -m matrix1_test_body.gz -out test_body_Profile.png
plotHeatmap -m matrix1_test_body.gz -out test_body_Heatmap.png
mkdir -p ${Projpath}/peaks
###########如果atac_seq虚拟环境下有macs2,可执行下列脚本
cd ${Projpath}/lastbam
#callpeak.sh
ls *.bed | while read id ;do (macs2 callpeak -t $id -g 4e8 --nomodel --shift -100 --extsize 200 -n ${id%%.*} --outdir ${Projpath}/peaks) ;done
peaks calling
#################rnaseq环境下
Projpath=/home/xintong/ATAC_Project/yt_ATAC
cd ${Projpath}/lastbam
#callpeak.sh
ls *.bed | while read id ;do (macs2 callpeak -t $id -g 4e8 --nomodel --shift -100 --extsize 200 -n ${id%%.*} --outdir ${Projpath}/peaks) ;done
bedtools intersect -a 370-1-1_peaks.narrowPeak -b 370-1-2_peaks.narrowPeak 370-1-3_peaks.narrowPeak -wo > 370_merge.bed
bedtools intersect -a ZH11-1-1_peaks.narrowPeak -b ZH11-1-2_peaks.narrowPeak ZH11-1-3_peaks.narrowPeak -wo >ZH11_merge.bed