From 68fe00c7634a7f184a424031401d83f817d2ea53 Mon Sep 17 00:00:00 2001 From: Laura Cook <l.cook2@student.unimelb.edu.au> Date: Fri, 18 Sep 2020 10:39:44 +1000 Subject: [PATCH] altered to run on 10M subsampled BAM files --- dunnart/.DS_Store | Bin 6148 -> 8196 bytes dunnart/Snakefile | 92 +++++++++++++++++++++++----------------------- 2 files changed, 45 insertions(+), 47 deletions(-) diff --git a/dunnart/.DS_Store b/dunnart/.DS_Store index 1f0f8cc43d7b5d96ca892b4b8cf87f30d8422eca..7b5d11d76760a5b90b3f96da8e1f431d92628504 100644 GIT binary patch literal 8196 zcmeHLO>Epm6n-yF6WjfX6Iv2)f3ivx8X#?xG(Rf++hmiHsEHb7(<W&`Q}5bMO}%zl zdw1KWfdE1(s3)YBmJ@;tRS$?FI29x=C>-EWDq4^@aNqzJ4jd2&1aD@%QMze30%|?d z%p1>pZ#?hwXTN<50N9;1S^#PQK%&m1x`u}PG^S^LSt|)vWRgh!02WMx*KfP2q`k~K zTnG^e5eN|o5eN|o5%@nKKslQ=X@$HmwBZ;c5F+qUBEY^6QR+;_0y!@v|8&s6O8}Ck zw9FgLsSe;i(LlxmIWHu4r9DOUfPpIpB?bz2(npy&$ygxgg%s|9!W}TUGX@n3^1G8? zl$!&_gbc?Jfe?Y^2(a3{98%zd11Fc(?{1Pb>}A?Rz40&JXvSW3lWcQ^I)BmbT9KU$ z!_W&|&;x@ccR(5p@y|fP179ifC<l*DlA%+~fM2{n{_@Y($-Fm{wE5l($r;=7{O0C6 zShA{gby>M2DUz~Ic_wkn%_n@_&wAZ@W`bP}>Heu?FduPUdvUHOk6LDaO5G559bdN` zGsB%N!z7c*QOhyhe7Bo*43F=V5DH0>C3#A%o}F!JZ;I~P-8R=0o$c&wZi=?;>X@68 zrRs*3ef=lWXV1-^pL_laMMMpQPa0H+>&5XZ&5AIwy<A*GS@L=z%DaUqWo3=l*RLEJ zvr?{;u+EuNYK2EZ_S5B1#>`Pbqbb+UrX6oetxV~5Dr@V$*=5_z*c<cBGd`Og^W2Od z=Y~_Nn)S?KlkBXV*+bUkTE%ucJmXug;~h0K9?AMxvpBYVeeK3Ab&oyS-2T-5`0aJ; zE7Zy=ML9<BIKFk#GBZc=mf@co*S(bK7?v|VnzKABX`3UCo;EA7NtSOvT3xeFi)fn* zs$v&Ot(V5KN$a_+<)7vE*jlS8lLnn`&^6dy4D~}v=9>CiO`Bkvc*dmBkR-mmtx;1( zJc?D-1H-26iZXJVo~!E75!k_7S{3bxttU-eHDuw|(Wz*oIT5mYC|KN6rD$U*H=U*s zy<qJ?Ow*3D+YacSe|W}p=tv86Zys8$c2JVWnYPO?=t%XkMgbnjKvr2U)SwR9U@r{8 zI2e$Ji*N}p!`pBjK7yO@8GH#}!PjsH?!x!*Bm4}%!SC<~{E1~)ftztF)?ySN$9C+% zPK;qMK8=0YkHa{M<9G}w(7<QW#xy#Z#~1KLd<kF1i+Bky;~V%EzK8GQ4g3Ia;>Y+2 zF5u^Q3-91vamtbcUkAi>VZ?7~ww$lQTUj!GKfXqK4=VBhl&{~c6(oIR!^W-kjoVv0 z_r?|<X3-N|$|Fu5!4A%v0xS3<yoZYHnQM_v%9fSre1u46o{<>$)pB7SjnqlR7nKu6 zi!?;GNyKQiglm;2B4nvc>MCKpQ)!Wiq3UX(ZBsgA;;>pOw7Zo(5>Ztx6WUG60a;c- 
zTr@=DvZP`$0av8oHMjwv!WVFhX!jlb06)R62q@uttl^Za!+LDM9k>%)uoc^IAMPje zbzvL_a1amU5D{<!C-FE=p-u!m%?UVzXYm}K$2lV61$-4>TO{IDd=uZncZ+D(S0J2_ z$P!Nx?Q)5XW4q2YX&6~$NKnes;WVW#mgH<O?@dKK$z55J<iyqw__#{MSHtT6-j)CV zf51@)!wV63xDmk8o<vVKVNgiy#c;4%J4*cob=H{OypY_5240Sn<mEWYpd9D6Q8Hyd du|UoXNh{R;^*;iNey@i4Kg|E4$bGo-|KGpRWAy+4 delta 123 zcmZp1XfcprU|?W$DortDU=RQ@Ie-{MGjdEU6q~50$jCY|U^gS{WFCQ?n?Hz&Gj43q zW?jtA!6C>DR0IS9+(5z=q<&-Jcjn3bGL{M;VMYc9CMeAUqJbQc$so%ZHplbKVFm!= CLlK$) diff --git a/dunnart/Snakefile b/dunnart/Snakefile index ba382bd..eace19d 100644 --- a/dunnart/Snakefile +++ b/dunnart/Snakefile @@ -88,21 +88,20 @@ rule all: # expand("results_10M/logs/{sample}_filt_15Mreads.SE.spp.log", sample=all_samples), # expand("results_10M/qc/{sample}_filt_15Mreads.SE.cc.qc", sample=all_samples), # expand("results_10M/qc/{sample}_filt_15Mreads.SE.cc.plot.pdf", sample=all_samples), - expand("results_10M/macs2/{case}_vs_{control}_macs2_peaks.narrowPeak", zip, case=IPS, control=INPUTS), - expand("results_10M/macs2/{case}_vs_{control}_macs2_peaks.xls", zip, case=IPS, control=INPUTS), - expand("results_10M/macs2/{case}_vs_{control}_macs2_summits.bed", zip, case=IPS, control=INPUTS), + expand("results_10M/macs2/{case}_vs_{control}_macs2_default_peaks.narrowPeak", zip, case=IPS, control=INPUTS), + expand("results_10M/macs2/{case}_vs_{control}_macs2_default_peaks.xls", zip, case=IPS, control=INPUTS), + expand("results_10M/macs2/{case}_vs_{control}_macs2_default_summits.bed", zip, case=IPS, control=INPUTS), #expand("results_10M/qc/{case}-vs-{control}-narrowpeak-count_mqc.json", zip, case=IPS, control=INPUTS), - # expand("results_10M/bowtie2/{case}.bedpe", case=IPS), - # expand("results_10M/ggplot(promoter_annot, aes(x=distanceToTSS, y=width)) + - #expand("results_10M/logs/{case}.bamToBed", case=IPS), - # expand("results_10M/qc/{case}_vs_{control}.frip.txt", case=IPS, control=INPUTS), - # "results_10M/macs2/H3K4me3_pooled_macs2_peaks.narrowPeak", - # 
"results_10M/macs2/H3K27ac_pooled_macs2_peaks.narrowPeak", - # "results_10M/macs2/H3K4me3_overlap.narrowPeak", - # "results_10M/macs2/H3K27ac_overlap.narrowPeak", - # "results_10M/qc/H3K4me3_overlap.frip", - # "results_10M/qc/H3K27ac_overlap.frip" - # directory("results_10M/multiqc/multiqc_report_data/"), + expand("results_10M/bowtie2/{case}.bedpe", case=IPS), + expand("results_10M/logs/{case}.bamToBed", case=IPS), + expand("results_10M/qc/{case}_vs_{control}.frip_default.txt", case=IPS, control=INPUTS), + "results_10M/macs2/H3K4me3_pooled_macs2_default_peaks.narrowPeak", + "results_10M/macs2/H3K27ac_pooled_macs2_default_peaks.narrowPeak", + "results_10M/macs2/H3K4me3_overlap_default.narrowPeak", + "results_10M/macs2/H3K27ac_overlap_default.narrowPeak", + "results_10M/qc/H3K4me3_overlap_default.frip", + "results_10M/qc/H3K27ac_overlap_default.frip" + #directory("results_10M/multiqc/multiqc_report_data/"), # "results_10M/multiqc/multiqc_report.html" # =============================================================================================== # 1. 
FASTQC @@ -518,17 +517,17 @@ rule call_peaks_macs2: control = "results_10M/bowtie2/{control}_PPq30.sorted.dedup.bam", case = "results_10M/bowtie2/{case}_PPq30.sorted.dedup.bam" output: - "results_10M/macs2/{case}_vs_{control}_macs2_peaks.xls", - "results_10M/macs2/{case}_vs_{control}_macs2_summits.bed", - "results_10M/macs2/{case}_vs_{control}_macs2_peaks.narrowPeak", + "results_10M/macs2/{case}_vs_{control}_macs2_default_peaks.xls", + "results_10M/macs2/{case}_vs_{control}_macs2_default_summits.bed", + "results_10M/macs2/{case}_vs_{control}_macs2_default_peaks.narrowPeak", log: - "results_10M/logs/{case}_vs_{control}_call_peaks_macs2.log" + "results_10M/logs/{case}_vs_{control}_call_peaks_macs2_default.log" params: - name = "{case}_vs_{control}_macs2_P10-2", + name = "{case}_vs_{control}_macs2_default", shell: " macs2 callpeak -f BAMPE -t {input.case} \ -c {input.control} --keep-dup all \ - --outdir results_10M/macs2/ -p 0.01 \ + --outdir results_10M/macs2/ \ -n {params.name} \ -g 2740338543 2> {log} " @@ -538,22 +537,22 @@ rule call_peaks_macs2_pooled_replicates: H3K27ac = "results_10M/bowtie2/H3K27ac_pooled_PPq30.sorted.dedup.bam", input = "results_10M/bowtie2/input_pooled_PPq30.sorted.dedup.bam" output: - "results_10M/macs2/H3K4me3_pooled_macs2_peaks.xls", - "results_10M/macs2/H3K4me3_pooled_macs2_summits.bed", - "results_10M/macs2/H3K4me3_pooled_macs2_peaks.narrowPeak", - "results_10M/macs2/H3K27ac_pooled_macs2_peaks.xls", - "results_10M/macs2/H3K27ac_pooled_macs2_summits.bed", - "results_10M/macs2/H3K27ac_pooled_macs2_peaks.narrowPeak" + "results_10M/macs2/H3K4me3_pooled_macs2_default_peaks.xls", + "results_10M/macs2/H3K4me3_pooled_macs2_default_summits.bed", + "results_10M/macs2/H3K4me3_pooled_macs2_default_peaks.narrowPeak", + "results_10M/macs2/H3K27ac_pooled_macs2_default_peaks.xls", + "results_10M/macs2/H3K27ac_pooled_macs2_default_summits.bed", + "results_10M/macs2/H3K27ac_pooled_macs2_default_peaks.narrowPeak" log: - H3K4me3 
="results_10M/logs/H3K4me3_pooled_call_peaks_macs2.log", - H3K27ac ="results_10M/logs/H3K27ac_pooled_call_peaks_macs2.log" + H3K4me3 ="results_10M/logs/H3K4me3_pooled_call_peaks_macs2_default.log", + H3K27ac ="results_10M/logs/H3K27ac_pooled_call_peaks_macs2_default.log" params: - H3K4me3 = "H3K4me3_pooled_macs2", - H3K27ac = "H3K27ac_pooled_macs2" + H3K4me3 = "H3K4me3_pooled_macs2_default", + H3K27ac = "H3K27ac_pooled_macs2_default" run: shell(" macs2 callpeak -f BAMPE -t {input.H3K4me3} \ -c {input.input} --keep-dup all \ - --outdir results_10M/macs2/ -p 0.01 \ + --outdir results_10M/macs2/ \ -n {params.H3K4me3} \ -g 2740338543 2> {log.H3K4me3} ") shell("macs2 callpeak -f BAMPE -t {input.H3K27ac} \ @@ -598,9 +597,9 @@ rule bamToBed: rule frip: input: bed = "results_10M/bowtie2/{case}.bedpe", - peak = "results_10M/macs2/{case}_vs_{control}_macs2_peaks.narrowPeak" + peak = "results_10M/macs2/{case}_vs_{control}_macs2_default_peaks.narrowPeak" output: - "results_10M/qc/{case}_vs_{control}.frip.txt" + "results_10M/qc/{case}_vs_{control}.frip_default.txt" shell: "python2.7 scripts/encode_frip.py {input.bed} {input.peak} > {output}" @@ -614,22 +613,22 @@ rule frip: rule overlap_peaks_H3K4me3: input: - peak1="results_10M/macs2/A-2_H3K4me3_vs_A-1_input_macs2_peaks.narrowPeak", - peak2="results_10M/macs2/B-2_H3K4me3_vs_B-1_input_macs2_peaks.narrowPeak", - pooled="results_10M/macs2/H3K4me3_pooled_macs2_peaks.narrowPeak" + peak1="results_10M/macs2/A-2_H3K4me3_vs_A-1_input_macs2_default_peaks.narrowPeak", + peak2="results_10M/macs2/B-2_H3K4me3_vs_B-1_input_macs2_default_peaks.narrowPeak", + pooled="results_10M/macs2/H3K4me3_pooled_macs2_default_peaks.narrowPeak" output: - "results_10M/macs2/H3K4me3_overlap.narrowPeak" + "results_10M/macs2/H3K4me3_overlap_default.narrowPeak" shell: "python2.7 scripts/overlap_peaks.py {input.peak1} {input.peak2} {input.pooled} {output}" rule overlap_peaks_H3K27ac: input: - 
peak1="results_10M/macs2/A-3_H3K27ac_vs_A-1_input_macs2_peaks.narrowPeak", - peak2="results_10M/macs2/B-3_H3K27ac_vs_B-1_input_macs2_peaks.narrowPeak", - pooled="results_10M/macs2/H3K27ac_pooled_macs2_peaks.narrowPeak" + peak1="results_10M/macs2/A-3_H3K27ac_vs_A-1_input_macs2_default_peaks.narrowPeak", + peak2="results_10M/macs2/B-3_H3K27ac_vs_B-1_input_macs2_default_peaks.narrowPeak", + pooled="results_10M/macs2/H3K27ac_pooled_macs2_default_peaks.narrowPeak" output: - "results_10M/macs2/H3K27ac_overlap.narrowPeak" + "results_10M/macs2/H3K27ac_overlap_default.narrowPeak" shell: "python2.7 scripts/overlap_peaks.py {input.peak1} {input.peak2} {input.pooled} {output}" @@ -638,17 +637,16 @@ rule overlap_frip: input: H3K4me3bam = "results_10M/bowtie2/H3K4me3_pooled_PPq30.sorted.dedup.bam", H3K27acbam = "results_10M/bowtie2/H3K27ac_pooled_PPq30.sorted.dedup.bam", - H3K4me3bed = "results_10M/macs2/H3K4me3_overlap.narrowPeak", - H3K27acbed = "results_10M/macs2/H3K27ac_overlap.narrowPeak" + H3K4me3bed = "results_10M/macs2/H3K4me3_overlap_default.narrowPeak", + H3K27acbed = "results_10M/macs2/H3K27ac_overlap_default.narrowPeak" output: - H3K4me3frip = "results_10M/qc/H3K4me3_overlap.frip", - H3K27acfrip = "results_10M/qc/H3K27ac_overlap.frip" + H3K4me3frip = "results_10M/qc/H3K4me3_overlap_default.frip", + H3K27acfrip = "results_10M/qc/H3K27ac_overlap_default.frip" run: shell("python2.7 scripts/encode_frip.py {input.H3K4me3bam} {input.H3K4me3bed} > {output.H3K4me3frip}") shell("python2.7 scripts/encode_frip.py {input.H3K27acbam} {input.H3K27acbed} > {output.H3K27acfrip}") -# =============================================================================================== -# 10. Combine all QC into multiqc output +# 10. Combine all QC into multiqc output # =============================================================================================== # # rule multiqc: -- GitLab