From 656f69828621d5a3464f98c74d1bdc4a8c7b4089 Mon Sep 17 00:00:00 2001
From: Laura Cook <l.cook2@student.unimelb.edu.au>
Date: Mon, 20 Jul 2020 13:55:20 +1000
Subject: [PATCH] narrowpeak counts

---
 mouse/Snakefile | 106 +++++++++++++++++++-----------------------------
 1 file changed, 41 insertions(+), 65 deletions(-)

diff --git a/mouse/Snakefile b/mouse/Snakefile
index fc83017..4c6703b 100644
--- a/mouse/Snakefile
+++ b/mouse/Snakefile
@@ -54,30 +54,27 @@ rule all:
         expand("results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam", zip, sample=all_samples, stage=STAGE, mark=MARK),
         expand("results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai", zip, sample=all_samples, stage=STAGE, mark=MARK),
         expand("logs/{sample}_{stage}_{mark}.flagstat.qc", zip, sample=all_samples, stage=STAGE, mark=MARK),
-        # #expand("results/preseq/{sample}_{stage}_{mark}.ccurve.txt", zip, sample=all_samples, stage=STAGE, mark=MARK),
-        # #expand("results/picard/{sample}_{stage}_{mark}_est_lib_complex_metrics.txt", zip, sample=all_samples, stage=STAGE, mark=MARK),
-        # #expand("logs/{sample}_{stage}_{mark}.picardLibComplexity", zip, sample=all_samples, stage=STAGE, mark=MARK),
-         "results/deeptools/multibamsum.npz",
-         "results/deeptools/multibamsum.tab",
-         "results/deeptools/pearsoncor_multibamsum.png",
-         "results/deeptools/pearsoncor_multibamsum_matrix.txt",
-         expand("results/deeptools/{sample}_{stage}_{mark}.SeqDepthNorm.bw", zip, sample=all_samples, stage=STAGE, mark=MARK),
-         "results/deeptools/multiBAM_fingerprint.png",
-         "results/deeptools/multiBAM_fingerprint_metrics.txt",
-         "results/deeptools/multiBAM_fingerprint_rawcounts.txt",
-         "results/deeptools/plot_coverage.png",
-         "results/deeptools/plot_coverage_rawcounts.tab",
-         expand("results/phantomPeaks/{sample}_{stage}_{mark}.spp.pdf", zip, sample=all_samples, stage=STAGE, mark=MARK),
-         expand("results/phantomPeaks/{sample}_{stage}_{mark}.spp.Rdata", zip, sample=all_samples, stage=STAGE, mark=MARK),
-         expand("results/phantomPeaks/{sample}_{stage}_{mark}.spp.out", zip, sample=all_samples, stage=STAGE, mark=MARK),
-         expand("results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
-         expand("results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.xls", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
-         expand("results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_summits.bed", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
-         expand("logs/{case}_vs_{control}_{stage}_{mark}_call_peaks_macs2.log", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
-         #expand("results/macs2/{case}-vs-{control}_{stage}_{mark}-narrowpeak-count_mqc.json", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
-        # expand("results/bwa/{case}_{stage}_{mark}.bed", zip, case=IPS, stage=STAGE, mark=MARK),
-        # expand("logs/{case}_{stage}_{mark}.bamToBed", zip, case=IPS, stage=STAGE, mark=MARK),
-        # expand("results/frip/{case}_vs_{control}_{stage}_{mark}.frip.txt", case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
+        "results/deeptools/multibamsum.npz",
+        "results/deeptools/multibamsum.tab",
+        "results/deeptools/pearsoncor_multibamsum.png",
+        "results/deeptools/pearsoncor_multibamsum_matrix.txt",
+        expand("results/deeptools/{sample}_{stage}_{mark}.SeqDepthNorm.bw", zip, sample=all_samples, stage=STAGE, mark=MARK),
+        "results/deeptools/multiBAM_fingerprint.png",
+        "results/deeptools/multiBAM_fingerprint_metrics.txt",
+        "results/deeptools/multiBAM_fingerprint_rawcounts.txt",
+        "results/deeptools/plot_coverage.png",
+        "results/deeptools/plot_coverage_rawcounts.tab",
+        expand("results/phantomPeaks/{sample}_{stage}_{mark}.spp.pdf", zip, sample=all_samples, stage=STAGE, mark=MARK),
+        expand("results/phantomPeaks/{sample}_{stage}_{mark}.spp.Rdata", zip, sample=all_samples, stage=STAGE, mark=MARK),
+        expand("results/phantomPeaks/{sample}_{stage}_{mark}.spp.out", zip, sample=all_samples, stage=STAGE, mark=MARK),
+        expand("results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
+        expand("results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.xls", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
+        expand("results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_summits.bed", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
+        expand("logs/{case}_vs_{control}_{stage}_{mark}_call_peaks_macs2.log", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
+        expand("results/macs2/{case}-vs-{control}_{stage}_{mark}-narrowpeak-count_mqc.json", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
+        #expand("results/bwa/{case}_{stage}_{mark}.bed", zip, case=IPS, stage=STAGE, mark=MARK),
+        #expand("logs/{case}_{stage}_{mark}.bamToBed", zip, case=IPS, stage=STAGE, mark=MARK),
+        #expand("results/frip/{case}_vs_{control}_{stage}_{mark}.frip.txt", case=IPS, control=INPUTS, stage=STAGE, mark=MARK)
         # "results/macs2/E10.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
         # "results/macs2/E11.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
         # "results/macs2/E12.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
@@ -151,7 +148,7 @@ rule markDups:
         "picard MarkDuplicates I={input} O={output.bam} \
         METRICS_FILE={output.dupQC} REMOVE_DUPLICATES=true ASSUME_SORTED=true 2> {log}"
 
-rule filter2:
+rule sort:
     input:
         "results/bwa/{sample}_{stage}_{mark}_q30.dedup.bam"
     output:
@@ -159,7 +156,7 @@ rule filter2:
     log:
         "logs/{sample}_{stage}_{mark}.dedup"
     shell:
-        "samtools view -b -F 1804 {input} | samtools sort -o {output} - 2> {log}"
+        "samtools sort -o {output} - 2> {log}"
 
 rule indexBam:
     input:
@@ -185,28 +182,7 @@ rule mappingStats:
         "logs/{sample}_{stage}_{mark}.flagstat.qc"
     shell:
         "samtools flagstat {input} > {output}"
-#
-# # rule preseq:
-# #     input:
-# #         "results/bwa/{sample}_{stage}_{mark}.bam"
-# #     output:
-# #         "results/preseq/{sample}_{stage}_{mark}.ccurve.txt"
-# #     log:
-# #         "logs/{sample}_{stage}_{mark}.preseq"
-# #     shell:
-# #         "preseq lc_extrap -v -output {output} -bam {input} 2> {log}"
-# #
-# #
-# # rule get_picard_complexity_metrics:
-# #     input:
-# #         "results/bwa/{sample}_{stage}_{mark}.bam"
-# #     output:
-# #         "results/picard/{sample}_{stage}_{mark}_est_lib_complex_metrics.txt"
-# #     log:
-# #         "logs/{sample}_{stage}_{mark}.picardLibComplexity"
-# #     shell:
-# #         "picard -Xmx6G EstimateLibraryComplexity INPUT={input} OUTPUT={output} USE_JDK_DEFLATER=TRUE USE_JDK_INFLATER=TRUE VERBOSITY=ERROR"
-#
+
 # # ===============================================================================================
 # #  5. deepTools
 # #   > multiBAMsummary
@@ -335,7 +311,7 @@ rule call_peaks_macs2:
         -c {input.control} \
         --outdir results/macs2/ \
         -n {params.name} \
-        -g mm 2> {log} "
+        -g 1.87e9 2> {log} "
 
 
 # # ===============================================================================================
@@ -345,25 +321,25 @@ rule call_peaks_macs2:
 # # ===============================================================================================
 
 #peak counts in a format that multiqc can handle
-# rule get_narrow_peak_counts_for_multiqc:
-#     input:
-#         peaks = "results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak"
-#     output:
-#         "results/macs2/{case}-vs-{control}_{stage}_{mark}-narrowpeak-count_mqc.json"
-#     params:
-#         peakType = "narrowPeak"
-#     shell:
-#         "python3 scripts/count_peaks.py \
-#         --peak_type {params.peakType} \
-#         --peaks {input.peaks} --sample_name {wildcards.case} > {output}"
-#
-#
+rule get_narrow_peak_counts_for_multiqc:
+    input:
+        peaks = "results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak"
+    output:
+        "results/macs2/{case}-vs-{control}_{stage}_{mark}-narrowpeak-count_mqc.json"
+    params:
+        peakType = "narrowPeak"
+    shell:
+        "python2.7 scripts/count_peaks.py \
+        --peak_type {params.peakType} \
+        --peaks {input.peaks} --sample_name {wildcards.case}_{wildcards.stage}_{wildcards.mark} > {output}"
+
+
 # ## Convert BAM to tagAlign file for calculating FRiP QC metric (Fraction of reads in peaks)
 # rule bamToBed:
 #     input:
 #         "results/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bam"
 #     output:
-#         "results/bwa/{case}_{stage}_{mark}.bed"
+#         "results/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bed"
 #     log:
 #         "logs/{case}_{stage}_{mark}.bamToBed"
 #     shell:
@@ -373,13 +349,13 @@ rule call_peaks_macs2:
 # ## Fraction of reads in peaks
 # rule frip:
 #     input:
-#         bed = "results/bwa/{case}_{stage}_{mark}.bed",
+#         bed = "results/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bed",
 #         peak = "results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak"
 #     output:
 #         "results/frip/{case}_vs_{control}_{stage}_{mark}.frip.txt"
 #     shell:
 #         "python2.7 scripts/encode_frip.py {input.bed} {input.peak} > {output}"
-
+#
 
 # # ===============================================================================================
 # #  9. Create consensus peaksets for replicates
-- 
GitLab