From 8e388bed17911bfbc81b16261b7a956f4bf4bcf2 Mon Sep 17 00:00:00 2001
From: Laura Cook <l.cook2@student.unimelb.edu.au>
Date: Thu, 16 Jul 2020 14:33:31 +1000
Subject: [PATCH] removed --extend_reads in bamCoverage because mouse data in
 single-ended unlike dunnart reads. Added indexed bam as an input for
 deepTools modules because otherwise it will try and run without first doing
 the index rule

---
 mouse/Snakefile | 425 ++++++++++++++++++++++++------------------------
 1 file changed, 209 insertions(+), 216 deletions(-)

diff --git a/mouse/Snakefile b/mouse/Snakefile
index 89ce6d4..fc83017 100644
--- a/mouse/Snakefile
+++ b/mouse/Snakefile
@@ -30,8 +30,8 @@ with open(config['SAMPLES'], "r") as f:
     # skip the header
     header = next(reader)
     for row in reader:
-        STAGE.append(row[1])
-        MARK.append(row[2])
+        STAGE.append(row[2])
+        MARK.append(row[1])
         IPS.append(row[3])
         INPUTS.append(row[4])
 
@@ -49,50 +49,48 @@ all_samples = IPS + unique_inputs
 
 rule all:
     input:
-        expand("results/bwa/{sample}_{stage}_{mark}.sorted.bam", zip, sample=all_samples, stage=STAGE, mark=MARK),
-        expand("results/bwa/{sample}_{stage}_{mark}_PPq30.sorted.bam", zip, sample=all_samples, stage=STAGE, mark=MARK),
-        expand("results/bwa/{sample}_{stage}_{mark}_PPq30.sorted.dupmark.bam", zip, sample=all_samples, stage=STAGE, mark=MARK),
-        expand("results/bwa/{sample}_{stage}_{mark}_PPq30.sorted.dedup.bam", zip, sample=all_samples, stage=STAGE, mark=MARK),
-        expand("results/bwa/{sample}_{stage}_{mark}_PPq30.sorted.dedup.bai", zip, sample=all_samples, stage=STAGE, mark=MARK),
+        expand("results/bwa/{sample}_{stage}_{mark}_q30.sorted.bam", zip, sample=all_samples, stage=STAGE, mark=MARK),
+        expand("results/bwa/{sample}_{stage}_{mark}_q30.dedup.bam", zip, sample=all_samples, stage=STAGE, mark=MARK),
+        expand("results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam", zip, sample=all_samples, stage=STAGE, mark=MARK),
+        expand("results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai", zip, sample=all_samples, stage=STAGE, mark=MARK),
         expand("logs/{sample}_{stage}_{mark}.flagstat.qc", zip, sample=all_samples, stage=STAGE, mark=MARK),
-        expand("results/preseq/{sample}_{stage}_{mark}.ccurve.txt", zip, sample=all_samples, stage=STAGE, mark=MARK),
-        expand("results/picard/{sample}_{stage}_{mark}_est_lib_complex_metrics.txt", zip, sample=all_samples, stage=STAGE, mark=MARK),
-        expand("logs/{sample}_{stage}_{mark}.picardLibComplexity", zip, sample=all_samples, stage=STAGE, mark=MARK),
-        "results/deeptools/multibamsum.npz",
-        "results/deeptools/multibamsum.tab",
-        "results/deeptools/pearsoncor_multibamsum.png",
-        "results/deeptools/pearsoncor_multibamsum_matrix.txt",
-        expand("results/deeptools/{sample}_{stage}_{mark}.SeqDepthNorm.bw", zip, sample=all_samples, stage=STAGE, mark=MARK),
-        "results/deeptools/multiBAM_fingerprint.png",
-        "results/deeptools/multiBAM_fingerprint_metrics.txt",
-        "results/deeptools/multiBAM_fingerprint_rawcounts.txt",
-        "results/deeptools/plot_coverage.png",
-        "results/deeptools/plot_coverage_rawcounts.tab",
-        "results/deeptools/bamPEFragmentSize_hist.png",
-        "results/deeptools/bamPEFragmentSize_rawcounts.tab",
-        expand("results/phantomPeaks/{sample}_{stage}_{mark}.spp.pdf", zip, sample=all_samples, stage=STAGE, mark=MARK),
-        expand("results/phantomPeaks/{sample}_{stage}_{mark}.spp.Rdata", zip, sample=all_samples, stage=STAGE, mark=MARK),
-        expand("results/phantomPeaks/{sample}_{stage}_{mark}.spp.out", zip, sample=all_samples, stage=STAGE, mark=MARK),
-        expand("results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
-        expand("results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.xls", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
-        expand("results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_summits.bed", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
-        expand("results/macs2/{case}-vs-{control}_{stage}_{mark}-narrowpeak-count_mqc.json", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
-        expand("results/bwa/{case}_{stage}_{mark}.bedpe", zip, case=IPS, stage=STAGE, mark=MARK),
-        expand("logs/{case}_{stage}_{mark}.bamToBed", zip, case=IPS, stage=STAGE, mark=MARK),
-        expand("results/frip/{case}_vs_{control}_{stage}_{mark}.frip.txt", case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
-        "results/macs2/E10.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
-        "results/macs2/E11.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
-        "results/macs2/E12.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
-        "results/macs2/E13.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
-        "results/macs2/E14.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
-        "results/macs2/E15.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
-        "results/macs2/E10.5_H3K4me3_macs2_pooled_peaks.narrowPeak",
-        "results/macs2/E11.5_H3K4me3_macs2_pooled_peaks.narrowPeak",
-        "results/macs2/E12.5_H3K4me3_macs2_pooled_peaks.narrowPeak",
-        "results/macs2/E13.5_H3K4me3_macs2_pooled_peaks.narrowPeak",
-        "results/macs2/E14.5_H3K4me3_macs2_pooled_peaks.narrowPeak",
-        "results/macs2/E15.5_H3K4me3_macs2_pooled_peaks.narrowPeak",
-        expand("results/macs2/{stage}_{mark}_overlap.narrowPeak", zip, stage= STAGE, mark = MARK)
+        # #expand("results/preseq/{sample}_{stage}_{mark}.ccurve.txt", zip, sample=all_samples, stage=STAGE, mark=MARK),
+        # #expand("results/picard/{sample}_{stage}_{mark}_est_lib_complex_metrics.txt", zip, sample=all_samples, stage=STAGE, mark=MARK),
+        # #expand("logs/{sample}_{stage}_{mark}.picardLibComplexity", zip, sample=all_samples, stage=STAGE, mark=MARK),
+         "results/deeptools/multibamsum.npz",
+         "results/deeptools/multibamsum.tab",
+         "results/deeptools/pearsoncor_multibamsum.png",
+         "results/deeptools/pearsoncor_multibamsum_matrix.txt",
+         expand("results/deeptools/{sample}_{stage}_{mark}.SeqDepthNorm.bw", zip, sample=all_samples, stage=STAGE, mark=MARK),
+         "results/deeptools/multiBAM_fingerprint.png",
+         "results/deeptools/multiBAM_fingerprint_metrics.txt",
+         "results/deeptools/multiBAM_fingerprint_rawcounts.txt",
+         "results/deeptools/plot_coverage.png",
+         "results/deeptools/plot_coverage_rawcounts.tab",
+         expand("results/phantomPeaks/{sample}_{stage}_{mark}.spp.pdf", zip, sample=all_samples, stage=STAGE, mark=MARK),
+         expand("results/phantomPeaks/{sample}_{stage}_{mark}.spp.Rdata", zip, sample=all_samples, stage=STAGE, mark=MARK),
+         expand("results/phantomPeaks/{sample}_{stage}_{mark}.spp.out", zip, sample=all_samples, stage=STAGE, mark=MARK),
+         expand("results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
+         expand("results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.xls", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
+         expand("results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_summits.bed", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
+         expand("logs/{case}_vs_{control}_{stage}_{mark}_call_peaks_macs2.log", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
+         #expand("results/macs2/{case}-vs-{control}_{stage}_{mark}-narrowpeak-count_mqc.json", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
+        # expand("results/bwa/{case}_{stage}_{mark}.bed", zip, case=IPS, stage=STAGE, mark=MARK),
+        # expand("logs/{case}_{stage}_{mark}.bamToBed", zip, case=IPS, stage=STAGE, mark=MARK),
+        # expand("results/frip/{case}_vs_{control}_{stage}_{mark}.frip.txt", case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
+        # "results/macs2/E10.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
+        # "results/macs2/E11.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
+        # "results/macs2/E12.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
+        # "results/macs2/E13.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
+        # "results/macs2/E14.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
+        # "results/macs2/E15.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
+        # "results/macs2/E10.5_H3K4me3_macs2_pooled_peaks.narrowPeak",
+        # "results/macs2/E11.5_H3K4me3_macs2_pooled_peaks.narrowPeak",
+        # "results/macs2/E12.5_H3K4me3_macs2_pooled_peaks.narrowPeak",
+        # "results/macs2/E13.5_H3K4me3_macs2_pooled_peaks.narrowPeak",
+        # "results/macs2/E14.5_H3K4me3_macs2_pooled_peaks.narrowPeak",
+        # "results/macs2/E15.5_H3K4me3_macs2_pooled_peaks.narrowPeak",
+        # expand("results/macs2/{stage}_{mark}_overlap.narrowPeak", zip, stage= STAGE, mark = MARK)
         # directory("results/multiqc/multiqc_report_data/"),
         # "results/multiqc/multiqc_report.html"
 
@@ -120,10 +118,15 @@ rule all:
 # ===============================================================================================
 #  4. FILTERING
 #   > remove unmapped, mate unmapped
-#   > remove low MAPQ reads (-q)
-#   > keep proper pairs (-f)
+#   > remove low MAPQ reads (-q 30)
+#   > -F 1804
+#           -Read unmapped
+#           -Mate unmapped
+#           -Not primary alignment
+#           -Read fails platform/vendor quality checks
+#           -Read is PCR or optical duplicate
 #   > mark duplicates & remove duplicates (picard)
-#   > index final position sorted BAM
+#   > re-filter, sort and index final BAM
 # ===============================================================================================
 
 rule filter:
@@ -132,22 +135,32 @@ rule filter:
     output:
         "results/bwa/{sample}_{stage}_{mark}_q30.sorted.bam"
     log:
-        "logs/{sample}_{stage}_{mark}.dedup"
+        "logs/{sample}_{stage}_{mark}.filter"
     shell:
-        "samtools sort {input} | samtools view -b -q 30 -o {output} - 2> {log}"
+        "samtools view -b -F 1804 -q 30 {input} | samtools sort -o {output} - 2> {log}"
 
 rule markDups:
     input:
         "results/bwa/{sample}_{stage}_{mark}_q30.sorted.bam"
     output:
-        bam="results/bwa/{sample}_{stage}_{mark}_q30.sorted.dupmark.bam",
-        dupQC="results/bwa/{sample}_{stage}_{mark}.dup.qc"
+        bam="results/bwa/{sample}_{stage}_{mark}_q30.dedup.bam",
+        dupQC="results/bwa/{sample}_{stage}_{mark}.dedup.qc"
     log:
-        "logs/{sample}_{stage}_{mark}.dupmark"
+        "logs/{sample}_{stage}_{mark}.dedup"
     shell:
         "picard MarkDuplicates I={input} O={output.bam} \
         METRICS_FILE={output.dupQC} REMOVE_DUPLICATES=true ASSUME_SORTED=true 2> {log}"
 
+rule filter2:
+    input:
+        "results/bwa/{sample}_{stage}_{mark}_q30.dedup.bam"
+    output:
+        "results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"
+    log:
+        "logs/{sample}_{stage}_{mark}.dedup"
+    shell:
+        "samtools view -b -F 1804 {input} | samtools sort -o {output} - 2> {log}"
+
 rule indexBam:
     input:
         "results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"
@@ -158,13 +171,13 @@ rule indexBam:
     shell:
         "samtools index {input} {output} 2> {log}"
 
-# ===============================================================================================
-#  4. ALIGNMENT QC
-#   > SAMtools flagstat statistics
-#   > CollectMultipleMetrics (picard)
-#   > Compute Library Complexity (PreSeq)
-# ===============================================================================================
-
+# # ===============================================================================================
+# #  4. ALIGNMENT QC
+# #   > SAMtools flagstat statistics
+# #   > CollectMultipleMetrics (picard)
+# #   > Compute Library Complexity (PreSeq)
+# # ===============================================================================================
+#
 rule mappingStats:
     input:
         "results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"
@@ -172,35 +185,35 @@ rule mappingStats:
         "logs/{sample}_{stage}_{mark}.flagstat.qc"
     shell:
         "samtools flagstat {input} > {output}"
-
-rule preseq:
-    input:
-        "results/bwa/{sample}_{stage}_{mark}.bam"
-    output:
-        "results/preseq/{sample}_{stage}_{mark}.ccurve.txt"
-    log:
-        "logs/{sample}_{stage}_{mark}.preseq"
-    shell:
-        "preseq lc_extrap -v -output {output} -bam {input} 2> {log}"
-
-
-rule get_picard_complexity_metrics:
-    input:
-        "results/bwa/{sample}_{stage}_{mark}.bam"
-    output:
-        "results/picard/{sample}_{stage}_{mark}_est_lib_complex_metrics.txt"
-    log:
-        "logs/{sample}_{stage}_{mark}.picardLibComplexity"
-    shell:
-        "picard -Xmx6G EstimateLibraryComplexity INPUT={input} OUTPUT={output} USE_JDK_DEFLATER=TRUE USE_JDK_INFLATER=TRUE VERBOSITY=ERROR"
-
-# ===============================================================================================
-#  5. deepTools
-#   > multiBAMsummary
-#   > plotCorrelation
-#   > plotFingerprint
-#   > bamCoverage (read depth normalised bigWig files)
-# ===============================================================================================
+#
+# # rule preseq:
+# #     input:
+# #         "results/bwa/{sample}_{stage}_{mark}.bam"
+# #     output:
+# #         "results/preseq/{sample}_{stage}_{mark}.ccurve.txt"
+# #     log:
+# #         "logs/{sample}_{stage}_{mark}.preseq"
+# #     shell:
+# #         "preseq lc_extrap -v -output {output} -bam {input} 2> {log}"
+# #
+# #
+# # rule get_picard_complexity_metrics:
+# #     input:
+# #         "results/bwa/{sample}_{stage}_{mark}.bam"
+# #     output:
+# #         "results/picard/{sample}_{stage}_{mark}_est_lib_complex_metrics.txt"
+# #     log:
+# #         "logs/{sample}_{stage}_{mark}.picardLibComplexity"
+# #     shell:
+# #         "picard -Xmx6G EstimateLibraryComplexity INPUT={input} OUTPUT={output} USE_JDK_DEFLATER=TRUE USE_JDK_INFLATER=TRUE VERBOSITY=ERROR"
+#
+# # ===============================================================================================
+# #  5. deepTools
+# #   > multiBAMsummary
+# #   > plotCorrelation
+# #   > plotFingerprint
+# #   > bamCoverage (read depth normalised bigWig files)
+# # ===============================================================================================
 
 rule deeptools_summary:
     input:
@@ -239,23 +252,19 @@ rule deeptools_correlation:
 
 rule deeptools_coverage:
     input:
-        "results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"
+        bam = "results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam",
+        bai = "results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai"
     output:
         "results/deeptools/{sample}_{stage}_{mark}.SeqDepthNorm.bw"
     log:
         "logs/{sample}_{stage}_{mark}_coverage.deepTools"
     shell:
-        "bamCoverage \
-        --bam {input} \
-        --binSize 10 \
-        --normalizeUsing RPGC \
-        --effectiveGenomeSize add mm10 \
-        --extendReads \
-        -o {output} 2> {log}"
+        "bamCoverage --bam {input.bam} --binSize 10 --normalizeUsing RPGC --effectiveGenomeSize 2308125349 --numberOfProcessors 4 -o {output} 2> {log}"
 
 rule deeptools_fingerprint:
     input:
-        expand(["results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"], zip, sample=all_samples, stage=STAGE, mark = MARK)
+        bam = expand(["results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"], zip, sample=all_samples, stage=STAGE, mark = MARK),
+        bai = expand(["results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai"], zip, sample=all_samples, stage=STAGE, mark = MARK)
     output:
         fig="results/deeptools/multiBAM_fingerprint.png",
         metrics="results/deeptools/multiBAM_fingerprint_metrics.txt",
@@ -265,7 +274,7 @@ rule deeptools_fingerprint:
         "logs/fingerprint.deepTools"
     shell:
         "plotFingerprint -p {threads} \
-        -b {input} \
+        -b {input.bam} \
         --plotFile {output.fig} \
         --outQualityMetrics {output.metrics} \
         --outRawCounts {output.rawcounts} \
@@ -275,22 +284,23 @@ rule deeptools_fingerprint:
 
 rule deeptools_plotCoverage:
     input:
-        expand(["results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"], zip, sample=all_samples, stage=STAGE, mark = MARK)
+        bam = expand(["results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"], zip, sample=all_samples, stage=STAGE, mark=MARK),
+        bai = expand(["results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai"], zip, sample=all_samples, stage=STAGE, mark=MARK)
     output:
         fig="results/deeptools/plot_coverage.png",
         rawcounts="results/deeptools/plot_coverage_rawcounts.tab"
     log:
         "logs/plotCoverage.deepTools"
     shell:
-        "plotCoverage --plotFile {output.fig} \
+        "plotCoverage --bamfiles {input.bam} --plotFile {output.fig} \
         -n 1000000 \
         --outRawCounts {output.rawcounts} \
         --ignoreDuplicates 2> {log}"
 
 
-# ===============================================================================================
-#  6. phantomPeakQuals
-# ===============================================================================================
+# # ===============================================================================================
+# #  6. phantomPeakQuals
+# # ===============================================================================================
 
 rule phantomPeakQuals:
     input:
@@ -304,20 +314,20 @@ rule phantomPeakQuals:
     shell:
         "Rscript scripts/run_spp.R -c={input} -savp={output.savp} -savd={output.savd} -out={output.out} 2> {log}"
 
-# ===============================================================================================
-#  7. Call peaks (MACS2)
-# ===============================================================================================
+# # ===============================================================================================
+# #  7. Call peaks (MACS2)
+# # ===============================================================================================
 
 rule call_peaks_macs2:
     input:
-        control = "results/bwa/{control}_{stage}_{mark}_PPq30.sorted.dedup.bam",
-        case = "results/bwa/{control}_{stage}_{mark}_PPq30.sorted.dedup.bam"
+        control = "results/bwa/{control}_{stage}_{mark}_q30.sorted.dedup.bam",
+        case = "results/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bam"
     output:
         "results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.xls",
         "results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_summits.bed",
         "results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak",
     log:
-        "logs/{case}_vs_{control}_call_peaks_macs2.log"
+        "logs/{case}_vs_{control}_{stage}_{mark}_call_peaks_macs2.log"
     params:
         name = "{case}_vs_{control}_{stage}_{mark}_macs2",
     shell:
@@ -328,116 +338,99 @@ rule call_peaks_macs2:
         -g mm 2> {log} "
 
 
-# ===============================================================================================
-#  8. Peak QC
-#   > narrow peak counts
-#   > fraction of reads in peaks (convert BAM to bed first then do intersect with peaks)
-# ===============================================================================================
-
-# peak counts in a format that multiqc can handle
-rule get_narrow_peak_counts_for_multiqc:
-    input:
-        peaks = "results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak"
-    output:
-        "results/macs2/{case}-vs-{control}_{stage}_{mark}-narrowpeak-count_mqc.json"
-    params:
-        peakType = "narrowPeak"
-    shell:
-        "python3 scripts/count_peaks.py \
-        --peak_type {params.peakType} \
-        --peaks {input.peaks} --sample_name {wildcards.case} > {output}"
-
-
-## Convert BAM to tagAlign file for calculating FRiP QC metric (Fraction of reads in peaks)
-rule bamToBed:
-    input:
-        "results/bwa/{case}_{stage}_{mark}_PPq30.sorted.dedup.bam"
-    output:
-        "results/bwa/{case}_{stage}_{mark}.bed"
-    log:
-        "logs/{case}_{stage}_{mark}_.bamToBed"
-    shell:
-        "samtools sort -n {input} | bedtools bamtobed -i - > {output}"
-
+# # ===============================================================================================
+# #  8. Peak QC
+# #   > narrow peak counts
+# #   > fraction of reads in peaks (convert BAM to bed first then do intersect with peaks)
+# # ===============================================================================================
 
-## Fraction of reads in peaks
-rule frip:
-    input:
-        bed = "results/bwa/{case}_{stage}_{mark}.bed",
-        peak = "results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak"
-    output:
-        "results/frip/{case}_vs_{control}_{stage}_{mark}.frip.txt"
-    shell:
-        "python2.7 scripts/encode_frip.py {input.bed} {input.peak} > {output}"
-
-
-# ===============================================================================================
-#  9. Create consensus peaksets for replicates
-# ===============================================================================================
-
-# Based on ENCODE `overlap_peaks.py` - recommended for histone marks.
-# Need to pool peaks first for each replicate
+#peak counts in a format that multiqc can handle
+# rule get_narrow_peak_counts_for_multiqc:
+#     input:
+#         peaks = "results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak"
+#     output:
+#         "results/macs2/{case}-vs-{control}_{stage}_{mark}-narrowpeak-count_mqc.json"
+#     params:
+#         peakType = "narrowPeak"
+#     shell:
+#         "python3 scripts/count_peaks.py \
+#         --peak_type {params.peakType} \
+#         --peaks {input.peaks} --sample_name {wildcards.case} > {output}"
+#
+#
+# ## Convert BAM to tagAlign file for calculating FRiP QC metric (Fraction of reads in peaks)
+# rule bamToBed:
+#     input:
+#         "results/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bam"
+#     output:
+#         "results/bwa/{case}_{stage}_{mark}.bed"
+#     log:
+#         "logs/{case}_{stage}_{mark}.bamToBed"
+#     shell:
+#         "samtools sort -n {input} | bedtools bamtobed -i - > {output}"
+#
+#
+# ## Fraction of reads in peaks
+# rule frip:
+#     input:
+#         bed = "results/bwa/{case}_{stage}_{mark}.bed",
+#         peak = "results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak"
+#     output:
+#         "results/frip/{case}_vs_{control}_{stage}_{mark}.frip.txt"
+#     shell:
+#         "python2.7 scripts/encode_frip.py {input.bed} {input.peak} > {output}"
 
-rule pool_peaks:
-    input:
-        M1S1 = expand(["results/macs2/{case}_vs_{control}_E10.5_H3K27ac_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS),
-        M1S2 = expand(["results/macs2/{case}_vs_{control}_E11.5_H3K27ac_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS),
-        M1S3 = expand(["results/macs2/{case}_vs_{control}_E12.5_H3K27ac_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS),
-        M1S4 = expand(["results/macs2/{case}_vs_{control}_E13.5_H3K27ac_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS),
-        M1S5 = expand(["results/macs2/{case}_vs_{control}_E14.5_H3K27ac_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS),
-        M1S6 = expand(["results/macs2/{case}_vs_{control}_E15.5_H3K27ac_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS),
-        M2S1 = expand(["results/macs2/{case}_vs_{control}_E10.5_H3K4me3_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS),
-        M2S2 = expand(["results/macs2/{case}_vs_{control}_E11.5_H3K4me3_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS),
-        M2S3 = expand(["results/macs2/{case}_vs_{control}_E12.5_H3K4me3_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS),
-        M2S4 = expand(["results/macs2/{case}_vs_{control}_E13.5_H3K4me3_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS),
-        M2S5 = expand(["results/macs2/{case}_vs_{control}_E14.5_H3K4me3_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS),
-        M2S6 = expand(["results/macs2/{case}_vs_{control}_E15.5_H3K4me3_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS)
-    output:
-        M1S1 = "results/macs2/E10.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
-        M1S2 = "results/macs2/E11.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
-        M1S3 = "results/macs2/E12.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
-        M1S4 = "results/macs2/E13.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
-        M1S5 = "results/macs2/E14.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
-        M1S6 = "results/macs2/E15.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
-        M2S1 = "results/macs2/E10.5_H3K4me3_macs2_pooled_peaks.narrowPeak",
-        M2S2 = "results/macs2/E11.5_H3K4me3_macs2_pooled_peaks.narrowPeak",
-        M2S3 = "results/macs2/E12.5_H3K4me3_macs2_pooled_peaks.narrowPeak",
-        M2S4 = "results/macs2/E13.5_H3K4me3_macs2_pooled_peaks.narrowPeak",
-        M2S5 = "results/macs2/E14.5_H3K4me3_macs2_pooled_peaks.narrowPeak",
-        M2S6 = "results/macs2/E15.5_H3K4me3_macs2_pooled_peaks.narrowPeak"
-
-    run:
-        shell("cat {input.M1S1} > {output.M1S1}")
-        shell("cat {input.M1S2} > {output.M1S2}")
-        shell("cat {input.M1S3} > {output.M1S3}")
-        shell("cat {input.M1S4} > {output.M1S4}")
-        shell("cat {input.M1S5} > {output.M1S5}")
-        shell("cat {input.M1S6} > {output.M1S6}")
-        shell("cat {input.M2S1} > {output.M2S1}")
-        shell("cat {input.M2S2} > {output.M2S2}")
-        shell("cat {input.M2S3} > {output.M2S3}")
-        shell("cat {input.M2S4} > {output.M2S4}")
-        shell("cat {input.M2S5} > {output.M2S5}")
-        shell("cat {input.M2S6} > {output.M2S6}")
-
-rule overlap_peaks_H3K4me3:
-    input:
-        peaks=expand(["results/macs2/{case}_vs_{control}_E10.5_H3K4me3_macs2_peaks.narrowPeak"], ["results/macs2/{case}_vs_{control}_E11.5_H3K4me3_macs2_peaks.narrowPeak"],["results/macs2/{case}_vs_{control}_E12.5_H3K4me3_macs2_peaks.narrowPeak"], ["results/macs2/{case}_vs_{control}_E13.5_H3K4me3_macs2_peaks.narrowPeak"], ["results/macs2/{case}_vs_{control}_E14.5_H3K4me3_macs2_peaks.narrowPeak"], ["results/macs2/{case}_vs_{control}_E15.5_H3K4me3_macs2_peaks.narrowPeak"],zip, case=IPS, control=INPUTS),
-        pooled=expand("results/macs2/{stage}_{mark}_macs2_pooled_peaks.narrowPeak", zip, stage = STAGE, mark = MARK)
-    output:
-        expand("results/macs2/{stage}_{mark}_overlap.narrowPeak", zip, stage= STAGE, mark = MARK)
-    shell:
-        "python2.7 scripts/overlap_peaks.py {input.peaks} {input.pooled} {output}"
 
+# # ===============================================================================================
+# #  9. Create consensus peaksets for replicates
+# # ===============================================================================================
+#
+# # Based on ENCODE `overlap_peaks.py` - recommended for histone marks.
+# # Need to pool peaks first for each replicate
+#
+# rule pool_peaks:
+#     input:
+#         E10 = expand(["results/macs2/{case}_vs_{control}_E10.5_{mark}_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS, mark=MARK),
+#         E11 = expand(["results/macs2/{case}_vs_{control}_E11.5_{mark}_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS, mark=MARK),
+#         E12 = expand(["results/macs2/{case}_vs_{control}_E12.5_{mark}_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS, mark=MARK),
+#         E13 = expand(["results/macs2/{case}_vs_{control}_E13.5_{mark}_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS, mark=MARK),
+#         E14 = expand(["results/macs2/{case}_vs_{control}_E14.5_{mark}_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS, mark=MARK),
+#         E15 = expand(["results/macs2/{case}_vs_{control}_E15.5_{mark}_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS, mark=MARK)
+#     output:
+#         E10 = "results/macs2/E10.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
+#         E11 = "results/macs2/E11.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
+#         E12 = "results/macs2/E12.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
+#         E13 = "results/macs2/E13.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
+#         E14 = "results/macs2/E14.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
+#         E15 = "results/macs2/E15.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
+#     run:
+#         shell("cat {input.E10} > {output.E10}")
+#         shell("cat {input.E11} > {output.E11}")
+#         shell("cat {input.E12} > {output.E12}")
+#         shell("cat {input.E13} > {output.E13}")
+#         shell("cat {input.E14} > {output.E14}")
+#         shell("cat {input.E15} > {output.E15}")
+#
+# rule overlap_peaks:
+#     input:
+#         E10= expand(["results/macs2/{case}_vs_{control}_E10.5_{mark}_macs2_peaks.narrowPeak"]),
+#         E11= expand(["results/macs2/{case}_vs_{control}_E11.5_{mark}_macs2_peaks.narrowPeak"]),
+#         E12= expand(["results/macs2/{case}_vs_{control}_E12.5_{mark}_macs2_peaks.narrowPeak"]),
+#         E13= expand(["results/macs2/{case}_vs_{control}_E13.5_{mark}_macs2_peaks.narrowPeak"]),
+#         E14= expand(["results/macs2/{case}_vs_{control}_E14.5_{mark}_macs2_peaks.narrowPeak"]),
+#         E15= expand(["results/macs2/{case}_vs_{control}_E15.5_{mark}_macs2_peaks.narrowPeak"]).
+#         E10_pooled=expand("results/macs2/E10.5_{mark}_macs2_pooled_peaks.narrowPeak")
+#         E11_pooled=expand("results/macs2/E11.5_{mark}_macs2_pooled_peaks.narrowPeak")
+#         E12_pooled=
+#         E13_pooled=
+#         E14_pooled=
+#         E15_pooled=
+#     output:
+#         expand("results/macs2/{stage}_{mark}_overlap.narrowPeak", zip, stage= STAGE, mark = MARK)
+#     shell:
+#         "python2.7 scripts/overlap_peaks.py {input.peaks} {input.pooled} {output}"
+#
 
-rule overlap_peaks_H3K27ac:
-    input:
-        peaks=expand(["results/macs2/{case}_vs_{control}_E10.5_H3K27ac_macs2_peaks.narrowPeak"], ["results/macs2/{case}_vs_{control}_E11.5_H3K27ac_macs2_peaks.narrowPeak"],["results/macs2/{case}_vs_{control}_E12.5_H3K27ac_macs2_peaks.narrowPeak"], ["results/macs2/{case}_vs_{control}_E13.5_H3K27ac_macs2_peaks.narrowPeak"], ["results/macs2/{case}_vs_{control}_E14.5_H3K27ac_macs2_peaks.narrowPeak"], ["results/macs2/{case}_vs_{control}_E15.5_H3K27ac_macs2_peaks.narrowPeak"],zip, case=IPS, control=INPUTS),
-        pooled=expand("results/macs2/{stage}_{mark}_macs2_pooled_peaks.narrowPeak", zip, stage = STAGE, mark = MARK)
-    output:
-        expand("results/macs2/{stage}_{mark}_overlap.narrowPeak", zip, stage= STAGE, mark = MARK)
-    shell:
-        "python2.7 scripts/overlap_peaks.py {input.peaks} {input.pooled} {output}"
 
 # ===============================================================================================
 #  10. QC on replicate peaks
-- 
GitLab