diff --git a/mouse/Snakefile b/mouse/Snakefile index 89ce6d42b93c7ca29f27168f94e9e86eb8c4f991..fc83017e193fd2e1876b1ba9b99443ecc79c5345 100644 --- a/mouse/Snakefile +++ b/mouse/Snakefile @@ -30,8 +30,8 @@ with open(config['SAMPLES'], "r") as f: # skip the header header = next(reader) for row in reader: - STAGE.append(row[1]) - MARK.append(row[2]) + STAGE.append(row[2]) + MARK.append(row[1]) IPS.append(row[3]) INPUTS.append(row[4]) @@ -49,50 +49,48 @@ all_samples = IPS + unique_inputs rule all: input: - expand("results/bwa/{sample}_{stage}_{mark}.sorted.bam", zip, sample=all_samples, stage=STAGE, mark=MARK), - expand("results/bwa/{sample}_{stage}_{mark}_PPq30.sorted.bam", zip, sample=all_samples, stage=STAGE, mark=MARK), - expand("results/bwa/{sample}_{stage}_{mark}_PPq30.sorted.dupmark.bam", zip, sample=all_samples, stage=STAGE, mark=MARK), - expand("results/bwa/{sample}_{stage}_{mark}_PPq30.sorted.dedup.bam", zip, sample=all_samples, stage=STAGE, mark=MARK), - expand("results/bwa/{sample}_{stage}_{mark}_PPq30.sorted.dedup.bai", zip, sample=all_samples, stage=STAGE, mark=MARK), + expand("results/bwa/{sample}_{stage}_{mark}_q30.sorted.bam", zip, sample=all_samples, stage=STAGE, mark=MARK), + expand("results/bwa/{sample}_{stage}_{mark}_q30.dedup.bam", zip, sample=all_samples, stage=STAGE, mark=MARK), + expand("results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam", zip, sample=all_samples, stage=STAGE, mark=MARK), + expand("results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai", zip, sample=all_samples, stage=STAGE, mark=MARK), expand("logs/{sample}_{stage}_{mark}.flagstat.qc", zip, sample=all_samples, stage=STAGE, mark=MARK), - expand("results/preseq/{sample}_{stage}_{mark}.ccurve.txt", zip, sample=all_samples, stage=STAGE, mark=MARK), - expand("results/picard/{sample}_{stage}_{mark}_est_lib_complex_metrics.txt", zip, sample=all_samples, stage=STAGE, mark=MARK), - expand("logs/{sample}_{stage}_{mark}.picardLibComplexity", zip, sample=all_samples, stage=STAGE, mark=MARK), - "results/deeptools/multibamsum.npz", - "results/deeptools/multibamsum.tab", - "results/deeptools/pearsoncor_multibamsum.png", - "results/deeptools/pearsoncor_multibamsum_matrix.txt", - expand("results/deeptools/{sample}_{stage}_{mark}.SeqDepthNorm.bw", zip, sample=all_samples, stage=STAGE, mark=MARK), - "results/deeptools/multiBAM_fingerprint.png", - "results/deeptools/multiBAM_fingerprint_metrics.txt", - "results/deeptools/multiBAM_fingerprint_rawcounts.txt", - "results/deeptools/plot_coverage.png", - "results/deeptools/plot_coverage_rawcounts.tab", - "results/deeptools/bamPEFragmentSize_hist.png", - "results/deeptools/bamPEFragmentSize_rawcounts.tab", - expand("results/phantomPeaks/{sample}_{stage}_{mark}.spp.pdf", zip, sample=all_samples, stage=STAGE, mark=MARK), - expand("results/phantomPeaks/{sample}_{stage}_{mark}.spp.Rdata", zip, sample=all_samples, stage=STAGE, mark=MARK), - expand("results/phantomPeaks/{sample}_{stage}_{mark}.spp.out", zip, sample=all_samples, stage=STAGE, mark=MARK), - expand("results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK), - expand("results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.xls", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK), - expand("results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_summits.bed", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK), - expand("results/macs2/{case}-vs-{control}_{stage}_{mark}-narrowpeak-count_mqc.json", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK), - expand("results/bwa/{case}_{stage}_{mark}.bedpe", zip, case=IPS, stage=STAGE, mark=MARK), - expand("logs/{case}_{stage}_{mark}.bamToBed", zip, case=IPS, stage=STAGE, mark=MARK), - expand("results/frip/{case}_vs_{control}_{stage}_{mark}.frip.txt", case=IPS, control=INPUTS, stage=STAGE, mark=MARK), - "results/macs2/E10.5_H3K27ac_macs2_pooled_peaks.narrowPeak", - "results/macs2/E11.5_H3K27ac_macs2_pooled_peaks.narrowPeak", - "results/macs2/E12.5_H3K27ac_macs2_pooled_peaks.narrowPeak", - "results/macs2/E13.5_H3K27ac_macs2_pooled_peaks.narrowPeak", - "results/macs2/E14.5_H3K27ac_macs2_pooled_peaks.narrowPeak", - "results/macs2/E15.5_H3K27ac_macs2_pooled_peaks.narrowPeak", - "results/macs2/E10.5_H3K4me3_macs2_pooled_peaks.narrowPeak", - "results/macs2/E11.5_H3K4me3_macs2_pooled_peaks.narrowPeak", - "results/macs2/E12.5_H3K4me3_macs2_pooled_peaks.narrowPeak", - "results/macs2/E13.5_H3K4me3_macs2_pooled_peaks.narrowPeak", - "results/macs2/E14.5_H3K4me3_macs2_pooled_peaks.narrowPeak", - "results/macs2/E15.5_H3K4me3_macs2_pooled_peaks.narrowPeak", - expand("results/macs2/{stage}_{mark}_overlap.narrowPeak", zip, stage= STAGE, mark = MARK) + # #expand("results/preseq/{sample}_{stage}_{mark}.ccurve.txt", zip, sample=all_samples, stage=STAGE, mark=MARK), + # #expand("results/picard/{sample}_{stage}_{mark}_est_lib_complex_metrics.txt", zip, sample=all_samples, stage=STAGE, mark=MARK), + # #expand("logs/{sample}_{stage}_{mark}.picardLibComplexity", zip, sample=all_samples, stage=STAGE, mark=MARK), + "results/deeptools/multibamsum.npz", + "results/deeptools/multibamsum.tab", + "results/deeptools/pearsoncor_multibamsum.png", + "results/deeptools/pearsoncor_multibamsum_matrix.txt", + expand("results/deeptools/{sample}_{stage}_{mark}.SeqDepthNorm.bw", zip, sample=all_samples, stage=STAGE, mark=MARK), + "results/deeptools/multiBAM_fingerprint.png", + "results/deeptools/multiBAM_fingerprint_metrics.txt", + "results/deeptools/multiBAM_fingerprint_rawcounts.txt", + "results/deeptools/plot_coverage.png", + "results/deeptools/plot_coverage_rawcounts.tab", + expand("results/phantomPeaks/{sample}_{stage}_{mark}.spp.pdf", zip, sample=all_samples, stage=STAGE, mark=MARK), + expand("results/phantomPeaks/{sample}_{stage}_{mark}.spp.Rdata", zip, sample=all_samples, stage=STAGE, mark=MARK), + expand("results/phantomPeaks/{sample}_{stage}_{mark}.spp.out", zip, sample=all_samples, stage=STAGE, mark=MARK), + expand("results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK), + expand("results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.xls", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK), + expand("results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_summits.bed", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK), + expand("logs/{case}_vs_{control}_{stage}_{mark}_call_peaks_macs2.log", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK), + #expand("results/macs2/{case}-vs-{control}_{stage}_{mark}-narrowpeak-count_mqc.json", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK), + # expand("results/bwa/{case}_{stage}_{mark}.bed", zip, case=IPS, stage=STAGE, mark=MARK), + # expand("logs/{case}_{stage}_{mark}.bamToBed", zip, case=IPS, stage=STAGE, mark=MARK), + # expand("results/frip/{case}_vs_{control}_{stage}_{mark}.frip.txt", case=IPS, control=INPUTS, stage=STAGE, mark=MARK), + # "results/macs2/E10.5_H3K27ac_macs2_pooled_peaks.narrowPeak", + # "results/macs2/E11.5_H3K27ac_macs2_pooled_peaks.narrowPeak", + # "results/macs2/E12.5_H3K27ac_macs2_pooled_peaks.narrowPeak", + # "results/macs2/E13.5_H3K27ac_macs2_pooled_peaks.narrowPeak", + # "results/macs2/E14.5_H3K27ac_macs2_pooled_peaks.narrowPeak", + # "results/macs2/E15.5_H3K27ac_macs2_pooled_peaks.narrowPeak", + # "results/macs2/E10.5_H3K4me3_macs2_pooled_peaks.narrowPeak", + # "results/macs2/E11.5_H3K4me3_macs2_pooled_peaks.narrowPeak", + # "results/macs2/E12.5_H3K4me3_macs2_pooled_peaks.narrowPeak", + # "results/macs2/E13.5_H3K4me3_macs2_pooled_peaks.narrowPeak", + # "results/macs2/E14.5_H3K4me3_macs2_pooled_peaks.narrowPeak", + # "results/macs2/E15.5_H3K4me3_macs2_pooled_peaks.narrowPeak", + # expand("results/macs2/{stage}_{mark}_overlap.narrowPeak", zip, stage= STAGE, mark = MARK) # directory("results/multiqc/multiqc_report_data/"), # "results/multiqc/multiqc_report.html" @@ -120,10 +118,15 @@ rule all: # =============================================================================================== # 4. FILTERING # > remove unmapped, mate unmapped -# > remove low MAPQ reads (-q) -# > keep proper pairs (-f) +# > remove low MAPQ reads (-q 30) +# > -F 1804 +# -Read unmapped +# -Mate unmapped +# -Not primary alignment +# -Read fails platform/vendor quality checks +# -Read is PCR or optical duplicate # > mark duplicates & remove duplicates (picard) -# > index final position sorted BAM +# > re-filter, sort and index final BAM # =============================================================================================== rule filter: @@ -132,22 +135,32 @@ rule filter: output: "results/bwa/{sample}_{stage}_{mark}_q30.sorted.bam" log: - "logs/{sample}_{stage}_{mark}.dedup" + "logs/{sample}_{stage}_{mark}.filter" shell: - "samtools sort {input} | samtools view -b -q 30 -o {output} - 2> {log}" + "samtools view -b -F 1804 -q 30 {input} | samtools sort -o {output} - 2> {log}" rule markDups: input: "results/bwa/{sample}_{stage}_{mark}_q30.sorted.bam" output: - bam="results/bwa/{sample}_{stage}_{mark}_q30.sorted.dupmark.bam", - dupQC="results/bwa/{sample}_{stage}_{mark}.dup.qc" + bam="results/bwa/{sample}_{stage}_{mark}_q30.dedup.bam", + dupQC="results/bwa/{sample}_{stage}_{mark}.dedup.qc" log: - "logs/{sample}_{stage}_{mark}.dupmark" + "logs/{sample}_{stage}_{mark}.dedup" shell: "picard MarkDuplicates I={input} O={output.bam} \ METRICS_FILE={output.dupQC} REMOVE_DUPLICATES=true ASSUME_SORTED=true 2> {log}" +rule filter2: + input: + "results/bwa/{sample}_{stage}_{mark}_q30.dedup.bam" + output: + "results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam" + log: + "logs/{sample}_{stage}_{mark}.dedup" + shell: + "samtools view -b -F 1804 {input} | samtools sort -o {output} - 2> {log}" + rule indexBam: input: "results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam" @@ -158,13 +171,13 @@ rule indexBam: shell: "samtools index {input} {output} 2> {log}" -# =============================================================================================== -# 4. ALIGNMENT QC -# > SAMtools flagstat statistics -# > CollectMultipleMetrics (picard) -# > Compute Library Complexity (PreSeq) -# =============================================================================================== - +# # =============================================================================================== +# # 4. ALIGNMENT QC +# # > SAMtools flagstat statistics +# # > CollectMultipleMetrics (picard) +# # > Compute Library Complexity (PreSeq) +# # =============================================================================================== +# rule mappingStats: input: "results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam" @@ -172,35 +185,35 @@ rule mappingStats: "logs/{sample}_{stage}_{mark}.flagstat.qc" shell: "samtools flagstat {input} > {output}" - -rule preseq: - input: - "results/bwa/{sample}_{stage}_{mark}.bam" - output: - "results/preseq/{sample}_{stage}_{mark}.ccurve.txt" - log: - "logs/{sample}_{stage}_{mark}.preseq" - shell: - "preseq lc_extrap -v -output {output} -bam {input} 2> {log}" - - -rule get_picard_complexity_metrics: - input: - "results/bwa/{sample}_{stage}_{mark}.bam" - output: - "results/picard/{sample}_{stage}_{mark}_est_lib_complex_metrics.txt" - log: - "logs/{sample}_{stage}_{mark}.picardLibComplexity" - shell: - "picard -Xmx6G EstimateLibraryComplexity INPUT={input} OUTPUT={output} USE_JDK_DEFLATER=TRUE USE_JDK_INFLATER=TRUE VERBOSITY=ERROR" - -# =============================================================================================== -# 5. deepTools -# > multiBAMsummary -# > plotCorrelation -# > plotFingerprint -# > bamCoverage (read depth normalised bigWig files) -# =============================================================================================== +# +# # rule preseq: +# # input: +# # "results/bwa/{sample}_{stage}_{mark}.bam" +# # output: +# # "results/preseq/{sample}_{stage}_{mark}.ccurve.txt" +# # log: +# # "logs/{sample}_{stage}_{mark}.preseq" +# # shell: +# # "preseq lc_extrap -v -output {output} -bam {input} 2> {log}" +# # +# # +# # rule get_picard_complexity_metrics: +# # input: +# # "results/bwa/{sample}_{stage}_{mark}.bam" +# # output: +# # "results/picard/{sample}_{stage}_{mark}_est_lib_complex_metrics.txt" +# # log: +# # "logs/{sample}_{stage}_{mark}.picardLibComplexity" +# # shell: +# # "picard -Xmx6G EstimateLibraryComplexity INPUT={input} OUTPUT={output} USE_JDK_DEFLATER=TRUE USE_JDK_INFLATER=TRUE VERBOSITY=ERROR" +# +# # =============================================================================================== +# # 5. deepTools +# # > multiBAMsummary +# # > plotCorrelation +# # > plotFingerprint +# # > bamCoverage (read depth normalised bigWig files) +# # =============================================================================================== rule deeptools_summary: input: @@ -239,23 +252,19 @@ rule deeptools_correlation: rule deeptools_coverage: input: - "results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam" + bam = "results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam", + bai = "results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai" output: "results/deeptools/{sample}_{stage}_{mark}.SeqDepthNorm.bw" log: "logs/{sample}_{stage}_{mark}_coverage.deepTools" shell: - "bamCoverage \ - --bam {input} \ - --binSize 10 \ - --normalizeUsing RPGC \ - --effectiveGenomeSize add mm10 \ - --extendReads \ - -o {output} 2> {log}" + "bamCoverage --bam {input.bam} --binSize 10 --normalizeUsing RPGC --effectiveGenomeSize 2308125349 --numberOfProcessors 4 -o {output} 2> {log}" rule deeptools_fingerprint: input: - expand(["results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"], zip, sample=all_samples, stage=STAGE, mark = MARK) + bam = expand(["results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"], zip, sample=all_samples, stage=STAGE, mark = MARK), + bai = expand(["results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai"], zip, sample=all_samples, stage=STAGE, mark = MARK) output: fig="results/deeptools/multiBAM_fingerprint.png", metrics="results/deeptools/multiBAM_fingerprint_metrics.txt", @@ -265,7 +274,7 @@ rule deeptools_fingerprint: "logs/fingerprint.deepTools" shell: "plotFingerprint -p {threads} \ - -b {input} \ + -b {input.bam} \ --plotFile {output.fig} \ --outQualityMetrics {output.metrics} \ --outRawCounts {output.rawcounts} \ @@ -275,22 +284,23 @@ rule deeptools_fingerprint: rule deeptools_plotCoverage: input: - expand(["results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"], zip, sample=all_samples, stage=STAGE, mark = MARK) + bam = expand(["results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"], zip, sample=all_samples, stage=STAGE, mark=MARK), + bai = expand(["results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai"], zip, sample=all_samples, stage=STAGE, mark=MARK) output: fig="results/deeptools/plot_coverage.png", rawcounts="results/deeptools/plot_coverage_rawcounts.tab" log: "logs/plotCoverage.deepTools" shell: - "plotCoverage --plotFile {output.fig} \ + "plotCoverage --bamfiles {input.bam} --plotFile {output.fig} \ -n 1000000 \ --outRawCounts {output.rawcounts} \ --ignoreDuplicates 2> {log}" -# =============================================================================================== -# 6. phantomPeakQuals -# =============================================================================================== +# # =============================================================================================== +# # 6. phantomPeakQuals +# # =============================================================================================== rule phantomPeakQuals: input: @@ -304,20 +314,20 @@ rule phantomPeakQuals: shell: "Rscript scripts/run_spp.R -c={input} -savp={output.savp} -savd={output.savd} -out={output.out} 2> {log}" -# =============================================================================================== -# 7. Call peaks (MACS2) -# =============================================================================================== +# # =============================================================================================== +# # 7. Call peaks (MACS2) +# # =============================================================================================== rule call_peaks_macs2: input: - control = "results/bwa/{control}_{stage}_{mark}_PPq30.sorted.dedup.bam", - case = "results/bwa/{control}_{stage}_{mark}_PPq30.sorted.dedup.bam" + control = "results/bwa/{control}_{stage}_{mark}_q30.sorted.dedup.bam", + case = "results/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bam" output: "results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.xls", "results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_summits.bed", "results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak", log: - "logs/{case}_vs_{control}_call_peaks_macs2.log" + "logs/{case}_vs_{control}_{stage}_{mark}_call_peaks_macs2.log" params: name = "{case}_vs_{control}_{stage}_{mark}_macs2", shell: @@ -328,116 +338,99 @@ rule call_peaks_macs2: -g mm 2> {log} " -# =============================================================================================== -# 8. Peak QC -# > narrow peak counts -# > fraction of reads in peaks (convert BAM to bed first then do intersect with peaks) -# =============================================================================================== - -# peak counts in a format that multiqc can handle -rule get_narrow_peak_counts_for_multiqc: - input: - peaks = "results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak" - output: - "results/macs2/{case}-vs-{control}_{stage}_{mark}-narrowpeak-count_mqc.json" - params: - peakType = "narrowPeak" - shell: - "python3 scripts/count_peaks.py \ - --peak_type {params.peakType} \ - --peaks {input.peaks} --sample_name {wildcards.case} > {output}" - - -## Convert BAM to tagAlign file for calculating FRiP QC metric (Fraction of reads in peaks) -rule bamToBed: - input: - "results/bwa/{case}_{stage}_{mark}_PPq30.sorted.dedup.bam" - output: - "results/bwa/{case}_{stage}_{mark}.bed" - log: - "logs/{case}_{stage}_{mark}_.bamToBed" - shell: - "samtools sort -n {input} | bedtools bamtobed -i - > {output}" - +# # =============================================================================================== +# # 8. Peak QC +# # > narrow peak counts +# # > fraction of reads in peaks (convert BAM to bed first then do intersect with peaks) +# # =============================================================================================== -## Fraction of reads in peaks -rule frip: - input: - bed = "results/bwa/{case}_{stage}_{mark}.bed", - peak = "results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak" - output: - "results/frip/{case}_vs_{control}_{stage}_{mark}.frip.txt" - shell: - "python2.7 scripts/encode_frip.py {input.bed} {input.peak} > {output}" - - -# =============================================================================================== -# 9. Create consensus peaksets for replicates -# =============================================================================================== - -# Based on ENCODE `overlap_peaks.py` - recommended for histone marks. -# Need to pool peaks first for each replicate +#peak counts in a format that multiqc can handle +# rule get_narrow_peak_counts_for_multiqc: +# input: +# peaks = "results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak" +# output: +# "results/macs2/{case}-vs-{control}_{stage}_{mark}-narrowpeak-count_mqc.json" +# params: +# peakType = "narrowPeak" +# shell: +# "python3 scripts/count_peaks.py \ +# --peak_type {params.peakType} \ +# --peaks {input.peaks} --sample_name {wildcards.case} > {output}" +# +# +# ## Convert BAM to tagAlign file for calculating FRiP QC metric (Fraction of reads in peaks) +# rule bamToBed: +# input: +# "results/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bam" +# output: +# "results/bwa/{case}_{stage}_{mark}.bed" +# log: +# "logs/{case}_{stage}_{mark}.bamToBed" +# shell: +# "samtools sort -n {input} | bedtools bamtobed -i - > {output}" +# +# +# ## Fraction of reads in peaks +# rule frip: +# input: +# bed = "results/bwa/{case}_{stage}_{mark}.bed", +# peak = "results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak" +# output: +# "results/frip/{case}_vs_{control}_{stage}_{mark}.frip.txt" +# shell: +# "python2.7 scripts/encode_frip.py {input.bed} {input.peak} > {output}" -rule pool_peaks: - input: - M1S1 = expand(["results/macs2/{case}_vs_{control}_E10.5_H3K27ac_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS), - M1S2 = expand(["results/macs2/{case}_vs_{control}_E11.5_H3K27ac_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS), - M1S3 = expand(["results/macs2/{case}_vs_{control}_E12.5_H3K27ac_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS), - M1S4 = expand(["results/macs2/{case}_vs_{control}_E13.5_H3K27ac_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS), - M1S5 = expand(["results/macs2/{case}_vs_{control}_E14.5_H3K27ac_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS), - M1S6 = expand(["results/macs2/{case}_vs_{control}_E15.5_H3K27ac_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS), - M2S1 = expand(["results/macs2/{case}_vs_{control}_E10.5_H3K4me3_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS), - M2S2 = expand(["results/macs2/{case}_vs_{control}_E11.5_H3K4me3_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS), - M2S3 = expand(["results/macs2/{case}_vs_{control}_E12.5_H3K4me3_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS), - M2S4 = expand(["results/macs2/{case}_vs_{control}_E13.5_H3K4me3_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS), - M2S5 = expand(["results/macs2/{case}_vs_{control}_E14.5_H3K4me3_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS), - M2S6 = expand(["results/macs2/{case}_vs_{control}_E15.5_H3K4me3_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS) - output: - M1S1 = "results/macs2/E10.5_H3K27ac_macs2_pooled_peaks.narrowPeak", - M1S2 = "results/macs2/E11.5_H3K27ac_macs2_pooled_peaks.narrowPeak", - M1S3 = "results/macs2/E12.5_H3K27ac_macs2_pooled_peaks.narrowPeak", - M1S4 = "results/macs2/E13.5_H3K27ac_macs2_pooled_peaks.narrowPeak", - M1S5 = "results/macs2/E14.5_H3K27ac_macs2_pooled_peaks.narrowPeak", - M1S6 = "results/macs2/E15.5_H3K27ac_macs2_pooled_peaks.narrowPeak", - M2S1 = "results/macs2/E10.5_H3K4me3_macs2_pooled_peaks.narrowPeak", - M2S2 = "results/macs2/E11.5_H3K4me3_macs2_pooled_peaks.narrowPeak", - M2S3 = "results/macs2/E12.5_H3K4me3_macs2_pooled_peaks.narrowPeak", - M2S4 = "results/macs2/E13.5_H3K4me3_macs2_pooled_peaks.narrowPeak", - M2S5 = "results/macs2/E14.5_H3K4me3_macs2_pooled_peaks.narrowPeak", - M2S6 = "results/macs2/E15.5_H3K4me3_macs2_pooled_peaks.narrowPeak" - - run: - shell("cat {input.M1S1} > {output.M1S1}") - shell("cat {input.M1S2} > {output.M1S2}") - shell("cat {input.M1S3} > {output.M1S3}") - shell("cat {input.M1S4} > {output.M1S4}") - shell("cat {input.M1S5} > {output.M1S5}") - shell("cat {input.M1S6} > {output.M1S6}") - shell("cat {input.M2S1} > {output.M2S1}") - shell("cat {input.M2S2} > {output.M2S2}") - shell("cat {input.M2S3} > {output.M2S3}") - shell("cat {input.M2S4} > {output.M2S4}") - shell("cat {input.M2S5} > {output.M2S5}") - shell("cat {input.M2S6} > {output.M2S6}") - -rule overlap_peaks_H3K4me3: - input: - peaks=expand(["results/macs2/{case}_vs_{control}_E10.5_H3K4me3_macs2_peaks.narrowPeak"], ["results/macs2/{case}_vs_{control}_E11.5_H3K4me3_macs2_peaks.narrowPeak"],["results/macs2/{case}_vs_{control}_E12.5_H3K4me3_macs2_peaks.narrowPeak"], ["results/macs2/{case}_vs_{control}_E13.5_H3K4me3_macs2_peaks.narrowPeak"], ["results/macs2/{case}_vs_{control}_E14.5_H3K4me3_macs2_peaks.narrowPeak"], ["results/macs2/{case}_vs_{control}_E15.5_H3K4me3_macs2_peaks.narrowPeak"],zip, case=IPS, control=INPUTS), - pooled=expand("results/macs2/{stage}_{mark}_macs2_pooled_peaks.narrowPeak", zip, stage = STAGE, mark = MARK) - output: - expand("results/macs2/{stage}_{mark}_overlap.narrowPeak", zip, stage= STAGE, mark = MARK) - shell: - "python2.7 scripts/overlap_peaks.py {input.peaks} {input.pooled} {output}" +# # =============================================================================================== +# # 9. Create consensus peaksets for replicates +# # =============================================================================================== +# +# # Based on ENCODE `overlap_peaks.py` - recommended for histone marks. +# # Need to pool peaks first for each replicate +# +# rule pool_peaks: +# input: +# E10 = expand(["results/macs2/{case}_vs_{control}_E10.5_{mark}_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS, mark=MARK), +# E11 = expand(["results/macs2/{case}_vs_{control}_E11.5_{mark}_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS, mark=MARK), +# E12 = expand(["results/macs2/{case}_vs_{control}_E12.5_{mark}_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS, mark=MARK), +# E13 = expand(["results/macs2/{case}_vs_{control}_E13.5_{mark}_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS, mark=MARK), +# E14 = expand(["results/macs2/{case}_vs_{control}_E14.5_{mark}_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS, mark=MARK), +# E15 = expand(["results/macs2/{case}_vs_{control}_E15.5_{mark}_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS, mark=MARK) +# output: +# E10 = "results/macs2/E10.5_H3K27ac_macs2_pooled_peaks.narrowPeak", +# E11 = "results/macs2/E11.5_H3K27ac_macs2_pooled_peaks.narrowPeak", +# E12 = "results/macs2/E12.5_H3K27ac_macs2_pooled_peaks.narrowPeak", +# E13 = "results/macs2/E13.5_H3K27ac_macs2_pooled_peaks.narrowPeak", +# E14 = "results/macs2/E14.5_H3K27ac_macs2_pooled_peaks.narrowPeak", +# E15 = "results/macs2/E15.5_H3K27ac_macs2_pooled_peaks.narrowPeak", +# run: +# shell("cat {input.E10} > {output.E10}") +# shell("cat {input.E11} > {output.E11}") +# shell("cat {input.E12} > {output.E12}") +# shell("cat {input.E13} > {output.E13}") +# shell("cat {input.E14} > {output.E14}") +# shell("cat {input.E15} > {output.E15}") +# +# rule overlap_peaks: +# input: +# E10= expand(["results/macs2/{case}_vs_{control}_E10.5_{mark}_macs2_peaks.narrowPeak"]), +# E11= expand(["results/macs2/{case}_vs_{control}_E11.5_{mark}_macs2_peaks.narrowPeak"]), +# E12= expand(["results/macs2/{case}_vs_{control}_E12.5_{mark}_macs2_peaks.narrowPeak"]), +# E13= expand(["results/macs2/{case}_vs_{control}_E13.5_{mark}_macs2_peaks.narrowPeak"]), +# E14= expand(["results/macs2/{case}_vs_{control}_E14.5_{mark}_macs2_peaks.narrowPeak"]), +# E15= expand(["results/macs2/{case}_vs_{control}_E15.5_{mark}_macs2_peaks.narrowPeak"]). +# E10_pooled=expand("results/macs2/E10.5_{mark}_macs2_pooled_peaks.narrowPeak") +# E11_pooled=expand("results/macs2/E11.5_{mark}_macs2_pooled_peaks.narrowPeak") +# E12_pooled= +# E13_pooled= +# E14_pooled= +# E15_pooled= +# output: +# expand("results/macs2/{stage}_{mark}_overlap.narrowPeak", zip, stage= STAGE, mark = MARK) +# shell: +# "python2.7 scripts/overlap_peaks.py {input.peaks} {input.pooled} {output}" +# -rule overlap_peaks_H3K27ac: - input: - peaks=expand(["results/macs2/{case}_vs_{control}_E10.5_H3K27ac_macs2_peaks.narrowPeak"], ["results/macs2/{case}_vs_{control}_E11.5_H3K27ac_macs2_peaks.narrowPeak"],["results/macs2/{case}_vs_{control}_E12.5_H3K27ac_macs2_peaks.narrowPeak"], ["results/macs2/{case}_vs_{control}_E13.5_H3K27ac_macs2_peaks.narrowPeak"], ["results/macs2/{case}_vs_{control}_E14.5_H3K27ac_macs2_peaks.narrowPeak"], ["results/macs2/{case}_vs_{control}_E15.5_H3K27ac_macs2_peaks.narrowPeak"],zip, case=IPS, control=INPUTS), - pooled=expand("results/macs2/{stage}_{mark}_macs2_pooled_peaks.narrowPeak", zip, stage = STAGE, mark = MARK) - output: - expand("results/macs2/{stage}_{mark}_overlap.narrowPeak", zip, stage= STAGE, mark = MARK) - shell: - "python2.7 scripts/overlap_peaks.py {input.peaks} {input.pooled} {output}" # =============================================================================================== # 10. QC on replicate peaks