diff --git a/mouse/Snakefile b/mouse/Snakefile index fc83017e193fd2e1876b1ba9b99443ecc79c5345..4c6703bb7da48063f30911113cd27069c7e63dff 100644 --- a/mouse/Snakefile +++ b/mouse/Snakefile @@ -54,30 +54,27 @@ rule all: expand("results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam", zip, sample=all_samples, stage=STAGE, mark=MARK), expand("results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai", zip, sample=all_samples, stage=STAGE, mark=MARK), expand("logs/{sample}_{stage}_{mark}.flagstat.qc", zip, sample=all_samples, stage=STAGE, mark=MARK), - # #expand("results/preseq/{sample}_{stage}_{mark}.ccurve.txt", zip, sample=all_samples, stage=STAGE, mark=MARK), - # #expand("results/picard/{sample}_{stage}_{mark}_est_lib_complex_metrics.txt", zip, sample=all_samples, stage=STAGE, mark=MARK), - # #expand("logs/{sample}_{stage}_{mark}.picardLibComplexity", zip, sample=all_samples, stage=STAGE, mark=MARK), - "results/deeptools/multibamsum.npz", - "results/deeptools/multibamsum.tab", - "results/deeptools/pearsoncor_multibamsum.png", - "results/deeptools/pearsoncor_multibamsum_matrix.txt", - expand("results/deeptools/{sample}_{stage}_{mark}.SeqDepthNorm.bw", zip, sample=all_samples, stage=STAGE, mark=MARK), - "results/deeptools/multiBAM_fingerprint.png", - "results/deeptools/multiBAM_fingerprint_metrics.txt", - "results/deeptools/multiBAM_fingerprint_rawcounts.txt", - "results/deeptools/plot_coverage.png", - "results/deeptools/plot_coverage_rawcounts.tab", - expand("results/phantomPeaks/{sample}_{stage}_{mark}.spp.pdf", zip, sample=all_samples, stage=STAGE, mark=MARK), - expand("results/phantomPeaks/{sample}_{stage}_{mark}.spp.Rdata", zip, sample=all_samples, stage=STAGE, mark=MARK), - expand("results/phantomPeaks/{sample}_{stage}_{mark}.spp.out", zip, sample=all_samples, stage=STAGE, mark=MARK), - expand("results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK), - expand("results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.xls", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK), - expand("results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_summits.bed", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK), - expand("logs/{case}_vs_{control}_{stage}_{mark}_call_peaks_macs2.log", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK), - #expand("results/macs2/{case}-vs-{control}_{stage}_{mark}-narrowpeak-count_mqc.json", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK), - # expand("results/bwa/{case}_{stage}_{mark}.bed", zip, case=IPS, stage=STAGE, mark=MARK), - # expand("logs/{case}_{stage}_{mark}.bamToBed", zip, case=IPS, stage=STAGE, mark=MARK), - # expand("results/frip/{case}_vs_{control}_{stage}_{mark}.frip.txt", case=IPS, control=INPUTS, stage=STAGE, mark=MARK), + "results/deeptools/multibamsum.npz", + "results/deeptools/multibamsum.tab", + "results/deeptools/pearsoncor_multibamsum.png", + "results/deeptools/pearsoncor_multibamsum_matrix.txt", + expand("results/deeptools/{sample}_{stage}_{mark}.SeqDepthNorm.bw", zip, sample=all_samples, stage=STAGE, mark=MARK), + "results/deeptools/multiBAM_fingerprint.png", + "results/deeptools/multiBAM_fingerprint_metrics.txt", + "results/deeptools/multiBAM_fingerprint_rawcounts.txt", + "results/deeptools/plot_coverage.png", + "results/deeptools/plot_coverage_rawcounts.tab", + expand("results/phantomPeaks/{sample}_{stage}_{mark}.spp.pdf", zip, sample=all_samples, stage=STAGE, mark=MARK), + expand("results/phantomPeaks/{sample}_{stage}_{mark}.spp.Rdata", zip, sample=all_samples, stage=STAGE, mark=MARK), + expand("results/phantomPeaks/{sample}_{stage}_{mark}.spp.out", zip, sample=all_samples, stage=STAGE, mark=MARK), + expand("results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK), + expand("results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.xls", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK), + expand("results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_summits.bed", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK), + expand("logs/{case}_vs_{control}_{stage}_{mark}_call_peaks_macs2.log", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK), + expand("results/macs2/{case}-vs-{control}_{stage}_{mark}-narrowpeak-count_mqc.json", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK), + #expand("results/bwa/{case}_{stage}_{mark}.bed", zip, case=IPS, stage=STAGE, mark=MARK), + #expand("logs/{case}_{stage}_{mark}.bamToBed", zip, case=IPS, stage=STAGE, mark=MARK), + #expand("results/frip/{case}_vs_{control}_{stage}_{mark}.frip.txt", case=IPS, control=INPUTS, stage=STAGE, mark=MARK) # "results/macs2/E10.5_H3K27ac_macs2_pooled_peaks.narrowPeak", # "results/macs2/E11.5_H3K27ac_macs2_pooled_peaks.narrowPeak", # "results/macs2/E12.5_H3K27ac_macs2_pooled_peaks.narrowPeak", @@ -151,7 +148,7 @@ rule markDups: "picard MarkDuplicates I={input} O={output.bam} \ METRICS_FILE={output.dupQC} REMOVE_DUPLICATES=true ASSUME_SORTED=true 2> {log}" -rule filter2: +rule sort: input: "results/bwa/{sample}_{stage}_{mark}_q30.dedup.bam" output: @@ -159,7 +156,7 @@ rule filter2: log: "logs/{sample}_{stage}_{mark}.dedup" shell: - "samtools view -b -F 1804 {input} | samtools sort -o {output} - 2> {log}" + "samtools sort -o {output} - 2> {log}" rule indexBam: input: @@ -185,28 +182,7 @@ rule mappingStats: "logs/{sample}_{stage}_{mark}.flagstat.qc" shell: "samtools flagstat {input} > {output}" -# -# # rule preseq: -# # input: -# # "results/bwa/{sample}_{stage}_{mark}.bam" -# # output: -# # "results/preseq/{sample}_{stage}_{mark}.ccurve.txt" -# # log: -# # "logs/{sample}_{stage}_{mark}.preseq" -# # shell: -# # "preseq lc_extrap -v -output {output} -bam {input} 2> {log}" -# # -# # -# # rule get_picard_complexity_metrics: -# # input: -# # "results/bwa/{sample}_{stage}_{mark}.bam" -# # output: -# # "results/picard/{sample}_{stage}_{mark}_est_lib_complex_metrics.txt" -# # log: -# # "logs/{sample}_{stage}_{mark}.picardLibComplexity" -# # shell: -# # "picard -Xmx6G EstimateLibraryComplexity INPUT={input} OUTPUT={output} USE_JDK_DEFLATER=TRUE USE_JDK_INFLATER=TRUE VERBOSITY=ERROR" -# + # # =============================================================================================== # # 5. deepTools # # > multiBAMsummary @@ -335,7 +311,7 @@ rule call_peaks_macs2: -c {input.control} \ --outdir results/macs2/ \ -n {params.name} \ - -g mm 2> {log} " + -g 1.87e9 2> {log} " # # =============================================================================================== @@ -345,25 +321,25 @@ rule call_peaks_macs2: # # =============================================================================================== #peak counts in a format that multiqc can handle -# rule get_narrow_peak_counts_for_multiqc: -# input: -# peaks = "results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak" -# output: -# "results/macs2/{case}-vs-{control}_{stage}_{mark}-narrowpeak-count_mqc.json" -# params: -# peakType = "narrowPeak" -# shell: -# "python3 scripts/count_peaks.py \ -# --peak_type {params.peakType} \ -# --peaks {input.peaks} --sample_name {wildcards.case} > {output}" -# -# +rule get_narrow_peak_counts_for_multiqc: + input: + peaks = "results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak" + output: + "results/macs2/{case}-vs-{control}_{stage}_{mark}-narrowpeak-count_mqc.json" + params: + peakType = "narrowPeak" + shell: + "python2.7 scripts/count_peaks.py \ + --peak_type {params.peakType} \ + --peaks {input.peaks} --sample_name {wildcards.case}_{wildcards.stage}_{wildcards.mark} > {output}" + + # ## Convert BAM to tagAlign file for calculating FRiP QC metric (Fraction of reads in peaks) # rule bamToBed: # input: # "results/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bam" # output: -# "results/bwa/{case}_{stage}_{mark}.bed" +# "results/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bed" # log: # "logs/{case}_{stage}_{mark}.bamToBed" # shell: @@ -373,13 +349,13 @@ rule call_peaks_macs2: # ## Fraction of reads in peaks # rule frip: # input: -# bed = "results/bwa/{case}_{stage}_{mark}.bed", +# bed = "results/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bed", # peak = "results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak" # output: # "results/frip/{case}_vs_{control}_{stage}_{mark}.frip.txt" # shell: # "python2.7 scripts/encode_frip.py {input.bed} {input.peak} > {output}" - +# # # =============================================================================================== # # 9. Create consensus peaksets for replicates