diff --git a/mouse/Snakefile_H3K27ac b/mouse/Snakefile_H3K27ac index 55a1eea91633729e065359aa16e98920fb0f9256..26d04cf754084623fbb241b94c4e28e8c21ab3bd 100644 --- a/mouse/Snakefile_H3K27ac +++ b/mouse/Snakefile_H3K27ac @@ -12,7 +12,7 @@ # 10. Present QC for raw read, alignment, peak-calling in MultiQC -configfile: "configs/config_H3K427ac.yaml" +configfile: "configs/config_H3K27ac.yaml" # Contains sample and genome information # calls the SRR.txt file that contains the samples as either IP or control @@ -48,69 +48,75 @@ all_samples = IPS + INPUTS rule all: input: - expand("results/bwa/{sample}_{stage}_{mark}_q30.sorted.bam", zip, sample=all_samples, stage=STAGE, mark=MARK), - expand("results/bwa/{sample}_{stage}_{mark}_q30.dupmark.bam", zip, sample=all_samples, stage=STAGE, mark=MARK), - expand("results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam", zip, sample=all_samples, stage=STAGE, mark=MARK), - expand("results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai", zip, sample=all_samples, stage=STAGE, mark=MARK), - expand("results/qc/{sample}_{stage}_{mark}.dedup.flagstat.qc", zip, sample=all_samples, stage=STAGE, mark=MARK), - expand("results/qc/{sample}_{stage}_{mark}.dupmark.flagstat.qc", zip, sample=all_samples, stage=STAGE, mark=MARK), - expand("results/qc/{sample}_{stage}_{mark}.q30.flagstat.qc", zip, sample=all_samples, stage=STAGE, mark=MARK), - expand("results/qc/{sample}_{stage}_{mark}.unfiltered.flagstat.qc", zip, sample=all_samples, stage=STAGE, mark=MARK), - "results/bwa/E10.5_H3K27ac_q30.sorted.pooled.dedup.bam", - "results/bwa/E11.5_H3K27ac_q30.sorted.pooled.dedup.bam", - "results/bwa/E12.5_H3K27ac_q30.sorted.pooled.dedup.bam", - "results/bwa/E13.5_H3K27ac_q30.sorted.pooled.dedup.bam", - "results/bwa/E14.5_H3K27ac_q30.sorted.pooled.dedup.bam", - "results/bwa/E15.5_H3K27ac_q30.sorted.pooled.dedup.bam", - "results/bwa/input_E10.5_H3K27ac_q30.sorted.dedup.bam", - "results/bwa/input_E11.5_H3K27ac_q30.sorted.dedup.bam", - "results/bwa/input_E12.5_H3K27ac_q30.sorted.dedup.bam", - "results/bwa/input_E13.5_H3K27ac_q30.sorted.dedup.bam", - "results/bwa/input_E14.5_H3K27ac_q30.sorted.dedup.bam", - "results/bwa/input_E15.5_H3K27ac_q30.sorted.dedup.bam", - "logs/E10.5_H3K27ac.mergeBAM", - "logs/E11.5_H3K27ac.mergeBAM", - "logs/E12.5_H3K27ac.mergeBAM", - "logs/E13.5_H3K27ac.mergeBAM", - "logs/E14.5_H3K27ac.mergeBAM", - "logs/E15.5_H3K27ac.mergeBAM", - "logs/input_E10.5_H3K27ac.mergeBAM", - "logs/input_E11.5_H3K27ac.mergeBAM", - "logs/input_E12.5_H3K27ac.mergeBAM", - "logs/input_E13.5_H3K27ac.mergeBAM", - "logs/input_E14.5_H3K27ac.mergeBAM", - "logs/input_E15.5_H3K27ac.mergeBAM", - # "results/qc/multibamsum.npz", - # "results/qc/multibamsum.tab", - # "results/qc/pearsoncor_multibamsum.png", - # "results/qc/pearsoncor_multibamsum_matrix.txt", - # expand("results/qc/{sample}_{stage}_{mark}.SeqDepthNorm.bw", zip, sample=all_samples, stage=STAGE, mark=MARK), - # "results/qc/multiBAM_fingerprint.png", - # "results/qc/multiBAM_fingerprint_metrics.txt", - # "results/qc/multiBAM_fingerprint_rawcounts.txt", - # "results/qc/plot_coverage.png", - # "results/qc/plot_coverage_rawcounts.tab", - #expand("results/bwa/{sample}_{stage}_{mark}_q30.dupmark.tmp.bam", zip, sample=all_samples, stage=STAGE, mark=MARK), - #expand("logs/{sample}_{stage}_{mark}.pbc.sort", zip, sample=all_samples, stage=STAGE, mark=MARK), - #expand("results/qc/{sample}_{stage}_{mark}.pbc.qc", zip, sample=all_samples, stage=STAGE, mark=MARK), - expand("results/qc/{sample}_{stage}_{mark}_filt_15Mreads.SE.cc.qc",zip, sample=all_samples, stage=STAGE, mark=MARK), - expand("results/qc/{sample}_{stage}_{mark}_filt_15Mreads.SE.cc.plot.pdf", zip, sample=all_samples, stage=STAGE, mark=MARK), - expand("results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK), - expand("results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.xls", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK), - expand("results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_summits.bed", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK), - expand("logs/{case}_vs_{control}_{stage}_{mark}_call_peaks_macs2.log", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK), - expand("results/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bed", zip, case=IPS, stage=STAGE, mark=MARK), - expand("logs/{case}_{stage}_{mark}.bamToBed", zip, case=IPS, stage=STAGE, mark=MARK), - expand("results/qc/{case}_vs_{control}_{stage}_{mark}.frip.txt", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK), - directory("results/macs2/pooled/"), - "results/macs2/H3K27ac_E10.5_overlap.narrowPeak", - "results/macs2/H3K27ac_E11.5_overlap.narrowPeak", - "results/macs2/H3K27ac_E12.5_overlap.narrowPeak", - "results/macs2/H3K27ac_E13.5_overlap.narrowPeak", - "results/macs2/H3K27ac_E14.5_overlap.narrowPeak", - "results/macs2/H3K27ac_E15.5_overlap.narrowPeak" - # directory("results/multiqc/multiqc_report_data/"), - # "results/multiqc/multiqc_report.html" + #expand("results_10M/bwa/{sample}_{stage}_{mark}_q30.sorted.bam", zip, sample=all_samples, stage=STAGE, mark=MARK), + #expand("results_10M/bwa/{sample}_{stage}_{mark}_q30.dupmark.bam", zip, sample=all_samples, stage=STAGE, mark=MARK), + #expand("results_10M/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam", zip, sample=all_samples, stage=STAGE, mark=MARK), + expand("results_10M/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai", zip, sample=all_samples, stage=STAGE, mark=MARK), + expand("results_10M/qc/{sample}_{stage}_{mark}.dedup.flagstat.qc", zip, sample=all_samples, stage=STAGE, mark=MARK), + #expand("results_10M/qc/{sample}_{stage}_{mark}.dupmark.flagstat.qc", zip, sample=all_samples, stage=STAGE, mark=MARK), + #expand("results_10M/qc/{sample}_{stage}_{mark}.q30.flagstat.qc", zip, sample=all_samples, stage=STAGE, mark=MARK), + #expand("results_10M/qc/{sample}_{stage}_{mark}.unfiltered.flagstat.qc", zip, sample=all_samples, stage=STAGE, mark=MARK), + "results_10M/bwa/E10.5_H3K27ac_q30.sorted.pooled.dedup.bam", + "results_10M/bwa/E11.5_H3K27ac_q30.sorted.pooled.dedup.bam", + "results_10M/bwa/E12.5_H3K27ac_q30.sorted.pooled.dedup.bam", + "results_10M/bwa/E13.5_H3K27ac_q30.sorted.pooled.dedup.bam", + "results_10M/bwa/E14.5_H3K27ac_q30.sorted.pooled.dedup.bam", + "results_10M/bwa/E15.5_H3K27ac_q30.sorted.pooled.dedup.bam", + "results_10M/bwa/input_E10.5_H3K27ac_q30.sorted.dedup.bam", + "results_10M/bwa/input_E11.5_H3K27ac_q30.sorted.dedup.bam", + "results_10M/bwa/input_E12.5_H3K27ac_q30.sorted.dedup.bam", + "results_10M/bwa/input_E13.5_H3K27ac_q30.sorted.dedup.bam", + "results_10M/bwa/input_E14.5_H3K27ac_q30.sorted.dedup.bam", + "results_10M/bwa/input_E15.5_H3K27ac_q30.sorted.dedup.bam", + "results_10M/logs/E10.5_H3K27ac.mergeBAM", + "results_10M/logs/E11.5_H3K27ac.mergeBAM", + "results_10M/logs/E12.5_H3K27ac.mergeBAM", + "results_10M/logs/E13.5_H3K27ac.mergeBAM", + "results_10M/logs/E14.5_H3K27ac.mergeBAM", + "results_10M/logs/E15.5_H3K27ac.mergeBAM", + "results_10M/logs/input_E10.5_H3K27ac.mergeBAM", + "results_10M/logs/input_E11.5_H3K27ac.mergeBAM", + "results_10M/logs/input_E12.5_H3K27ac.mergeBAM", + "results_10M/logs/input_E13.5_H3K27ac.mergeBAM", + "results_10M/logs/input_E14.5_H3K27ac.mergeBAM", + "results_10M/logs/input_E15.5_H3K27ac.mergeBAM", + "results_10M/qc/H3K27ac_multibamsum.npz", + "results_10M/qc/H3K27ac_multibamsum.tab", + "results_10M/qc/H3K27ac_pearsoncor_multibamsum.png", + "results_10M/qc/H3K27ac_pearsoncor_multibamsum_matrix.txt", + expand("results_10M/qc/{sample}_{stage}_{mark}.SeqDepthNorm.bw", zip, sample=all_samples, stage=STAGE, mark=MARK), + "results_10M/qc/H3K27ac_multiBAM_fingerprint.png", + "results_10M/qc/H3K27ac_multiBAM_fingerprint_metrics.txt", + "results_10M/qc/H3K27ac_multiBAM_fingerprint_rawcounts.txt", + "results_10M/qc/H3K27ac_plot_coverage.png", + "results_10M/qc/H3K27ac_plot_coverage_rawcounts.tab", + #expand("results_10M/bwa/{sample}_{stage}_{mark}_q30.dupmark.tmp.bam", zip, sample=all_samples, stage=STAGE, mark=MARK), + #expand("results_10M/logs/{sample}_{stage}_{mark}.pbc.sort", zip, sample=all_samples, stage=STAGE, mark=MARK), + #expand("results_10M/qc/{sample}_{stage}_{mark}.pbc.qc", zip, sample=all_samples, stage=STAGE, mark=MARK), + #expand("results_10M/qc/{sample}_{stage}_{mark}_filt_15Mreads.SE.cc.qc",zip, sample=all_samples, stage=STAGE, mark=MARK), + #expand("results_10M/qc/{sample}_{stage}_{mark}_filt_15Mreads.SE.cc.plot.pdf", zip, sample=all_samples, stage=STAGE, mark=MARK), + expand("results_10M/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK), + expand("results_10M/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.xls", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK), + expand("results_10M/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_summits.bed", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK), + expand("results_10M/logs/{case}_vs_{control}_{stage}_{mark}_call_peaks_macs2.log", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK), + expand("results_10M/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bed", zip, case=IPS, stage=STAGE, mark=MARK), + expand("results_10M/logs/{case}_{stage}_{mark}.bamToBed", zip, case=IPS, stage=STAGE, mark=MARK), + expand("results_10M/qc/{case}_vs_{control}_{stage}_{mark}.frip.txt", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK), + directory("results_10M/macs2/pooled/"), + "results_10M/macs2/H3K27ac_E10.5_overlap.narrowPeak", + "results_10M/macs2/H3K27ac_E11.5_overlap.narrowPeak", + "results_10M/macs2/H3K27ac_E12.5_overlap.narrowPeak", + "results_10M/macs2/H3K27ac_E13.5_overlap.narrowPeak", + "results_10M/macs2/H3K27ac_E14.5_overlap.narrowPeak", + "results_10M/macs2/H3K27ac_E15.5_overlap.narrowPeak", + "results_10M/macs2/H3K27ac_E10.5_overlap.frip", + "results_10M/macs2/H3K27ac_E11.5_overlap.frip", + "results_10M/macs2/H3K27ac_E12.5_overlap.frip", + "results_10M/macs2/H3K27ac_E13.5_overlap.frip", + "results_10M/macs2/H3K27ac_E14.5_overlap.frip", + "results_10M/macs2/H3K27ac_E15.5_overlap.frip", + # directory("results_10M/multiqc/multiqc_report_data/"), + # "results_10M/multiqc/multiqc_report.html" # =============================================================================================== # 1. FASTQC @@ -149,86 +155,86 @@ rule all: rule filter: input: - "results/bwa/{sample}_{stage}_{mark}.bam" + "results_10M/bwa/{sample}_{stage}_{mark}.bam" output: - "results/bwa/{sample}_{stage}_{mark}_q30.sorted.bam" + "results_10M/bwa/{sample}_{stage}_{mark}_q30.sorted.bam" log: - "logs/{sample}_{stage}_{mark}.filter" + "results_10M/logs/{sample}_{stage}_{mark}.filter" shell: "samtools view -b -F 1804 -q 30 {input} | samtools sort -o {output} - 2> {log}" rule markDups: input: - "results/bwa/{sample}_{stage}_{mark}_q30.sorted.bam" + "results_10M/bwa/{sample}_{stage}_{mark}_q30.sorted.bam" output: - bam="results/bwa/{sample}_{stage}_{mark}_q30.dupmark.bam", - dupQC="results/bwa/{sample}_{stage}_{mark}.dupmark.qc" + bam="results_10M/bwa/{sample}_{stage}_{mark}_q30.dupmark.bam", + dupQC="results_10M/bwa/{sample}_{stage}_{mark}.dupmark.qc" log: - "logs/{sample}_{stage}_{mark}.dedup" + "results_10M/logs/{sample}_{stage}_{mark}.dedup" shell: "picard MarkDuplicates I={input} O={output.bam} \ METRICS_FILE={output.dupQC} REMOVE_DUPLICATES=false ASSUME_SORTED=true 2> {log}" rule dedup: input: - "results/bwa/{sample}_{stage}_{mark}_q30.dupmark.bam" + "results_10M/bwa/{sample}_{stage}_{mark}_q30.dupmark.bam" output: - "results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam" + "results_10M/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam" log: - "logs/{sample}_{stage}_{mark}.dedup" + "results_10M/logs/{sample}_{stage}_{mark}.dedup" shell: "samtools view -F 1804 -b {input} | samtools sort -o {output} - 2> {log}" rule indexBam: input: - "results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam" + "results_10M/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam" output: - "results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai" + "results_10M/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai" log: - "logs/{sample}_{stage}_{mark}.indexBam" + "results_10M/logs/{sample}_{stage}_{mark}.indexBam" shell: "samtools index {input} {output} 2> {log}" rule mergeBAMreplicates: input: - E10 = ["results/bwa/ENCFF213EBC_E10.5_H3K27ac_q30.sorted.dedup.bam", "results/bwa/ENCFF548BRR_E10.5_H3K27ac_q30.sorted.dedup.bam"], - E11 = ["results/bwa/ENCFF512SFE_E11.5_H3K27ac_q30.sorted.dedup.bam", "results/bwa/ENCFF515PKL_E11.5_H3K27ac_q30.sorted.dedup.bam"], - E12 = ["results/bwa/ENCFF394TZN_E12.5_H3K27ac_q30.sorted.dedup.bam", "results/bwa/ENCFF011NFM_E12.5_H3K27ac_q30.sorted.dedup.bam"], - E13 = ["results/bwa/ENCFF194ORC_E13.5_H3K27ac_q30.sorted.dedup.bam", "results/bwa/ENCFF290ZNF_E13.5_H3K27ac_q30.sorted.dedup.bam"], - E14 = ["results/bwa/ENCFF327VAO_E14.5_H3K27ac_q30.sorted.dedup.bam", "results/bwa/ENCFF902HAR_E14.5_H3K27ac_q30.sorted.dedup.bam"], - E15 = ["results/bwa/ENCFF584JFB_E15.5_H3K27ac_q30.sorted.dedup.bam", "results/bwa/ENCFF707WKL_E15.5_H3K27ac_q30.sorted.dedup.bam"], - E10C = ["results/bwa/ENCFF157KEH_E10.5_H3K27ac_q30.sorted.dedup.bam", "results/bwa/ENCFF825AVI_E10.5_H3K27ac_q30.sorted.dedup.bam"], - E11C = ["results/bwa/ENCFF184CUE_E11.5_H3K27ac_q30.sorted.dedup.bam", "results/bwa/ENCFF376FGM_E11.5_H3K27ac_q30.sorted.dedup.bam"], - E12C = ["results/bwa/ENCFF203JQV_E12.5_H3K27ac_q30.sorted.dedup.bam", "results/bwa/ENCFF058AUT_E12.5_H3K27ac_q30.sorted.dedup.bam"], - E13C = ["results/bwa/ENCFF117QRC_E13.5_H3K27ac_q30.sorted.dedup.bam", "results/bwa/ENCFF248PGK_E13.5_H3K27ac_q30.sorted.dedup.bam"], - E14C = ["results/bwa/ENCFF784ORI_E14.5_H3K27ac_q30.sorted.dedup.bam", "results/bwa/ENCFF002HZV_E14.5_H3K27ac_q30.sorted.dedup.bam"], - E15C = ["results/bwa/ENCFF727QTS_E15.5_H3K27ac_q30.sorted.dedup.bam", "results/bwa/ENCFF182XFG_E15.5_H3K27ac_q30.sorted.dedup.bam"], + E10 = ["results_10M/bwa/ENCFF213EBC_E10.5_H3K27ac_q30.sorted.dedup.bam", "results_10M/bwa/ENCFF548BRR_E10.5_H3K27ac_q30.sorted.dedup.bam"], + E11 = ["results_10M/bwa/ENCFF512SFE_E11.5_H3K27ac_q30.sorted.dedup.bam", "results_10M/bwa/ENCFF515PKL_E11.5_H3K27ac_q30.sorted.dedup.bam"], + E12 = ["results_10M/bwa/ENCFF394TZN_E12.5_H3K27ac_q30.sorted.dedup.bam", "results_10M/bwa/ENCFF011NFM_E12.5_H3K27ac_q30.sorted.dedup.bam"], + E13 = ["results_10M/bwa/ENCFF194ORC_E13.5_H3K27ac_q30.sorted.dedup.bam", "results_10M/bwa/ENCFF290ZNF_E13.5_H3K27ac_q30.sorted.dedup.bam"], + E14 = ["results_10M/bwa/ENCFF327VAO_E14.5_H3K27ac_q30.sorted.dedup.bam", "results_10M/bwa/ENCFF902HAR_E14.5_H3K27ac_q30.sorted.dedup.bam"], + E15 = ["results_10M/bwa/ENCFF584JFB_E15.5_H3K27ac_q30.sorted.dedup.bam", "results_10M/bwa/ENCFF707WKL_E15.5_H3K27ac_q30.sorted.dedup.bam"], + E10C = ["results_10M/bwa/ENCFF157KEH_E10.5_H3K27ac_q30.sorted.dedup.bam", "results_10M/bwa/ENCFF825AVI_E10.5_H3K27ac_q30.sorted.dedup.bam"], + E11C = ["results_10M/bwa/ENCFF184CUE_E11.5_H3K27ac_q30.sorted.dedup.bam", "results_10M/bwa/ENCFF376FGM_E11.5_H3K27ac_q30.sorted.dedup.bam"], + E12C = ["results_10M/bwa/ENCFF203JQV_E12.5_H3K27ac_q30.sorted.dedup.bam", "results_10M/bwa/ENCFF058AUT_E12.5_H3K27ac_q30.sorted.dedup.bam"], + E13C = ["results_10M/bwa/ENCFF117QRC_E13.5_H3K27ac_q30.sorted.dedup.bam", "results_10M/bwa/ENCFF248PGK_E13.5_H3K27ac_q30.sorted.dedup.bam"], + E14C = ["results_10M/bwa/ENCFF784ORI_E14.5_H3K27ac_q30.sorted.dedup.bam", "results_10M/bwa/ENCFF002HZV_E14.5_H3K27ac_q30.sorted.dedup.bam"], + E15C = ["results_10M/bwa/ENCFF727QTS_E15.5_H3K27ac_q30.sorted.dedup.bam", "results_10M/bwa/ENCFF182XFG_E15.5_H3K27ac_q30.sorted.dedup.bam"], output: - E10 = "results/bwa/E10.5_H3K27ac_q30.sorted.pooled.dedup.bam", - E11 = "results/bwa/E11.5_H3K27ac_q30.sorted.pooled.dedup.bam", - E12 = "results/bwa/E12.5_H3K27ac_q30.sorted.pooled.dedup.bam", - E13 = "results/bwa/E13.5_H3K27ac_q30.sorted.pooled.dedup.bam", - E14 = "results/bwa/E14.5_H3K27ac_q30.sorted.pooled.dedup.bam", - E15 = "results/bwa/E15.5_H3K27ac_q30.sorted.pooled.dedup.bam", - E10C = "results/bwa/input_E10.5_H3K27ac_q30.sorted.dedup.bam", - E11C = "results/bwa/input_E11.5_H3K27ac_q30.sorted.dedup.bam", - E12C = "results/bwa/input_E12.5_H3K27ac_q30.sorted.dedup.bam", - E13C = "results/bwa/input_E13.5_H3K27ac_q30.sorted.dedup.bam", - E14C = "results/bwa/input_E14.5_H3K27ac_q30.sorted.dedup.bam", - E15C = "results/bwa/input_E15.5_H3K27ac_q30.sorted.dedup.bam" + E10 = "results_10M/bwa/E10.5_H3K27ac_q30.sorted.pooled.dedup.bam", + E11 = "results_10M/bwa/E11.5_H3K27ac_q30.sorted.pooled.dedup.bam", + E12 = "results_10M/bwa/E12.5_H3K27ac_q30.sorted.pooled.dedup.bam", + E13 = "results_10M/bwa/E13.5_H3K27ac_q30.sorted.pooled.dedup.bam", + E14 = "results_10M/bwa/E14.5_H3K27ac_q30.sorted.pooled.dedup.bam", + E15 = "results_10M/bwa/E15.5_H3K27ac_q30.sorted.pooled.dedup.bam", + E10C = "results_10M/bwa/input_E10.5_H3K27ac_q30.sorted.dedup.bam", + E11C = "results_10M/bwa/input_E11.5_H3K27ac_q30.sorted.dedup.bam", + E12C = "results_10M/bwa/input_E12.5_H3K27ac_q30.sorted.dedup.bam", + E13C = "results_10M/bwa/input_E13.5_H3K27ac_q30.sorted.dedup.bam", + E14C = "results_10M/bwa/input_E14.5_H3K27ac_q30.sorted.dedup.bam", + E15C = "results_10M/bwa/input_E15.5_H3K27ac_q30.sorted.dedup.bam" log: - E10 = "logs/E10.5_H3K27ac.mergeBAM", - E11 = "logs/E11.5_H3K27ac.mergeBAM", - E12 = "logs/E12.5_H3K27ac.mergeBAM", - E13 = "logs/E13.5_H3K27ac.mergeBAM", - E14 = "logs/E14.5_H3K27ac.mergeBAM", - E15 = "logs/E15.5_H3K27ac.mergeBAM", - E10C = "logs/input_E10.5_H3K27ac.mergeBAM", - E11C = "logs/input_E11.5_H3K27ac.mergeBAM", - E12C = "logs/input_E12.5_H3K27ac.mergeBAM", - E13C = "logs/input_E13.5_H3K27ac.mergeBAM", - E14C = "logs/input_E14.5_H3K27ac.mergeBAM", - E15C = "logs/input_E15.5_H3K27ac.mergeBAM" + E10 = "results_10M/logs/E10.5_H3K27ac.mergeBAM", + E11 = "results_10M/logs/E11.5_H3K27ac.mergeBAM", + E12 = "results_10M/logs/E12.5_H3K27ac.mergeBAM", + E13 = "results_10M/logs/E13.5_H3K27ac.mergeBAM", + E14 = "results_10M/logs/E14.5_H3K27ac.mergeBAM", + E15 = "results_10M/logs/E15.5_H3K27ac.mergeBAM", + E10C = "results_10M/logs/input_E10.5_H3K27ac.mergeBAM", + E11C = "results_10M/logs/input_E11.5_H3K27ac.mergeBAM", + E12C = "results_10M/logs/input_E12.5_H3K27ac.mergeBAM", + E13C = "results_10M/logs/input_E13.5_H3K27ac.mergeBAM", + E14C = "results_10M/logs/input_E14.5_H3K27ac.mergeBAM", + E15C = "results_10M/logs/input_E15.5_H3K27ac.mergeBAM" run: shell("samtools merge {output.E10} {input.E10} 2> {log.E10}") shell("samtools merge {output.E11} {input.E11} 2> {log.E11}") @@ -253,49 +259,49 @@ rule mergeBAMreplicates: rule mappingStats: input: - a="results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam", - b="results/bwa/{sample}_{stage}_{mark}_q30.dupmark.bam", - c="results/bwa/{sample}_{stage}_{mark}_q30.sorted.bam", - d="results/bwa/{sample}_{stage}_{mark}.bam" + a="results_10M/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam", + #b="results_10M/bwa/{sample}_{stage}_{mark}_q30.dupmark.bam", + #c="results_10M/bwa/{sample}_{stage}_{mark}_q30.sorted.bam", + #d="results_10M/bwa/{sample}_{stage}_{mark}.bam" output: - a="results/qc/{sample}_{stage}_{mark}.dedup.flagstat.qc", - b="results/qc/{sample}_{stage}_{mark}.dupmark.flagstat.qc", - c="results/qc/{sample}_{stage}_{mark}.q30.flagstat.qc", - d="results/qc/{sample}_{stage}_{mark}.unfiltered.flagstat.qc", + a="results_10M/qc/{sample}_{stage}_{mark}.dedup.flagstat.qc", + # b="results_10M/qc/{sample}_{stage}_{mark}.dupmark.flagstat.qc", + # c="results_10M/qc/{sample}_{stage}_{mark}.q30.flagstat.qc", + # d="results_10M/qc/{sample}_{stage}_{mark}.unfiltered.flagstat.qc", run: shell("samtools flagstat {input.a} > {output.a}") - shell("samtools flagstat {input.b} > {output.b}") - shell("samtools flagstat {input.c} > {output.c}") - shell("samtools flagstat {input.d} > {output.d}") + # shell("samtools flagstat {input.b} > {output.b}") + # shell("samtools flagstat {input.c} > {output.c}") + # shell("samtools flagstat {input.d} > {output.d}") + + +rule sort_name: + input: + "results_10M/bwa/{sample}_{stage}_{mark}_q30.dupmark.bam" + output: + tmp = "results_10M/bwa/{sample}_{stage}_{mark}_q30.dupmark.tmp.bam" + log: + "results_10M/logs/{sample}_{stage}_{mark}.pbc.sort" + run: + shell("samtools sort -n {input} -o {output.tmp} 2> {log}") + +rule estimate_lib_complexity: + input: + "results_10M/bwa/{sample}_{stage}_{mark}_q30.dupmark.tmp.bam" + output: + qc = "results_10M/qc/{sample}_{stage}_{mark}.pbc.qc", + log: + "results_10M/logs/{sample}_{stage}_{mark}.pbc" + shell: + """ + bedtools bamtobed -i {input} \ + | awk 'BEGIN{{OFS="\\t"}}{{print $1,$2,$4,$6}}' \ + | sort | uniq -c \ + | awk 'BEGIN{{mt=0;m0=0;m1=0;m2=0}} ($1==1){{m1=m1+1}} ($1==2){{m2=m2+1}} \ + {{m0=m0+1}} {{mt=mt+$1}} END{{printf "%d\\t%d\\t%d\\t%d\\t%f\\t%f\\t%f\\n" ,mt,m0,m1,m2,m0/mt,m1/m0,m1/m2}}' \ + > {output.qc} + """ - -# rule sort_name: -# input: -# "results/bwa/{sample}_{stage}_{mark}_q30.dupmark.bam" -# output: -# tmp = "results/bwa/{sample}_{stage}_{mark}_q30.dupmark.tmp.bam" -# log: -# "logs/{sample}_{stage}_{mark}.pbc.sort" -# run: -# shell("samtools sort -n {input} -o {output.tmp} 2> {log}") -# -# rule estimate_lib_complexity: -# input: -# "results/bwa/{sample}_{stage}_{mark}_q30.dupmark.tmp.bam" -# output: -# qc = "results/qc/{sample}_{stage}_{mark}.pbc.qc", -# log: -# "logs/{sample}_{stage}_{mark}.pbc" -# shell: -# """ -# bedtools bamtobed -i {input} \ -# | awk 'BEGIN{{OFS="\\t"}}{{print $1,$2,$4,$6}}' \ -# | sort | uniq -c \ -# | awk 'BEGIN{{mt=0;m0=0;m1=0;m2=0}} ($1==1){{m1=m1+1}} ($1==2){{m2=m2+1}} \ -# {{m0=m0+1}} {{mt=mt+$1}} END{{printf "%d\\t%d\\t%d\\t%d\\t%f\\t%f\\t%f\\n" ,mt,m0,m1,m2,m0/mt,m1/m0,m1/m2}}' \ -# > {output.qc} -# """ -# ## convert to bedPE ## print read 1 scaffold, read 1 start coordinate, read 2 scaffold, read 2 end coordinate, strand read 1, strand read 2 ## remove mitochondrial genome @@ -314,13 +320,13 @@ rule mappingStats: rule deeptools_summary: input: - expand(["results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"], zip, sample=all_samples, stage=STAGE, mark = MARK) + expand(["results_10M/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"], zip, sample=all_samples, stage=STAGE, mark=MARK) output: - sum="results/qc/multibamsum.npz", - counts="results/qc/multibamsum.tab" + sum="results_10M/qc/H3K27ac_multibamsum.npz", + counts="results_10M/qc/H3K27ac_multibamsum.tab" threads: 32 log: - "logs/multisummary.deepTools" + "results_10M/logs/H3K27ac_multisummary.deepTools" shell: " multiBamSummary bins \ -p {threads} \ @@ -330,12 +336,12 @@ rule deeptools_summary: --outRawCounts {output.counts} 2> {log}" rule deeptools_correlation: - input: "results/qc/multibamsum.npz" + input: "results_10M/qc/H3K27ac_multibamsum.npz" output: - fig="results/qc/pearsoncor_multibamsum.png", - matrix="results/qc/pearsoncor_multibamsum_matrix.txt" + fig="results_10M/qc/H3K27ac_pearsoncor_multibamsum.png", + matrix="results_10M/qc/H3K27ac_pearsoncor_multibamsum_matrix.txt" log: - "logs/correlation.deepTools" + "results_10M/logs/H3K27ac_correlation.deepTools" shell: "plotCorrelation \ --corData {input} \ @@ -349,26 +355,26 @@ rule deeptools_correlation: rule deeptools_coverage: input: - bam = "results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam", - bai = "results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai" + bam = "results_10M/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam", + bai = "results_10M/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai" output: - "results/qc/{sample}_{stage}_{mark}.SeqDepthNorm.bw" + "results_10M/qc/{sample}_{stage}_{mark}.SeqDepthNorm.bw" log: - "logs/{sample}_{stage}_{mark}_coverage.deepTools" + "results_10M/logs/{sample}_{stage}_{mark}_coverage.deepTools" shell: "bamCoverage --bam {input.bam} --binSize 10 --normalizeUsing RPGC --effectiveGenomeSize 2308125349 --numberOfProcessors 4 -o {output} 2> {log}" rule deeptools_fingerprint: input: - bam = expand(["results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"], zip, sample=all_samples, stage=STAGE, mark = MARK), - bai = expand(["results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai"], zip, sample=all_samples, stage=STAGE, mark = MARK) + bam = expand(["results_10M/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"], zip, sample=all_samples, stage=STAGE, mark = MARK), + bai = expand(["results_10M/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai"], zip, sample=all_samples, stage=STAGE, mark = MARK) output: - fig="results/qc/multiBAM_fingerprint.png", - metrics="results/qc/multiBAM_fingerprint_metrics.txt", - rawcounts="results/qc/multiBAM_fingerprint_rawcounts.txt" + fig="results_10M/qc/H3K27ac_multiBAM_fingerprint.png", + metrics="results_10M/qc/H3K27ac_multiBAM_fingerprint_metrics.txt", + rawcounts="results_10M/qc/H3K27ac_multiBAM_fingerprint_rawcounts.txt" threads: 32 log: - "logs/fingerprint.deepTools" + "results_10M/logs/fingerprint.deepTools" shell: "plotFingerprint -p {threads} \ -b {input.bam} \ @@ -381,13 +387,13 @@ rule deeptools_fingerprint: rule deeptools_plotCoverage: input: - bam = expand(["results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"], zip, sample=all_samples, stage=STAGE, mark=MARK), - bai = expand(["results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai"], zip, sample=all_samples, stage=STAGE, mark=MARK) + bam = expand(["results_10M/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"], zip, sample=all_samples, stage=STAGE, mark=MARK), + bai = expand(["results_10M/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai"], zip, sample=all_samples, stage=STAGE, mark=MARK) output: - fig="results/qc/plot_coverage.png", - rawcounts="results/qc/plot_coverage_rawcounts.tab" + fig="results_10M/qc/H3K27ac_plot_coverage.png", + rawcounts="results_10M/qc/H3K27ac_plot_coverage_rawcounts.tab" log: - "logs/plotCoverage.deepTools" + "results_10M/logs/H3K27ac_plotCoverage.deepTools" shell: "plotCoverage --bamfiles {input.bam} --plotFile {output.fig} \ -n 1000000 \ @@ -401,9 +407,9 @@ rule deeptools_plotCoverage: rule bamtobed_crossC: input: - "results/bwa/{sample}_{stage}_{mark}_q30.dupmark.bam" + "results_10M/bwa/{sample}_{stage}_{mark}_q30.dupmark.bam" output: - tagAlign = "results/bed/{sample}_{stage}_{mark}_q30_SE.tagAlign.gz" + tagAlign = "results_10M/bed/{sample}_{stage}_{mark}_q30_SE.tagAlign.gz" shell: """ bedtools bamtobed -i {input} | \ @@ -415,10 +421,10 @@ rule bamtobed_crossC: ## Estimate read length from first 100 reads rule subsample_aligned_reads: input: - "results/bed/{sample}_{stage}_{mark}_q30_SE.tagAlign.gz" + "results_10M/bed/{sample}_{stage}_{mark}_q30_SE.tagAlign.gz" output: - subsample = "results/bed/{sample}_{stage}_{mark}.filt.sample.15Mreads.SE.tagAlign.gz", - tmp = "results/bed/{sample}_{stage}_{mark}_R1_trimmed_q30_SE.tagAlign.tmp" + subsample = "results_10M/bed/{sample}_{stage}_{mark}.filt.sample.15Mreads.SE.tagAlign.gz", + tmp = "results_10M/bed/{sample}_{stage}_{mark}_R1_trimmed_q30_SE.tagAlign.tmp" params: nreads= 15000000 run: @@ -437,12 +443,12 @@ rule subsample_aligned_reads: rule cross_correlation_SSP: input: - "results/bed/{sample}_{stage}_{mark}.filt.sample.15Mreads.SE.tagAlign.gz" + "results_10M/bed/{sample}_{stage}_{mark}.filt.sample.15Mreads.SE.tagAlign.gz" output: - CC_SCORES_FILE="results/qc/{sample}_{stage}_{mark}_filt_15Mreads.SE.cc.qc", - CC_PLOT_FILE="results/qc/{sample}_{stage}_{mark}_filt_15Mreads.SE.cc.plot.pdf" + CC_SCORES_FILE="results_10M/qc/{sample}_{stage}_{mark}_filt_15Mreads.SE.cc.qc", + CC_PLOT_FILE="results_10M/qc/{sample}_{stage}_{mark}_filt_15Mreads.SE.cc.plot.pdf" log: - "logs/{sample}_{stage}_{mark}_filt_15Mreads.SE.spp.log" + "results_10M/logs/{sample}_{stage}_{mark}_filt_15Mreads.SE.spp.log" params: EXCLUSION_RANGE_MIN=-500, EXCLUSION_RANGE_MAX=85 @@ -460,46 +466,46 @@ rule cross_correlation_SSP: rule call_peaks_macs2: input: - control = "results/bwa/{control}_{stage}_{mark}_q30.sorted.dedup.bam", - case = "results/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bam" + control = "results_10M/bwa/{control}_{stage}_{mark}_q30.sorted.dedup.bam", + case = "results_10M/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bam" output: - "results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.xls", - "results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_summits.bed", - "results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak", + "results_10M/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.xls", + "results_10M/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_summits.bed", + "results_10M/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak", log: - "logs/{case}_vs_{control}_{stage}_{mark}_call_peaks_macs2.log" + "results_10M/logs/{case}_vs_{control}_{stage}_{mark}_call_peaks_macs2.log" params: name = "{case}_vs_{control}_{stage}_{mark}_macs2", shell: " macs2 callpeak -f BAM -t {input.case} \ -c {input.control} --keep-dup all \ - --outdir results/macs2/ -p 0.01 \ + --outdir results_10M/macs2/ -p 0.01 \ -n {params.name} \ -g mm 2> {log} " rule call_peaks_macs2_pooled_replicates: input: - E10 = "results/bwa/E10.5_H3K27ac_q30.sorted.pooled.dedup.bam", - E11 = "results/bwa/E11.5_H3K27ac_q30.sorted.pooled.dedup.bam", - E12 = "results/bwa/E12.5_H3K27ac_q30.sorted.pooled.dedup.bam", - E13 = "results/bwa/E13.5_H3K27ac_q30.sorted.pooled.dedup.bam", - E14 = "results/bwa/E14.5_H3K27ac_q30.sorted.pooled.dedup.bam", - E15 = "results/bwa/E15.5_H3K27ac_q30.sorted.pooled.dedup.bam", - E10C = "results/bwa/input_E10.5_H3K27ac_q30.sorted.dedup.bam", - E11C = "results/bwa/input_E11.5_H3K27ac_q30.sorted.dedup.bam", - E12C = "results/bwa/input_E12.5_H3K27ac_q30.sorted.dedup.bam", - E13C = "results/bwa/input_E13.5_H3K27ac_q30.sorted.dedup.bam", - E14C = "results/bwa/input_E14.5_H3K27ac_q30.sorted.dedup.bam", - E15C = "results/bwa/input_E15.5_H3K27ac_q30.sorted.dedup.bam" + E10 = "results_10M/bwa/E10.5_H3K27ac_q30.sorted.pooled.dedup.bam", + E11 = "results_10M/bwa/E11.5_H3K27ac_q30.sorted.pooled.dedup.bam", + E12 = "results_10M/bwa/E12.5_H3K27ac_q30.sorted.pooled.dedup.bam", + E13 = "results_10M/bwa/E13.5_H3K27ac_q30.sorted.pooled.dedup.bam", + E14 = "results_10M/bwa/E14.5_H3K27ac_q30.sorted.pooled.dedup.bam", + E15 = "results_10M/bwa/E15.5_H3K27ac_q30.sorted.pooled.dedup.bam", + E10C = "results_10M/bwa/input_E10.5_H3K27ac_q30.sorted.dedup.bam", + E11C = "results_10M/bwa/input_E11.5_H3K27ac_q30.sorted.dedup.bam", + E12C = "results_10M/bwa/input_E12.5_H3K27ac_q30.sorted.dedup.bam", + E13C = "results_10M/bwa/input_E13.5_H3K27ac_q30.sorted.dedup.bam", + E14C = "results_10M/bwa/input_E14.5_H3K27ac_q30.sorted.dedup.bam", + E15C = "results_10M/bwa/input_E15.5_H3K27ac_q30.sorted.dedup.bam" output: - directory("results/macs2/pooled/") + directory("results_10M/macs2/pooled/") log: - E10 = "results/qc/E10.5_H3K27ac.pooled.macs2", - E11 = "results/qc/E11.5_H3K27ac.pooled.macs2", - E12 = "results/qc/E12.5_H3K27ac.pooled.macs2", - E13 = "results/qc/E13.5_H3K27ac.pooled.macs2", - E14 = "results/qc/E14.5_H3K27ac.pooled.macs2", - E15 = "results/qc/E15.5_H3K27ac.pooled.macs2" + E10 = "results_10M/qc/E10.5_H3K27ac.pooled.macs2", + E11 = "results_10M/qc/E11.5_H3K27ac.pooled.macs2", + E12 = "results_10M/qc/E12.5_H3K27ac.pooled.macs2", + E13 = "results_10M/qc/E13.5_H3K27ac.pooled.macs2", + E14 = "results_10M/qc/E14.5_H3K27ac.pooled.macs2", + E15 = "results_10M/qc/E15.5_H3K27ac.pooled.macs2" params: E10 = "E10.5_H3K27ac.pooled.macs2", E11 = "E11.5_H3K27ac.pooled.macs2", @@ -510,32 +516,32 @@ rule call_peaks_macs2_pooled_replicates: run: shell("macs2 callpeak -f BAM -t {input.E10} \ -c {input.E10C} --keep-dup all \ - --outdir results/macs2/ -p 0.01 \ + --outdir results_10M/macs2/ -p 0.01 \ -n {params.E10} \ -g mm 2> {log.E10}" ) shell("macs2 callpeak -f BAM -t {input.E11} \ -c {input.E11C} --keep-dup all \ - --outdir results/macs2/ -p 0.01 \ + --outdir results_10M/macs2/ -p 0.01 \ -n {params.E11} \ -g mm 2> {log.E11}" ) shell("macs2 callpeak -f BAM -t {input.E12} \ -c {input.E12C} --keep-dup all \ - --outdir results/macs2/ -p 0.01 \ + --outdir results_10M/macs2/ -p 0.01 \ -n {params.E12} \ -g mm 2> {log.E12}" ) shell("macs2 callpeak -f BAM -t {input.E13} \ -c {input.E13C} --keep-dup all \ - --outdir results/macs2/ -p 0.01 \ + --outdir results_10M/macs2/ -p 0.01 \ -n {params.E13} \ -g mm 2> {log.E13}" ) shell("macs2 callpeak -f BAM -t {input.E14} \ -c {input.E14C} --keep-dup all \ - --outdir results/macs2/ -p 0.01 \ + --outdir results_10M/macs2/ -p 0.01 \ -n {params.E14} \ -g mm 2> {log.E14}" ) shell("macs2 callpeak -f BAM -t {input.E15} \ -c {input.E15C} --keep-dup all \ - --outdir results/macs2/ -p 0.01 \ + --outdir results_10M/macs2/ -p 0.01 \ -n {params.E15} \ -g mm 2> {log.E15}" ) @@ -548,25 +554,24 @@ rule call_peaks_macs2_pooled_replicates: ## Convert BAM to tagAlign file for calculating FRiP QC metric (Fraction of reads in peaks) rule bamToBed: input: - "results/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bam" + "results_10M/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bam" output: - "results/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bed" + "results_10M/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bed" log: - "logs/{case}_{stage}_{mark}.bamToBed" + "results_10M/logs/{case}_{stage}_{mark}.bamToBed" shell: "samtools sort -n {input} | bedtools bamtobed -i - > {output}" # ## Fraction of reads in peaks rule frip: input: - bed = "results/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bed", - peak = "results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak" + bed = "results_10M/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bed", + peak = "results_10M/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak" output: - "results/qc/{case}_vs_{control}_{stage}_{mark}.frip.txt" + "results_10M/qc/{case}_vs_{control}_{stage}_{mark}.frip.txt" shell: "python2.7 scripts/encode_frip.py {input.bed} {input.peak} > {output}" - # =============================================================================================== # 9. Create consensus peaksets for replicates # =============================================================================================== @@ -576,25 +581,25 @@ rule frip: rule overlap_peaks: input: - E10= ["results/macs2/ENCFF213EBC_vs_ENCFF157KEH_E10.5_H3K27ac_macs2_peaks.narrowPeak", "results/macs2/ENCFF548BRR_vs_ENCFF825AVI_E10.5_H3K27ac_macs2_peaks.narrowPeak"], - E11= ["results/macs2/ENCFF512SFE_vs_ENCFF184CUE_E11.5_H3K27ac_macs2_peaks.narrowPeak", "results/macs2/ENCFF515PKL_vs_ENCFF376FGM_E11.5_H3K27ac_macs2_peaks.narrowPeak"], - E12= ["results/macs2/ENCFF011NFM_vs_ENCFF058AUT_E12.5_H3K27ac_macs2_peaks.narrowPeak", "results/macs2/ENCFF394TZN_vs_ENCFF203JQV_E12.5_H3K27ac_macs2_peaks.narrowPeak"], - E13= ["results/macs2/ENCFF194ORC_vs_ENCFF117QRC_E13.5_H3K27ac_macs2_peaks.narrowPeak", "results/macs2/ENCFF290ZNF_vs_ENCFF248PGK_E13.5_H3K27ac_macs2_peaks.narrowPeak"], - E14= ["results/macs2/ENCFF902HAR_vs_ENCFF002HZV_E14.5_H3K27ac_macs2_peaks.narrowPeak", "results/macs2/ENCFF327VAO_vs_ENCFF784ORI_E14.5_H3K27ac_macs2_peaks.narrowPeak"], - E15= ["results/macs2/ENCFF707WKL_vs_ENCFF182XFG_E15.5_H3K27ac_macs2_peaks.narrowPeak", "results/macs2/ENCFF584JFB_vs_ENCFF727QTS_E15.5_H3K27ac_macs2_peaks.narrowPeak"], - E10_pooled="results/macs2/E10.5_H3K27ac.pooled.macs2_peaks.narrowPeak", - E11_pooled="results/macs2/E11.5_H3K27ac.pooled.macs2_peaks.narrowPeak", - E12_pooled="results/macs2/E12.5_H3K27ac.pooled.macs2_peaks.narrowPeak", - E13_pooled="results/macs2/E13.5_H3K27ac.pooled.macs2_peaks.narrowPeak", - E14_pooled="results/macs2/E14.5_H3K27ac.pooled.macs2_peaks.narrowPeak", - E15_pooled="results/macs2/E15.5_H3K27ac.pooled.macs2_peaks.narrowPeak", + E10= ["results_10M/macs2/ENCFF213EBC_vs_ENCFF157KEH_E10.5_H3K27ac_macs2_peaks.narrowPeak", "results_10M/macs2/ENCFF548BRR_vs_ENCFF825AVI_E10.5_H3K27ac_macs2_peaks.narrowPeak"], + E11= ["results_10M/macs2/ENCFF512SFE_vs_ENCFF184CUE_E11.5_H3K27ac_macs2_peaks.narrowPeak", "results_10M/macs2/ENCFF515PKL_vs_ENCFF376FGM_E11.5_H3K27ac_macs2_peaks.narrowPeak"], + E12= ["results_10M/macs2/ENCFF011NFM_vs_ENCFF058AUT_E12.5_H3K27ac_macs2_peaks.narrowPeak", "results_10M/macs2/ENCFF394TZN_vs_ENCFF203JQV_E12.5_H3K27ac_macs2_peaks.narrowPeak"], + E13= ["results_10M/macs2/ENCFF194ORC_vs_ENCFF117QRC_E13.5_H3K27ac_macs2_peaks.narrowPeak", "results_10M/macs2/ENCFF290ZNF_vs_ENCFF248PGK_E13.5_H3K27ac_macs2_peaks.narrowPeak"], + E14= ["results_10M/macs2/ENCFF902HAR_vs_ENCFF002HZV_E14.5_H3K27ac_macs2_peaks.narrowPeak", "results_10M/macs2/ENCFF327VAO_vs_ENCFF784ORI_E14.5_H3K27ac_macs2_peaks.narrowPeak"], + E15= ["results_10M/macs2/ENCFF707WKL_vs_ENCFF182XFG_E15.5_H3K27ac_macs2_peaks.narrowPeak", "results_10M/macs2/ENCFF584JFB_vs_ENCFF727QTS_E15.5_H3K27ac_macs2_peaks.narrowPeak"], + E10_pooled="results_10M/macs2/E10.5_H3K27ac.pooled.macs2_peaks.narrowPeak", + E11_pooled="results_10M/macs2/E11.5_H3K27ac.pooled.macs2_peaks.narrowPeak", + E12_pooled="results_10M/macs2/E12.5_H3K27ac.pooled.macs2_peaks.narrowPeak", + E13_pooled="results_10M/macs2/E13.5_H3K27ac.pooled.macs2_peaks.narrowPeak", + E14_pooled="results_10M/macs2/E14.5_H3K27ac.pooled.macs2_peaks.narrowPeak", + E15_pooled="results_10M/macs2/E15.5_H3K27ac.pooled.macs2_peaks.narrowPeak", output: - E10= "results/macs2/H3K27ac_E10.5_overlap.narrowPeak", - E11= "results/macs2/H3K27ac_E11.5_overlap.narrowPeak", - E12= "results/macs2/H3K27ac_E12.5_overlap.narrowPeak", - E13= "results/macs2/H3K27ac_E13.5_overlap.narrowPeak", - E14= "results/macs2/H3K27ac_E14.5_overlap.narrowPeak", - E15= "results/macs2/H3K27ac_E15.5_overlap.narrowPeak" + E10= "results_10M/macs2/H3K27ac_E10.5_overlap.narrowPeak", + E11= "results_10M/macs2/H3K27ac_E11.5_overlap.narrowPeak", + E12= "results_10M/macs2/H3K27ac_E12.5_overlap.narrowPeak", + E13= "results_10M/macs2/H3K27ac_E13.5_overlap.narrowPeak", + E14= "results_10M/macs2/H3K27ac_E14.5_overlap.narrowPeak", + E15= "results_10M/macs2/H3K27ac_E15.5_overlap.narrowPeak" run: shell("python2.7 scripts/overlap_peaks.py {input.E10} {input.E10_pooled} {output.E10}") shell("python2.7 scripts/overlap_peaks.py {input.E11} {input.E11_pooled} {output.E11}") @@ -603,6 +608,37 @@ rule overlap_peaks: shell("python2.7 scripts/overlap_peaks.py {input.E14} {input.E14_pooled} {output.E14}") shell("python2.7 scripts/overlap_peaks.py {input.E15} {input.E15_pooled} {output.E15}") +rule overlap_frip: + input: + E10bam = "results_10M/bwa/E10.5_H3K27ac_q30.sorted.pooled.dedup.bam", + E11bam = "results_10M/bwa/E11.5_H3K27ac_q30.sorted.pooled.dedup.bam", + E12bam = "results_10M/bwa/E12.5_H3K27ac_q30.sorted.pooled.dedup.bam", + E13bam = "results_10M/bwa/E13.5_H3K27ac_q30.sorted.pooled.dedup.bam", + E14bam = "results_10M/bwa/E14.5_H3K27ac_q30.sorted.pooled.dedup.bam", + E15bam = "results_10M/bwa/E15.5_H3K27ac_q30.sorted.pooled.dedup.bam", + E10bed = "results_10M/macs2/H3K27ac_E10.5_overlap.narrowPeak", + E11bed = "results_10M/macs2/H3K27ac_E11.5_overlap.narrowPeak", + E12bed = "results_10M/macs2/H3K27ac_E12.5_overlap.narrowPeak", + E13bed = "results_10M/macs2/H3K27ac_E13.5_overlap.narrowPeak", + E14bed = "results_10M/macs2/H3K27ac_E14.5_overlap.narrowPeak", + E15bed = "results_10M/macs2/H3K27ac_E15.5_overlap.narrowPeak" + output: + E10bed = "results_10M/macs2/H3K27ac_E10.5_overlap.frip", + E11bed = "results_10M/macs2/H3K27ac_E11.5_overlap.frip", + E12bed = "results_10M/macs2/H3K27ac_E12.5_overlap.frip", + E13bed = "results_10M/macs2/H3K27ac_E13.5_overlap.frip", + E14bed = "results_10M/macs2/H3K27ac_E14.5_overlap.frip", + E15bed = "results_10M/macs2/H3K27ac_E15.5_overlap.frip" + run: + shell("python2.7 scripts/encode_frip.py {input.E10bam} {input.E10bed} > {output.E10bed}") + shell("python2.7 scripts/encode_frip.py {input.E11bam} {input.E11bed} > {output.E11bed}") + shell("python2.7 scripts/encode_frip.py {input.E12bam} {input.E12bed} > {output.E12bed}") + shell("python2.7 scripts/encode_frip.py {input.E13bam} {input.E13bed} > {output.E13bed}") + shell("python2.7 scripts/encode_frip.py {input.E14bam} {input.E14bed} > {output.E14bed}") + shell("python2.7 scripts/encode_frip.py {input.E15bam} {input.E15bed} > {output.E15bed}") + + + # =============================================================================================== # 10. Combine all QC into multiqc output # =============================================================================================== @@ -610,54 +646,54 @@ rule overlap_peaks: # rule multiqc: # input: # fastqc - # expand("results/fastqc/{sample}_R1_fastqc.html", sample=all_samples), - # expand("results/fastqc/{sample}_R2_fastqc.html", sample=all_samples), - # expand("results/fastqc/{sample}_R1_fastqc.zip", sample=all_samples), - # expand("results/fastqc/{sample}_R2_fastqc.zip", sample=all_samples), + # expand("results_10M/fastqc/{sample}_R1_fastqc.html", sample=all_samples), + # expand("results_10M/fastqc/{sample}_R2_fastqc.html", sample=all_samples), + # expand("results_10M/fastqc/{sample}_R1_fastqc.zip", sample=all_samples), + # expand("results_10M/fastqc/{sample}_R2_fastqc.zip", sample=all_samples), # # bwa - # expand("logs/{sample}.align", sample=all_samples), - # expand("logs/{sample}.flagstat.qc", sample=all_samples), + # expand("results_10M/logs/{sample}.align", sample=all_samples), + # expand("results_10M/logs/{sample}.flagstat.qc", sample=all_samples), # # preseq - # expand("results/preseq/{sample}.ccurve.txt", sample=all_samples), + # expand("results_10M/preseq/{sample}.ccurve.txt", sample=all_samples), # # picard - # expand("results/picard/{sample}_est_lib_complex_metrics.txt", sample=all_samples), + # expand("results_10M/picard/{sample}_est_lib_complex_metrics.txt", sample=all_samples), # # deepTools - # "results/deeptools/multibamsum.npz", - # "results/deeptools/multibamsum.tab", - # "results/deeptools/pearsoncor_multibamsum.png", - # "results/deeptools/pearsoncor_multibamsum_matrix.txt", - # expand("results/deeptools/{sample}.SeqDepthNorm.bw", sample=all_samples), - # "results/deeptools/multiBAM_fingerprint.png", - # "results/deeptools/multiBAM_fingerprint_metrics.txt", - # "results/deeptools/multiBAM_fingerprint_rawcounts.txt", - # "results/deeptools/plot_coverage.png", - # "results/deeptools/plot_coverage_rawcounts.tab", - # "results/deeptools/bamPEFragmentSize_hist.png", - # "results/deeptools/bamPEFragmentSize_rawcounts.tab", + # "results_10M/deeptools/multibamsum.npz", + # "results_10M/deeptools/multibamsum.tab", + # "results_10M/deeptools/pearsoncor_multibamsum.png", + # "results_10M/deeptools/pearsoncor_multibamsum_matrix.txt", + # expand("results_10M/deeptools/{sample}.SeqDepthNorm.bw", sample=all_samples), + # "results_10M/deeptools/multiBAM_fingerprint.png", + # "results_10M/deeptools/multiBAM_fingerprint_metrics.txt", + # "results_10M/deeptools/multiBAM_fingerprint_rawcounts.txt", + # "results_10M/deeptools/plot_coverage.png", + # "results_10M/deeptools/plot_coverage_rawcounts.tab", + # "results_10M/deeptools/bamPEFragmentSize_hist.png", + # "results_10M/deeptools/bamPEFragmentSize_rawcounts.tab", # # phantomPeaks - # expand("results/phantomPeaks/{sample}.spp.pdf", sample = IPS), - # expand("results/phantomPeaks/{sample}.spp.Rdata", sample = IPS), - # expand("results/phantomPeaks/{sample}.spp.out", sample = IPS), + # expand("results_10M/phantomPeaks/{sample}.spp.pdf", sample = IPS), + # expand("results_10M/phantomPeaks/{sample}.spp.Rdata", sample = IPS), + # expand("results_10M/phantomPeaks/{sample}.spp.out", sample = IPS), # # macs2 - # expand("results/macs2/{case}_vs_{control}_macs2_peaks.narrowPeak", zip, case=IPS, control=INPUTS), - # expand("results/macs2/{case}_vs_{control}_macs2_peaks.xls", zip, case=IPS, control=INPUTS), - # expand("results/macs2/{case}_vs_{control}_macs2_summits.bed", zip, case=IPS, control=INPUTS), - # expand("results/macs2/{case}-vs-{control}-narrowpeak-count_mqc.json", zip, case=IPS, control=INPUTS), - # expand("results/frip/{case}.frip.txt", case=IPS) + # expand("results_10M/macs2/{case}_vs_{control}_macs2_peaks.narrowPeak", zip, case=IPS, control=INPUTS), + # expand("results_10M/macs2/{case}_vs_{control}_macs2_peaks.xls", zip, case=IPS, control=INPUTS), + # expand("results_10M/macs2/{case}_vs_{control}_macs2_summits.bed", zip, case=IPS, control=INPUTS), + # expand("results_10M/macs2/{case}-vs-{control}-narrowpeak-count_mqc.json", zip, case=IPS, control=INPUTS), + # expand("results_10M/frip/{case}.frip.txt", case=IPS) # params: - # "results/fastqc/", - # "results/bwa/", - # "logs/", - # "results/deeptools/", - # "results/macs2/", - # "results/phantomPeaks/", - # "results/frip/", - # "results/picard/" + # "results_10M/fastqc/", + # "results_10M/bwa/", + # "results_10M/logs/", + # "results_10M/deeptools/", + # "results_10M/macs2/", + # "results_10M/phantomPeaks/", + # "results_10M/frip/", + # "results_10M/picard/" # output: - # directory("results/multiqc/multiqc_report_data/"), - # "results/multiqc/multiqc_report.html", + # directory("results_10M/multiqc/multiqc_report_data/"), + # "results_10M/multiqc/multiqc_report.html", # log: - # "logs/multiqc.log" + # "results_10M/logs/multiqc.log" # shell: - # "multiqc -f {params} -m fastqc -m samtools -m picard -m preseq -m deeptools -m phantompeakqualtools -m macs2 -o results/multiqc/ \ + # "multiqc -f {params} -m fastqc -m samtools -m picard -m preseq -m deeptools -m phantompeakqualtools -m macs2 -o results_10M/multiqc/ \ # -n multiqc_report.html 2> {log}"