Skip to content
Snippets Groups Projects
Commit c25710d6 authored by Laura Cook's avatar Laura Cook
Browse files

changed directory to call peaks on 10M reads

parent e650b43f
No related branches found
No related tags found
No related merge requests found
......@@ -12,7 +12,7 @@
# 10. Present QC for raw read, alignment, peak-calling in MultiQC
configfile: "configs/config_H3K427ac.yaml"
configfile: "configs/config_H3K27ac.yaml"
# Contains sample and genome information
# calls the SRR.txt file that contains the samples as either IP or control
......@@ -48,69 +48,75 @@ all_samples = IPS + INPUTS
rule all:
input:
expand("results/bwa/{sample}_{stage}_{mark}_q30.sorted.bam", zip, sample=all_samples, stage=STAGE, mark=MARK),
expand("results/bwa/{sample}_{stage}_{mark}_q30.dupmark.bam", zip, sample=all_samples, stage=STAGE, mark=MARK),
expand("results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam", zip, sample=all_samples, stage=STAGE, mark=MARK),
expand("results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai", zip, sample=all_samples, stage=STAGE, mark=MARK),
expand("results/qc/{sample}_{stage}_{mark}.dedup.flagstat.qc", zip, sample=all_samples, stage=STAGE, mark=MARK),
expand("results/qc/{sample}_{stage}_{mark}.dupmark.flagstat.qc", zip, sample=all_samples, stage=STAGE, mark=MARK),
expand("results/qc/{sample}_{stage}_{mark}.q30.flagstat.qc", zip, sample=all_samples, stage=STAGE, mark=MARK),
expand("results/qc/{sample}_{stage}_{mark}.unfiltered.flagstat.qc", zip, sample=all_samples, stage=STAGE, mark=MARK),
"results/bwa/E10.5_H3K27ac_q30.sorted.pooled.dedup.bam",
"results/bwa/E11.5_H3K27ac_q30.sorted.pooled.dedup.bam",
"results/bwa/E12.5_H3K27ac_q30.sorted.pooled.dedup.bam",
"results/bwa/E13.5_H3K27ac_q30.sorted.pooled.dedup.bam",
"results/bwa/E14.5_H3K27ac_q30.sorted.pooled.dedup.bam",
"results/bwa/E15.5_H3K27ac_q30.sorted.pooled.dedup.bam",
"results/bwa/input_E10.5_H3K27ac_q30.sorted.dedup.bam",
"results/bwa/input_E11.5_H3K27ac_q30.sorted.dedup.bam",
"results/bwa/input_E12.5_H3K27ac_q30.sorted.dedup.bam",
"results/bwa/input_E13.5_H3K27ac_q30.sorted.dedup.bam",
"results/bwa/input_E14.5_H3K27ac_q30.sorted.dedup.bam",
"results/bwa/input_E15.5_H3K27ac_q30.sorted.dedup.bam",
"logs/E10.5_H3K27ac.mergeBAM",
"logs/E11.5_H3K27ac.mergeBAM",
"logs/E12.5_H3K27ac.mergeBAM",
"logs/E13.5_H3K27ac.mergeBAM",
"logs/E14.5_H3K27ac.mergeBAM",
"logs/E15.5_H3K27ac.mergeBAM",
"logs/input_E10.5_H3K27ac.mergeBAM",
"logs/input_E11.5_H3K27ac.mergeBAM",
"logs/input_E12.5_H3K27ac.mergeBAM",
"logs/input_E13.5_H3K27ac.mergeBAM",
"logs/input_E14.5_H3K27ac.mergeBAM",
"logs/input_E15.5_H3K27ac.mergeBAM",
# "results/qc/multibamsum.npz",
# "results/qc/multibamsum.tab",
# "results/qc/pearsoncor_multibamsum.png",
# "results/qc/pearsoncor_multibamsum_matrix.txt",
# expand("results/qc/{sample}_{stage}_{mark}.SeqDepthNorm.bw", zip, sample=all_samples, stage=STAGE, mark=MARK),
# "results/qc/multiBAM_fingerprint.png",
# "results/qc/multiBAM_fingerprint_metrics.txt",
# "results/qc/multiBAM_fingerprint_rawcounts.txt",
# "results/qc/plot_coverage.png",
# "results/qc/plot_coverage_rawcounts.tab",
#expand("results/bwa/{sample}_{stage}_{mark}_q30.dupmark.tmp.bam", zip, sample=all_samples, stage=STAGE, mark=MARK),
#expand("logs/{sample}_{stage}_{mark}.pbc.sort", zip, sample=all_samples, stage=STAGE, mark=MARK),
#expand("results/qc/{sample}_{stage}_{mark}.pbc.qc", zip, sample=all_samples, stage=STAGE, mark=MARK),
expand("results/qc/{sample}_{stage}_{mark}_filt_15Mreads.SE.cc.qc",zip, sample=all_samples, stage=STAGE, mark=MARK),
expand("results/qc/{sample}_{stage}_{mark}_filt_15Mreads.SE.cc.plot.pdf", zip, sample=all_samples, stage=STAGE, mark=MARK),
expand("results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
expand("results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.xls", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
expand("results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_summits.bed", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
expand("logs/{case}_vs_{control}_{stage}_{mark}_call_peaks_macs2.log", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
expand("results/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bed", zip, case=IPS, stage=STAGE, mark=MARK),
expand("logs/{case}_{stage}_{mark}.bamToBed", zip, case=IPS, stage=STAGE, mark=MARK),
expand("results/qc/{case}_vs_{control}_{stage}_{mark}.frip.txt", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
directory("results/macs2/pooled/"),
"results/macs2/H3K27ac_E10.5_overlap.narrowPeak",
"results/macs2/H3K27ac_E11.5_overlap.narrowPeak",
"results/macs2/H3K27ac_E12.5_overlap.narrowPeak",
"results/macs2/H3K27ac_E13.5_overlap.narrowPeak",
"results/macs2/H3K27ac_E14.5_overlap.narrowPeak",
"results/macs2/H3K27ac_E15.5_overlap.narrowPeak"
# directory("results/multiqc/multiqc_report_data/"),
# "results/multiqc/multiqc_report.html"
#expand("results_10M/bwa/{sample}_{stage}_{mark}_q30.sorted.bam", zip, sample=all_samples, stage=STAGE, mark=MARK),
#expand("results_10M/bwa/{sample}_{stage}_{mark}_q30.dupmark.bam", zip, sample=all_samples, stage=STAGE, mark=MARK),
#expand("results_10M/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam", zip, sample=all_samples, stage=STAGE, mark=MARK),
expand("results_10M/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai", zip, sample=all_samples, stage=STAGE, mark=MARK),
expand("results_10M/qc/{sample}_{stage}_{mark}.dedup.flagstat.qc", zip, sample=all_samples, stage=STAGE, mark=MARK),
#expand("results_10M/qc/{sample}_{stage}_{mark}.dupmark.flagstat.qc", zip, sample=all_samples, stage=STAGE, mark=MARK),
#expand("results_10M/qc/{sample}_{stage}_{mark}.q30.flagstat.qc", zip, sample=all_samples, stage=STAGE, mark=MARK),
#expand("results_10M/qc/{sample}_{stage}_{mark}.unfiltered.flagstat.qc", zip, sample=all_samples, stage=STAGE, mark=MARK),
"results_10M/bwa/E10.5_H3K27ac_q30.sorted.pooled.dedup.bam",
"results_10M/bwa/E11.5_H3K27ac_q30.sorted.pooled.dedup.bam",
"results_10M/bwa/E12.5_H3K27ac_q30.sorted.pooled.dedup.bam",
"results_10M/bwa/E13.5_H3K27ac_q30.sorted.pooled.dedup.bam",
"results_10M/bwa/E14.5_H3K27ac_q30.sorted.pooled.dedup.bam",
"results_10M/bwa/E15.5_H3K27ac_q30.sorted.pooled.dedup.bam",
"results_10M/bwa/input_E10.5_H3K27ac_q30.sorted.dedup.bam",
"results_10M/bwa/input_E11.5_H3K27ac_q30.sorted.dedup.bam",
"results_10M/bwa/input_E12.5_H3K27ac_q30.sorted.dedup.bam",
"results_10M/bwa/input_E13.5_H3K27ac_q30.sorted.dedup.bam",
"results_10M/bwa/input_E14.5_H3K27ac_q30.sorted.dedup.bam",
"results_10M/bwa/input_E15.5_H3K27ac_q30.sorted.dedup.bam",
"results_10M/logs/E10.5_H3K27ac.mergeBAM",
"results_10M/logs/E11.5_H3K27ac.mergeBAM",
"results_10M/logs/E12.5_H3K27ac.mergeBAM",
"results_10M/logs/E13.5_H3K27ac.mergeBAM",
"results_10M/logs/E14.5_H3K27ac.mergeBAM",
"results_10M/logs/E15.5_H3K27ac.mergeBAM",
"results_10M/logs/input_E10.5_H3K27ac.mergeBAM",
"results_10M/logs/input_E11.5_H3K27ac.mergeBAM",
"results_10M/logs/input_E12.5_H3K27ac.mergeBAM",
"results_10M/logs/input_E13.5_H3K27ac.mergeBAM",
"results_10M/logs/input_E14.5_H3K27ac.mergeBAM",
"results_10M/logs/input_E15.5_H3K27ac.mergeBAM",
"results_10M/qc/H3K27ac_multibamsum.npz",
"results_10M/qc/H3K27ac_multibamsum.tab",
"results_10M/qc/H3K27ac_pearsoncor_multibamsum.png",
"results_10M/qc/H3K27ac_pearsoncor_multibamsum_matrix.txt",
expand("results_10M/qc/{sample}_{stage}_{mark}.SeqDepthNorm.bw", zip, sample=all_samples, stage=STAGE, mark=MARK),
"results_10M/qc/H3K27ac_multiBAM_fingerprint.png",
"results_10M/qc/H3K27ac_multiBAM_fingerprint_metrics.txt",
"results_10M/qc/H3K27ac_multiBAM_fingerprint_rawcounts.txt",
"results_10M/qc/H3K27ac_plot_coverage.png",
"results_10M/qc/H3K27ac_plot_coverage_rawcounts.tab",
#expand("results_10M/bwa/{sample}_{stage}_{mark}_q30.dupmark.tmp.bam", zip, sample=all_samples, stage=STAGE, mark=MARK),
#expand("results_10M/logs/{sample}_{stage}_{mark}.pbc.sort", zip, sample=all_samples, stage=STAGE, mark=MARK),
#expand("results_10M/qc/{sample}_{stage}_{mark}.pbc.qc", zip, sample=all_samples, stage=STAGE, mark=MARK),
#expand("results_10M/qc/{sample}_{stage}_{mark}_filt_15Mreads.SE.cc.qc",zip, sample=all_samples, stage=STAGE, mark=MARK),
#expand("results_10M/qc/{sample}_{stage}_{mark}_filt_15Mreads.SE.cc.plot.pdf", zip, sample=all_samples, stage=STAGE, mark=MARK),
expand("results_10M/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
expand("results_10M/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.xls", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
expand("results_10M/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_summits.bed", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
expand("results_10M/logs/{case}_vs_{control}_{stage}_{mark}_call_peaks_macs2.log", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
expand("results_10M/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bed", zip, case=IPS, stage=STAGE, mark=MARK),
expand("results_10M/logs/{case}_{stage}_{mark}.bamToBed", zip, case=IPS, stage=STAGE, mark=MARK),
expand("results_10M/qc/{case}_vs_{control}_{stage}_{mark}.frip.txt", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
directory("results_10M/macs2/pooled/"),
"results_10M/macs2/H3K27ac_E10.5_overlap.narrowPeak",
"results_10M/macs2/H3K27ac_E11.5_overlap.narrowPeak",
"results_10M/macs2/H3K27ac_E12.5_overlap.narrowPeak",
"results_10M/macs2/H3K27ac_E13.5_overlap.narrowPeak",
"results_10M/macs2/H3K27ac_E14.5_overlap.narrowPeak",
"results_10M/macs2/H3K27ac_E15.5_overlap.narrowPeak",
"results_10M/macs2/H3K27ac_E10.5_overlap.frip",
"results_10M/macs2/H3K27ac_E11.5_overlap.frip",
"results_10M/macs2/H3K27ac_E12.5_overlap.frip",
"results_10M/macs2/H3K27ac_E13.5_overlap.frip",
"results_10M/macs2/H3K27ac_E14.5_overlap.frip",
"results_10M/macs2/H3K27ac_E15.5_overlap.frip",
# directory("results_10M/multiqc/multiqc_report_data/"),
# "results_10M/multiqc/multiqc_report.html"
# ===============================================================================================
# 1. FASTQC
......@@ -149,86 +155,86 @@ rule all:
rule filter:
input:
"results/bwa/{sample}_{stage}_{mark}.bam"
"results_10M/bwa/{sample}_{stage}_{mark}.bam"
output:
"results/bwa/{sample}_{stage}_{mark}_q30.sorted.bam"
"results_10M/bwa/{sample}_{stage}_{mark}_q30.sorted.bam"
log:
"logs/{sample}_{stage}_{mark}.filter"
"results_10M/logs/{sample}_{stage}_{mark}.filter"
shell:
"samtools view -b -F 1804 -q 30 {input} | samtools sort -o {output} - 2> {log}"
rule markDups:
input:
"results/bwa/{sample}_{stage}_{mark}_q30.sorted.bam"
"results_10M/bwa/{sample}_{stage}_{mark}_q30.sorted.bam"
output:
bam="results/bwa/{sample}_{stage}_{mark}_q30.dupmark.bam",
dupQC="results/bwa/{sample}_{stage}_{mark}.dupmark.qc"
bam="results_10M/bwa/{sample}_{stage}_{mark}_q30.dupmark.bam",
dupQC="results_10M/bwa/{sample}_{stage}_{mark}.dupmark.qc"
log:
"logs/{sample}_{stage}_{mark}.dedup"
"results_10M/logs/{sample}_{stage}_{mark}.dedup"
shell:
"picard MarkDuplicates I={input} O={output.bam} \
METRICS_FILE={output.dupQC} REMOVE_DUPLICATES=false ASSUME_SORTED=true 2> {log}"
rule dedup:
input:
"results/bwa/{sample}_{stage}_{mark}_q30.dupmark.bam"
"results_10M/bwa/{sample}_{stage}_{mark}_q30.dupmark.bam"
output:
"results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"
"results_10M/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"
log:
"logs/{sample}_{stage}_{mark}.dedup"
"results_10M/logs/{sample}_{stage}_{mark}.dedup"
shell:
"samtools view -F 1804 -b {input} | samtools sort -o {output} - 2> {log}"
rule indexBam:
input:
"results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"
"results_10M/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"
output:
"results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai"
"results_10M/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai"
log:
"logs/{sample}_{stage}_{mark}.indexBam"
"results_10M/logs/{sample}_{stage}_{mark}.indexBam"
shell:
"samtools index {input} {output} 2> {log}"
rule mergeBAMreplicates:
input:
E10 = ["results/bwa/ENCFF213EBC_E10.5_H3K27ac_q30.sorted.dedup.bam", "results/bwa/ENCFF548BRR_E10.5_H3K27ac_q30.sorted.dedup.bam"],
E11 = ["results/bwa/ENCFF512SFE_E11.5_H3K27ac_q30.sorted.dedup.bam", "results/bwa/ENCFF515PKL_E11.5_H3K27ac_q30.sorted.dedup.bam"],
E12 = ["results/bwa/ENCFF394TZN_E12.5_H3K27ac_q30.sorted.dedup.bam", "results/bwa/ENCFF011NFM_E12.5_H3K27ac_q30.sorted.dedup.bam"],
E13 = ["results/bwa/ENCFF194ORC_E13.5_H3K27ac_q30.sorted.dedup.bam", "results/bwa/ENCFF290ZNF_E13.5_H3K27ac_q30.sorted.dedup.bam"],
E14 = ["results/bwa/ENCFF327VAO_E14.5_H3K27ac_q30.sorted.dedup.bam", "results/bwa/ENCFF902HAR_E14.5_H3K27ac_q30.sorted.dedup.bam"],
E15 = ["results/bwa/ENCFF584JFB_E15.5_H3K27ac_q30.sorted.dedup.bam", "results/bwa/ENCFF707WKL_E15.5_H3K27ac_q30.sorted.dedup.bam"],
E10C = ["results/bwa/ENCFF157KEH_E10.5_H3K27ac_q30.sorted.dedup.bam", "results/bwa/ENCFF825AVI_E10.5_H3K27ac_q30.sorted.dedup.bam"],
E11C = ["results/bwa/ENCFF184CUE_E11.5_H3K27ac_q30.sorted.dedup.bam", "results/bwa/ENCFF376FGM_E11.5_H3K27ac_q30.sorted.dedup.bam"],
E12C = ["results/bwa/ENCFF203JQV_E12.5_H3K27ac_q30.sorted.dedup.bam", "results/bwa/ENCFF058AUT_E12.5_H3K27ac_q30.sorted.dedup.bam"],
E13C = ["results/bwa/ENCFF117QRC_E13.5_H3K27ac_q30.sorted.dedup.bam", "results/bwa/ENCFF248PGK_E13.5_H3K27ac_q30.sorted.dedup.bam"],
E14C = ["results/bwa/ENCFF784ORI_E14.5_H3K27ac_q30.sorted.dedup.bam", "results/bwa/ENCFF002HZV_E14.5_H3K27ac_q30.sorted.dedup.bam"],
E15C = ["results/bwa/ENCFF727QTS_E15.5_H3K27ac_q30.sorted.dedup.bam", "results/bwa/ENCFF182XFG_E15.5_H3K27ac_q30.sorted.dedup.bam"],
E10 = ["results_10M/bwa/ENCFF213EBC_E10.5_H3K27ac_q30.sorted.dedup.bam", "results_10M/bwa/ENCFF548BRR_E10.5_H3K27ac_q30.sorted.dedup.bam"],
E11 = ["results_10M/bwa/ENCFF512SFE_E11.5_H3K27ac_q30.sorted.dedup.bam", "results_10M/bwa/ENCFF515PKL_E11.5_H3K27ac_q30.sorted.dedup.bam"],
E12 = ["results_10M/bwa/ENCFF394TZN_E12.5_H3K27ac_q30.sorted.dedup.bam", "results_10M/bwa/ENCFF011NFM_E12.5_H3K27ac_q30.sorted.dedup.bam"],
E13 = ["results_10M/bwa/ENCFF194ORC_E13.5_H3K27ac_q30.sorted.dedup.bam", "results_10M/bwa/ENCFF290ZNF_E13.5_H3K27ac_q30.sorted.dedup.bam"],
E14 = ["results_10M/bwa/ENCFF327VAO_E14.5_H3K27ac_q30.sorted.dedup.bam", "results_10M/bwa/ENCFF902HAR_E14.5_H3K27ac_q30.sorted.dedup.bam"],
E15 = ["results_10M/bwa/ENCFF584JFB_E15.5_H3K27ac_q30.sorted.dedup.bam", "results_10M/bwa/ENCFF707WKL_E15.5_H3K27ac_q30.sorted.dedup.bam"],
E10C = ["results_10M/bwa/ENCFF157KEH_E10.5_H3K27ac_q30.sorted.dedup.bam", "results_10M/bwa/ENCFF825AVI_E10.5_H3K27ac_q30.sorted.dedup.bam"],
E11C = ["results_10M/bwa/ENCFF184CUE_E11.5_H3K27ac_q30.sorted.dedup.bam", "results_10M/bwa/ENCFF376FGM_E11.5_H3K27ac_q30.sorted.dedup.bam"],
E12C = ["results_10M/bwa/ENCFF203JQV_E12.5_H3K27ac_q30.sorted.dedup.bam", "results_10M/bwa/ENCFF058AUT_E12.5_H3K27ac_q30.sorted.dedup.bam"],
E13C = ["results_10M/bwa/ENCFF117QRC_E13.5_H3K27ac_q30.sorted.dedup.bam", "results_10M/bwa/ENCFF248PGK_E13.5_H3K27ac_q30.sorted.dedup.bam"],
E14C = ["results_10M/bwa/ENCFF784ORI_E14.5_H3K27ac_q30.sorted.dedup.bam", "results_10M/bwa/ENCFF002HZV_E14.5_H3K27ac_q30.sorted.dedup.bam"],
E15C = ["results_10M/bwa/ENCFF727QTS_E15.5_H3K27ac_q30.sorted.dedup.bam", "results_10M/bwa/ENCFF182XFG_E15.5_H3K27ac_q30.sorted.dedup.bam"],
output:
E10 = "results/bwa/E10.5_H3K27ac_q30.sorted.pooled.dedup.bam",
E11 = "results/bwa/E11.5_H3K27ac_q30.sorted.pooled.dedup.bam",
E12 = "results/bwa/E12.5_H3K27ac_q30.sorted.pooled.dedup.bam",
E13 = "results/bwa/E13.5_H3K27ac_q30.sorted.pooled.dedup.bam",
E14 = "results/bwa/E14.5_H3K27ac_q30.sorted.pooled.dedup.bam",
E15 = "results/bwa/E15.5_H3K27ac_q30.sorted.pooled.dedup.bam",
E10C = "results/bwa/input_E10.5_H3K27ac_q30.sorted.dedup.bam",
E11C = "results/bwa/input_E11.5_H3K27ac_q30.sorted.dedup.bam",
E12C = "results/bwa/input_E12.5_H3K27ac_q30.sorted.dedup.bam",
E13C = "results/bwa/input_E13.5_H3K27ac_q30.sorted.dedup.bam",
E14C = "results/bwa/input_E14.5_H3K27ac_q30.sorted.dedup.bam",
E15C = "results/bwa/input_E15.5_H3K27ac_q30.sorted.dedup.bam"
E10 = "results_10M/bwa/E10.5_H3K27ac_q30.sorted.pooled.dedup.bam",
E11 = "results_10M/bwa/E11.5_H3K27ac_q30.sorted.pooled.dedup.bam",
E12 = "results_10M/bwa/E12.5_H3K27ac_q30.sorted.pooled.dedup.bam",
E13 = "results_10M/bwa/E13.5_H3K27ac_q30.sorted.pooled.dedup.bam",
E14 = "results_10M/bwa/E14.5_H3K27ac_q30.sorted.pooled.dedup.bam",
E15 = "results_10M/bwa/E15.5_H3K27ac_q30.sorted.pooled.dedup.bam",
E10C = "results_10M/bwa/input_E10.5_H3K27ac_q30.sorted.dedup.bam",
E11C = "results_10M/bwa/input_E11.5_H3K27ac_q30.sorted.dedup.bam",
E12C = "results_10M/bwa/input_E12.5_H3K27ac_q30.sorted.dedup.bam",
E13C = "results_10M/bwa/input_E13.5_H3K27ac_q30.sorted.dedup.bam",
E14C = "results_10M/bwa/input_E14.5_H3K27ac_q30.sorted.dedup.bam",
E15C = "results_10M/bwa/input_E15.5_H3K27ac_q30.sorted.dedup.bam"
log:
E10 = "logs/E10.5_H3K27ac.mergeBAM",
E11 = "logs/E11.5_H3K27ac.mergeBAM",
E12 = "logs/E12.5_H3K27ac.mergeBAM",
E13 = "logs/E13.5_H3K27ac.mergeBAM",
E14 = "logs/E14.5_H3K27ac.mergeBAM",
E15 = "logs/E15.5_H3K27ac.mergeBAM",
E10C = "logs/input_E10.5_H3K27ac.mergeBAM",
E11C = "logs/input_E11.5_H3K27ac.mergeBAM",
E12C = "logs/input_E12.5_H3K27ac.mergeBAM",
E13C = "logs/input_E13.5_H3K27ac.mergeBAM",
E14C = "logs/input_E14.5_H3K27ac.mergeBAM",
E15C = "logs/input_E15.5_H3K27ac.mergeBAM"
E10 = "results_10M/logs/E10.5_H3K27ac.mergeBAM",
E11 = "results_10M/logs/E11.5_H3K27ac.mergeBAM",
E12 = "results_10M/logs/E12.5_H3K27ac.mergeBAM",
E13 = "results_10M/logs/E13.5_H3K27ac.mergeBAM",
E14 = "results_10M/logs/E14.5_H3K27ac.mergeBAM",
E15 = "results_10M/logs/E15.5_H3K27ac.mergeBAM",
E10C = "results_10M/logs/input_E10.5_H3K27ac.mergeBAM",
E11C = "results_10M/logs/input_E11.5_H3K27ac.mergeBAM",
E12C = "results_10M/logs/input_E12.5_H3K27ac.mergeBAM",
E13C = "results_10M/logs/input_E13.5_H3K27ac.mergeBAM",
E14C = "results_10M/logs/input_E14.5_H3K27ac.mergeBAM",
E15C = "results_10M/logs/input_E15.5_H3K27ac.mergeBAM"
run:
shell("samtools merge {output.E10} {input.E10} 2> {log.E10}")
shell("samtools merge {output.E11} {input.E11} 2> {log.E11}")
......@@ -253,49 +259,49 @@ rule mergeBAMreplicates:
rule mappingStats:
input:
a="results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam",
b="results/bwa/{sample}_{stage}_{mark}_q30.dupmark.bam",
c="results/bwa/{sample}_{stage}_{mark}_q30.sorted.bam",
d="results/bwa/{sample}_{stage}_{mark}.bam"
a="results_10M/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam",
#b="results_10M/bwa/{sample}_{stage}_{mark}_q30.dupmark.bam",
#c="results_10M/bwa/{sample}_{stage}_{mark}_q30.sorted.bam",
#d="results_10M/bwa/{sample}_{stage}_{mark}.bam"
output:
a="results/qc/{sample}_{stage}_{mark}.dedup.flagstat.qc",
b="results/qc/{sample}_{stage}_{mark}.dupmark.flagstat.qc",
c="results/qc/{sample}_{stage}_{mark}.q30.flagstat.qc",
d="results/qc/{sample}_{stage}_{mark}.unfiltered.flagstat.qc",
a="results_10M/qc/{sample}_{stage}_{mark}.dedup.flagstat.qc",
# b="results_10M/qc/{sample}_{stage}_{mark}.dupmark.flagstat.qc",
# c="results_10M/qc/{sample}_{stage}_{mark}.q30.flagstat.qc",
# d="results_10M/qc/{sample}_{stage}_{mark}.unfiltered.flagstat.qc",
run:
shell("samtools flagstat {input.a} > {output.a}")
shell("samtools flagstat {input.b} > {output.b}")
shell("samtools flagstat {input.c} > {output.c}")
shell("samtools flagstat {input.d} > {output.d}")
# shell("samtools flagstat {input.b} > {output.b}")
# shell("samtools flagstat {input.c} > {output.c}")
# shell("samtools flagstat {input.d} > {output.d}")
rule sort_name:
input:
"results_10M/bwa/{sample}_{stage}_{mark}_q30.dupmark.bam"
output:
tmp = "results_10M/bwa/{sample}_{stage}_{mark}_q30.dupmark.tmp.bam"
log:
"results_10M/logs/{sample}_{stage}_{mark}.pbc.sort"
run:
shell("samtools sort -n {input} -o {output.tmp} 2> {log}")
rule estimate_lib_complexity:
input:
"results_10M/bwa/{sample}_{stage}_{mark}_q30.dupmark.tmp.bam"
output:
qc = "results_10M/qc/{sample}_{stage}_{mark}.pbc.qc",
log:
"results_10M/logs/{sample}_{stage}_{mark}.pbc"
shell:
"""
bedtools bamtobed -i {input} \
| awk 'BEGIN{{OFS="\\t"}}{{print $1,$2,$4,$6}}' \
| sort | uniq -c \
| awk 'BEGIN{{mt=0;m0=0;m1=0;m2=0}} ($1==1){{m1=m1+1}} ($1==2){{m2=m2+1}} \
{{m0=m0+1}} {{mt=mt+$1}} END{{printf "%d\\t%d\\t%d\\t%d\\t%f\\t%f\\t%f\\n" ,mt,m0,m1,m2,m0/mt,m1/m0,m1/m2}}' \
> {output.qc}
"""
# rule sort_name:
# input:
# "results/bwa/{sample}_{stage}_{mark}_q30.dupmark.bam"
# output:
# tmp = "results/bwa/{sample}_{stage}_{mark}_q30.dupmark.tmp.bam"
# log:
# "logs/{sample}_{stage}_{mark}.pbc.sort"
# run:
# shell("samtools sort -n {input} -o {output.tmp} 2> {log}")
#
# rule estimate_lib_complexity:
# input:
# "results/bwa/{sample}_{stage}_{mark}_q30.dupmark.tmp.bam"
# output:
# qc = "results/qc/{sample}_{stage}_{mark}.pbc.qc",
# log:
# "logs/{sample}_{stage}_{mark}.pbc"
# shell:
# """
# bedtools bamtobed -i {input} \
# | awk 'BEGIN{{OFS="\\t"}}{{print $1,$2,$4,$6}}' \
# | sort | uniq -c \
# | awk 'BEGIN{{mt=0;m0=0;m1=0;m2=0}} ($1==1){{m1=m1+1}} ($1==2){{m2=m2+1}} \
# {{m0=m0+1}} {{mt=mt+$1}} END{{printf "%d\\t%d\\t%d\\t%d\\t%f\\t%f\\t%f\\n" ,mt,m0,m1,m2,m0/mt,m1/m0,m1/m2}}' \
# > {output.qc}
# """
#
## convert to bedPE
## print read 1 scaffold, read 1 start coordinate, read 2 scaffold, read 2 end coordinate, strand read 1, strand read 2
## remove mitochondrial genome
......@@ -314,13 +320,13 @@ rule mappingStats:
rule deeptools_summary:
input:
expand(["results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"], zip, sample=all_samples, stage=STAGE, mark = MARK)
expand(["results_10M/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"], zip, sample=all_samples, stage=STAGE, mark=MARK)
output:
sum="results/qc/multibamsum.npz",
counts="results/qc/multibamsum.tab"
sum="results_10M/qc/H3K27ac_multibamsum.npz",
counts="results_10M/qc/H3K27ac_multibamsum.tab"
threads: 32
log:
"logs/multisummary.deepTools"
"results_10M/logs/H3K27ac_multisummary.deepTools"
shell:
" multiBamSummary bins \
-p {threads} \
......@@ -330,12 +336,12 @@ rule deeptools_summary:
--outRawCounts {output.counts} 2> {log}"
rule deeptools_correlation:
input: "results/qc/multibamsum.npz"
input: "results_10M/qc/H3K27ac_multibamsum.npz"
output:
fig="results/qc/pearsoncor_multibamsum.png",
matrix="results/qc/pearsoncor_multibamsum_matrix.txt"
fig="results_10M/qc/H3K27ac_pearsoncor_multibamsum.png",
matrix="results_10M/qc/H3K27ac_pearsoncor_multibamsum_matrix.txt"
log:
"logs/correlation.deepTools"
"results_10M/logs/H3K27ac_correlation.deepTools"
shell:
"plotCorrelation \
--corData {input} \
......@@ -349,26 +355,26 @@ rule deeptools_correlation:
rule deeptools_coverage:
input:
bam = "results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam",
bai = "results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai"
bam = "results_10M/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam",
bai = "results_10M/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai"
output:
"results/qc/{sample}_{stage}_{mark}.SeqDepthNorm.bw"
"results_10M/qc/{sample}_{stage}_{mark}.SeqDepthNorm.bw"
log:
"logs/{sample}_{stage}_{mark}_coverage.deepTools"
"results_10M/logs/{sample}_{stage}_{mark}_coverage.deepTools"
shell:
"bamCoverage --bam {input.bam} --binSize 10 --normalizeUsing RPGC --effectiveGenomeSize 2308125349 --numberOfProcessors 4 -o {output} 2> {log}"
rule deeptools_fingerprint:
input:
bam = expand(["results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"], zip, sample=all_samples, stage=STAGE, mark = MARK),
bai = expand(["results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai"], zip, sample=all_samples, stage=STAGE, mark = MARK)
bam = expand(["results_10M/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"], zip, sample=all_samples, stage=STAGE, mark = MARK),
bai = expand(["results_10M/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai"], zip, sample=all_samples, stage=STAGE, mark = MARK)
output:
fig="results/qc/multiBAM_fingerprint.png",
metrics="results/qc/multiBAM_fingerprint_metrics.txt",
rawcounts="results/qc/multiBAM_fingerprint_rawcounts.txt"
fig="results_10M/qc/H3K27ac_multiBAM_fingerprint.png",
metrics="results_10M/qc/H3K27ac_multiBAM_fingerprint_metrics.txt",
rawcounts="results_10M/qc/H3K27ac_multiBAM_fingerprint_rawcounts.txt"
threads: 32
log:
"logs/fingerprint.deepTools"
"results_10M/logs/fingerprint.deepTools"
shell:
"plotFingerprint -p {threads} \
-b {input.bam} \
......@@ -381,13 +387,13 @@ rule deeptools_fingerprint:
rule deeptools_plotCoverage:
input:
bam = expand(["results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"], zip, sample=all_samples, stage=STAGE, mark=MARK),
bai = expand(["results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai"], zip, sample=all_samples, stage=STAGE, mark=MARK)
bam = expand(["results_10M/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"], zip, sample=all_samples, stage=STAGE, mark=MARK),
bai = expand(["results_10M/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai"], zip, sample=all_samples, stage=STAGE, mark=MARK)
output:
fig="results/qc/plot_coverage.png",
rawcounts="results/qc/plot_coverage_rawcounts.tab"
fig="results_10M/qc/H3K27ac_plot_coverage.png",
rawcounts="results_10M/qc/H3K27ac_plot_coverage_rawcounts.tab"
log:
"logs/plotCoverage.deepTools"
"results_10M/logs/H3K27ac_plotCoverage.deepTools"
shell:
"plotCoverage --bamfiles {input.bam} --plotFile {output.fig} \
-n 1000000 \
......@@ -401,9 +407,9 @@ rule deeptools_plotCoverage:
rule bamtobed_crossC:
input:
"results/bwa/{sample}_{stage}_{mark}_q30.dupmark.bam"
"results_10M/bwa/{sample}_{stage}_{mark}_q30.dupmark.bam"
output:
tagAlign = "results/bed/{sample}_{stage}_{mark}_q30_SE.tagAlign.gz"
tagAlign = "results_10M/bed/{sample}_{stage}_{mark}_q30_SE.tagAlign.gz"
shell:
"""
bedtools bamtobed -i {input} | \
......@@ -415,10 +421,10 @@ rule bamtobed_crossC:
## Estimate read length from first 100 reads
rule subsample_aligned_reads:
input:
"results/bed/{sample}_{stage}_{mark}_q30_SE.tagAlign.gz"
"results_10M/bed/{sample}_{stage}_{mark}_q30_SE.tagAlign.gz"
output:
subsample = "results/bed/{sample}_{stage}_{mark}.filt.sample.15Mreads.SE.tagAlign.gz",
tmp = "results/bed/{sample}_{stage}_{mark}_R1_trimmed_q30_SE.tagAlign.tmp"
subsample = "results_10M/bed/{sample}_{stage}_{mark}.filt.sample.15Mreads.SE.tagAlign.gz",
tmp = "results_10M/bed/{sample}_{stage}_{mark}_R1_trimmed_q30_SE.tagAlign.tmp"
params:
nreads= 15000000
run:
......@@ -437,12 +443,12 @@ rule subsample_aligned_reads:
rule cross_correlation_SSP:
input:
"results/bed/{sample}_{stage}_{mark}.filt.sample.15Mreads.SE.tagAlign.gz"
"results_10M/bed/{sample}_{stage}_{mark}.filt.sample.15Mreads.SE.tagAlign.gz"
output:
CC_SCORES_FILE="results/qc/{sample}_{stage}_{mark}_filt_15Mreads.SE.cc.qc",
CC_PLOT_FILE="results/qc/{sample}_{stage}_{mark}_filt_15Mreads.SE.cc.plot.pdf"
CC_SCORES_FILE="results_10M/qc/{sample}_{stage}_{mark}_filt_15Mreads.SE.cc.qc",
CC_PLOT_FILE="results_10M/qc/{sample}_{stage}_{mark}_filt_15Mreads.SE.cc.plot.pdf"
log:
"logs/{sample}_{stage}_{mark}_filt_15Mreads.SE.spp.log"
"results_10M/logs/{sample}_{stage}_{mark}_filt_15Mreads.SE.spp.log"
params:
EXCLUSION_RANGE_MIN=-500,
EXCLUSION_RANGE_MAX=85
......@@ -460,46 +466,46 @@ rule cross_correlation_SSP:
rule call_peaks_macs2:
input:
control = "results/bwa/{control}_{stage}_{mark}_q30.sorted.dedup.bam",
case = "results/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bam"
control = "results_10M/bwa/{control}_{stage}_{mark}_q30.sorted.dedup.bam",
case = "results_10M/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bam"
output:
"results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.xls",
"results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_summits.bed",
"results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak",
"results_10M/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.xls",
"results_10M/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_summits.bed",
"results_10M/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak",
log:
"logs/{case}_vs_{control}_{stage}_{mark}_call_peaks_macs2.log"
"results_10M/logs/{case}_vs_{control}_{stage}_{mark}_call_peaks_macs2.log"
params:
name = "{case}_vs_{control}_{stage}_{mark}_macs2",
shell:
" macs2 callpeak -f BAM -t {input.case} \
-c {input.control} --keep-dup all \
--outdir results/macs2/ -p 0.01 \
--outdir results_10M/macs2/ -p 0.01 \
-n {params.name} \
-g mm 2> {log} "
rule call_peaks_macs2_pooled_replicates:
input:
E10 = "results/bwa/E10.5_H3K27ac_q30.sorted.pooled.dedup.bam",
E11 = "results/bwa/E11.5_H3K27ac_q30.sorted.pooled.dedup.bam",
E12 = "results/bwa/E12.5_H3K27ac_q30.sorted.pooled.dedup.bam",
E13 = "results/bwa/E13.5_H3K27ac_q30.sorted.pooled.dedup.bam",
E14 = "results/bwa/E14.5_H3K27ac_q30.sorted.pooled.dedup.bam",
E15 = "results/bwa/E15.5_H3K27ac_q30.sorted.pooled.dedup.bam",
E10C = "results/bwa/input_E10.5_H3K27ac_q30.sorted.dedup.bam",
E11C = "results/bwa/input_E11.5_H3K27ac_q30.sorted.dedup.bam",
E12C = "results/bwa/input_E12.5_H3K27ac_q30.sorted.dedup.bam",
E13C = "results/bwa/input_E13.5_H3K27ac_q30.sorted.dedup.bam",
E14C = "results/bwa/input_E14.5_H3K27ac_q30.sorted.dedup.bam",
E15C = "results/bwa/input_E15.5_H3K27ac_q30.sorted.dedup.bam"
E10 = "results_10M/bwa/E10.5_H3K27ac_q30.sorted.pooled.dedup.bam",
E11 = "results_10M/bwa/E11.5_H3K27ac_q30.sorted.pooled.dedup.bam",
E12 = "results_10M/bwa/E12.5_H3K27ac_q30.sorted.pooled.dedup.bam",
E13 = "results_10M/bwa/E13.5_H3K27ac_q30.sorted.pooled.dedup.bam",
E14 = "results_10M/bwa/E14.5_H3K27ac_q30.sorted.pooled.dedup.bam",
E15 = "results_10M/bwa/E15.5_H3K27ac_q30.sorted.pooled.dedup.bam",
E10C = "results_10M/bwa/input_E10.5_H3K27ac_q30.sorted.dedup.bam",
E11C = "results_10M/bwa/input_E11.5_H3K27ac_q30.sorted.dedup.bam",
E12C = "results_10M/bwa/input_E12.5_H3K27ac_q30.sorted.dedup.bam",
E13C = "results_10M/bwa/input_E13.5_H3K27ac_q30.sorted.dedup.bam",
E14C = "results_10M/bwa/input_E14.5_H3K27ac_q30.sorted.dedup.bam",
E15C = "results_10M/bwa/input_E15.5_H3K27ac_q30.sorted.dedup.bam"
output:
directory("results/macs2/pooled/")
directory("results_10M/macs2/pooled/")
log:
E10 = "results/qc/E10.5_H3K27ac.pooled.macs2",
E11 = "results/qc/E11.5_H3K27ac.pooled.macs2",
E12 = "results/qc/E12.5_H3K27ac.pooled.macs2",
E13 = "results/qc/E13.5_H3K27ac.pooled.macs2",
E14 = "results/qc/E14.5_H3K27ac.pooled.macs2",
E15 = "results/qc/E15.5_H3K27ac.pooled.macs2"
E10 = "results_10M/qc/E10.5_H3K27ac.pooled.macs2",
E11 = "results_10M/qc/E11.5_H3K27ac.pooled.macs2",
E12 = "results_10M/qc/E12.5_H3K27ac.pooled.macs2",
E13 = "results_10M/qc/E13.5_H3K27ac.pooled.macs2",
E14 = "results_10M/qc/E14.5_H3K27ac.pooled.macs2",
E15 = "results_10M/qc/E15.5_H3K27ac.pooled.macs2"
params:
E10 = "E10.5_H3K27ac.pooled.macs2",
E11 = "E11.5_H3K27ac.pooled.macs2",
......@@ -510,32 +516,32 @@ rule call_peaks_macs2_pooled_replicates:
run:
shell("macs2 callpeak -f BAM -t {input.E10} \
-c {input.E10C} --keep-dup all \
--outdir results/macs2/ -p 0.01 \
--outdir results_10M/macs2/ -p 0.01 \
-n {params.E10} \
-g mm 2> {log.E10}" )
shell("macs2 callpeak -f BAM -t {input.E11} \
-c {input.E11C} --keep-dup all \
--outdir results/macs2/ -p 0.01 \
--outdir results_10M/macs2/ -p 0.01 \
-n {params.E11} \
-g mm 2> {log.E11}" )
shell("macs2 callpeak -f BAM -t {input.E12} \
-c {input.E12C} --keep-dup all \
--outdir results/macs2/ -p 0.01 \
--outdir results_10M/macs2/ -p 0.01 \
-n {params.E12} \
-g mm 2> {log.E12}" )
shell("macs2 callpeak -f BAM -t {input.E13} \
-c {input.E13C} --keep-dup all \
--outdir results/macs2/ -p 0.01 \
--outdir results_10M/macs2/ -p 0.01 \
-n {params.E13} \
-g mm 2> {log.E13}" )
shell("macs2 callpeak -f BAM -t {input.E14} \
-c {input.E14C} --keep-dup all \
--outdir results/macs2/ -p 0.01 \
--outdir results_10M/macs2/ -p 0.01 \
-n {params.E14} \
-g mm 2> {log.E14}" )
shell("macs2 callpeak -f BAM -t {input.E15} \
-c {input.E15C} --keep-dup all \
--outdir results/macs2/ -p 0.01 \
--outdir results_10M/macs2/ -p 0.01 \
-n {params.E15} \
-g mm 2> {log.E15}" )
......@@ -548,25 +554,24 @@ rule call_peaks_macs2_pooled_replicates:
## Convert BAM to tagAlign file for calculating FRiP QC metric (Fraction of reads in peaks)
rule bamToBed:
input:
"results/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bam"
"results_10M/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bam"
output:
"results/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bed"
"results_10M/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bed"
log:
"logs/{case}_{stage}_{mark}.bamToBed"
"results_10M/logs/{case}_{stage}_{mark}.bamToBed"
shell:
"samtools sort -n {input} | bedtools bamtobed -i - > {output}"
# ## Fraction of reads in peaks
rule frip:
input:
bed = "results/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bed",
peak = "results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak"
bed = "results_10M/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bed",
peak = "results_10M/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak"
output:
"results/qc/{case}_vs_{control}_{stage}_{mark}.frip.txt"
"results_10M/qc/{case}_vs_{control}_{stage}_{mark}.frip.txt"
shell:
"python2.7 scripts/encode_frip.py {input.bed} {input.peak} > {output}"
# ===============================================================================================
# 9. Create consensus peaksets for replicates
# ===============================================================================================
......@@ -576,25 +581,25 @@ rule frip:
rule overlap_peaks:
input:
E10= ["results/macs2/ENCFF213EBC_vs_ENCFF157KEH_E10.5_H3K27ac_macs2_peaks.narrowPeak", "results/macs2/ENCFF548BRR_vs_ENCFF825AVI_E10.5_H3K27ac_macs2_peaks.narrowPeak"],
E11= ["results/macs2/ENCFF512SFE_vs_ENCFF184CUE_E11.5_H3K27ac_macs2_peaks.narrowPeak", "results/macs2/ENCFF515PKL_vs_ENCFF376FGM_E11.5_H3K27ac_macs2_peaks.narrowPeak"],
E12= ["results/macs2/ENCFF011NFM_vs_ENCFF058AUT_E12.5_H3K27ac_macs2_peaks.narrowPeak", "results/macs2/ENCFF394TZN_vs_ENCFF203JQV_E12.5_H3K27ac_macs2_peaks.narrowPeak"],
E13= ["results/macs2/ENCFF194ORC_vs_ENCFF117QRC_E13.5_H3K27ac_macs2_peaks.narrowPeak", "results/macs2/ENCFF290ZNF_vs_ENCFF248PGK_E13.5_H3K27ac_macs2_peaks.narrowPeak"],
E14= ["results/macs2/ENCFF902HAR_vs_ENCFF002HZV_E14.5_H3K27ac_macs2_peaks.narrowPeak", "results/macs2/ENCFF327VAO_vs_ENCFF784ORI_E14.5_H3K27ac_macs2_peaks.narrowPeak"],
E15= ["results/macs2/ENCFF707WKL_vs_ENCFF182XFG_E15.5_H3K27ac_macs2_peaks.narrowPeak", "results/macs2/ENCFF584JFB_vs_ENCFF727QTS_E15.5_H3K27ac_macs2_peaks.narrowPeak"],
E10_pooled="results/macs2/E10.5_H3K27ac.pooled.macs2_peaks.narrowPeak",
E11_pooled="results/macs2/E11.5_H3K27ac.pooled.macs2_peaks.narrowPeak",
E12_pooled="results/macs2/E12.5_H3K27ac.pooled.macs2_peaks.narrowPeak",
E13_pooled="results/macs2/E13.5_H3K27ac.pooled.macs2_peaks.narrowPeak",
E14_pooled="results/macs2/E14.5_H3K27ac.pooled.macs2_peaks.narrowPeak",
E15_pooled="results/macs2/E15.5_H3K27ac.pooled.macs2_peaks.narrowPeak",
E10= ["results_10M/macs2/ENCFF213EBC_vs_ENCFF157KEH_E10.5_H3K27ac_macs2_peaks.narrowPeak", "results_10M/macs2/ENCFF548BRR_vs_ENCFF825AVI_E10.5_H3K27ac_macs2_peaks.narrowPeak"],
E11= ["results_10M/macs2/ENCFF512SFE_vs_ENCFF184CUE_E11.5_H3K27ac_macs2_peaks.narrowPeak", "results_10M/macs2/ENCFF515PKL_vs_ENCFF376FGM_E11.5_H3K27ac_macs2_peaks.narrowPeak"],
E12= ["results_10M/macs2/ENCFF011NFM_vs_ENCFF058AUT_E12.5_H3K27ac_macs2_peaks.narrowPeak", "results_10M/macs2/ENCFF394TZN_vs_ENCFF203JQV_E12.5_H3K27ac_macs2_peaks.narrowPeak"],
E13= ["results_10M/macs2/ENCFF194ORC_vs_ENCFF117QRC_E13.5_H3K27ac_macs2_peaks.narrowPeak", "results_10M/macs2/ENCFF290ZNF_vs_ENCFF248PGK_E13.5_H3K27ac_macs2_peaks.narrowPeak"],
E14= ["results_10M/macs2/ENCFF902HAR_vs_ENCFF002HZV_E14.5_H3K27ac_macs2_peaks.narrowPeak", "results_10M/macs2/ENCFF327VAO_vs_ENCFF784ORI_E14.5_H3K27ac_macs2_peaks.narrowPeak"],
E15= ["results_10M/macs2/ENCFF707WKL_vs_ENCFF182XFG_E15.5_H3K27ac_macs2_peaks.narrowPeak", "results_10M/macs2/ENCFF584JFB_vs_ENCFF727QTS_E15.5_H3K27ac_macs2_peaks.narrowPeak"],
E10_pooled="results_10M/macs2/E10.5_H3K27ac.pooled.macs2_peaks.narrowPeak",
E11_pooled="results_10M/macs2/E11.5_H3K27ac.pooled.macs2_peaks.narrowPeak",
E12_pooled="results_10M/macs2/E12.5_H3K27ac.pooled.macs2_peaks.narrowPeak",
E13_pooled="results_10M/macs2/E13.5_H3K27ac.pooled.macs2_peaks.narrowPeak",
E14_pooled="results_10M/macs2/E14.5_H3K27ac.pooled.macs2_peaks.narrowPeak",
E15_pooled="results_10M/macs2/E15.5_H3K27ac.pooled.macs2_peaks.narrowPeak",
output:
E10= "results/macs2/H3K27ac_E10.5_overlap.narrowPeak",
E11= "results/macs2/H3K27ac_E11.5_overlap.narrowPeak",
E12= "results/macs2/H3K27ac_E12.5_overlap.narrowPeak",
E13= "results/macs2/H3K27ac_E13.5_overlap.narrowPeak",
E14= "results/macs2/H3K27ac_E14.5_overlap.narrowPeak",
E15= "results/macs2/H3K27ac_E15.5_overlap.narrowPeak"
E10= "results_10M/macs2/H3K27ac_E10.5_overlap.narrowPeak",
E11= "results_10M/macs2/H3K27ac_E11.5_overlap.narrowPeak",
E12= "results_10M/macs2/H3K27ac_E12.5_overlap.narrowPeak",
E13= "results_10M/macs2/H3K27ac_E13.5_overlap.narrowPeak",
E14= "results_10M/macs2/H3K27ac_E14.5_overlap.narrowPeak",
E15= "results_10M/macs2/H3K27ac_E15.5_overlap.narrowPeak"
run:
shell("python2.7 scripts/overlap_peaks.py {input.E10} {input.E10_pooled} {output.E10}")
shell("python2.7 scripts/overlap_peaks.py {input.E11} {input.E11_pooled} {output.E11}")
......@@ -603,6 +608,37 @@ rule overlap_peaks:
shell("python2.7 scripts/overlap_peaks.py {input.E14} {input.E14_pooled} {output.E14}")
shell("python2.7 scripts/overlap_peaks.py {input.E15} {input.E15_pooled} {output.E15}")
rule overlap_frip:
input:
E10bam = "results_10M/bwa/E10.5_H3K27ac_q30.sorted.pooled.dedup.bam",
E11bam = "results_10M/bwa/E11.5_H3K27ac_q30.sorted.pooled.dedup.bam",
E12bam = "results_10M/bwa/E12.5_H3K27ac_q30.sorted.pooled.dedup.bam",
E13bam = "results_10M/bwa/E13.5_H3K27ac_q30.sorted.pooled.dedup.bam",
E14bam = "results_10M/bwa/E14.5_H3K27ac_q30.sorted.pooled.dedup.bam",
E15bam = "results_10M/bwa/E15.5_H3K27ac_q30.sorted.pooled.dedup.bam",
E10bed = "results_10M/macs2/H3K27ac_E10.5_overlap.narrowPeak",
E11bed = "results_10M/macs2/H3K27ac_E11.5_overlap.narrowPeak",
E12bed = "results_10M/macs2/H3K27ac_E12.5_overlap.narrowPeak",
E13bed = "results_10M/macs2/H3K27ac_E13.5_overlap.narrowPeak",
E14bed = "results_10M/macs2/H3K27ac_E14.5_overlap.narrowPeak",
E15bed = "results_10M/macs2/H3K27ac_E15.5_overlap.narrowPeak"
output:
E10bed = "results_10M/macs2/H3K27ac_E10.5_overlap.frip",
E11bed = "results_10M/macs2/H3K27ac_E11.5_overlap.frip",
E12bed = "results_10M/macs2/H3K27ac_E12.5_overlap.frip",
E13bed = "results_10M/macs2/H3K27ac_E13.5_overlap.frip",
E14bed = "results_10M/macs2/H3K27ac_E14.5_overlap.frip",
E15bed = "results_10M/macs2/H3K27ac_E15.5_overlap.frip"
run:
shell("python2.7 scripts/encode_frip.py {input.E10bam} {input.E10bed} > {output.E10bed}")
shell("python2.7 scripts/encode_frip.py {input.E11bam} {input.E11bed} > {output.E11bed}")
shell("python2.7 scripts/encode_frip.py {input.E12bam} {input.E12bed} > {output.E12bed}")
shell("python2.7 scripts/encode_frip.py {input.E13bam} {input.E13bed} > {output.E13bed}")
shell("python2.7 scripts/encode_frip.py {input.E14bam} {input.E14bed} > {output.E14bed}")
shell("python2.7 scripts/encode_frip.py {input.E15bam} {input.E15bed} > {output.E15bed}")
# ===============================================================================================
# 10. Combine all QC into multiqc output
# ===============================================================================================
......@@ -610,54 +646,54 @@ rule overlap_peaks:
# rule multiqc:
# input:
# fastqc
# expand("results/fastqc/{sample}_R1_fastqc.html", sample=all_samples),
# expand("results/fastqc/{sample}_R2_fastqc.html", sample=all_samples),
# expand("results/fastqc/{sample}_R1_fastqc.zip", sample=all_samples),
# expand("results/fastqc/{sample}_R2_fastqc.zip", sample=all_samples),
# expand("results_10M/fastqc/{sample}_R1_fastqc.html", sample=all_samples),
# expand("results_10M/fastqc/{sample}_R2_fastqc.html", sample=all_samples),
# expand("results_10M/fastqc/{sample}_R1_fastqc.zip", sample=all_samples),
# expand("results_10M/fastqc/{sample}_R2_fastqc.zip", sample=all_samples),
# # bwa
# expand("logs/{sample}.align", sample=all_samples),
# expand("logs/{sample}.flagstat.qc", sample=all_samples),
# expand("results_10M/logs/{sample}.align", sample=all_samples),
# expand("results_10M/logs/{sample}.flagstat.qc", sample=all_samples),
# # preseq
# expand("results/preseq/{sample}.ccurve.txt", sample=all_samples),
# expand("results_10M/preseq/{sample}.ccurve.txt", sample=all_samples),
# # picard
# expand("results/picard/{sample}_est_lib_complex_metrics.txt", sample=all_samples),
# expand("results_10M/picard/{sample}_est_lib_complex_metrics.txt", sample=all_samples),
# # deepTools
# "results/deeptools/multibamsum.npz",
# "results/deeptools/multibamsum.tab",
# "results/deeptools/pearsoncor_multibamsum.png",
# "results/deeptools/pearsoncor_multibamsum_matrix.txt",
# expand("results/deeptools/{sample}.SeqDepthNorm.bw", sample=all_samples),
# "results/deeptools/multiBAM_fingerprint.png",
# "results/deeptools/multiBAM_fingerprint_metrics.txt",
# "results/deeptools/multiBAM_fingerprint_rawcounts.txt",
# "results/deeptools/plot_coverage.png",
# "results/deeptools/plot_coverage_rawcounts.tab",
# "results/deeptools/bamPEFragmentSize_hist.png",
# "results/deeptools/bamPEFragmentSize_rawcounts.tab",
# "results_10M/deeptools/multibamsum.npz",
# "results_10M/deeptools/multibamsum.tab",
# "results_10M/deeptools/pearsoncor_multibamsum.png",
# "results_10M/deeptools/pearsoncor_multibamsum_matrix.txt",
# expand("results_10M/deeptools/{sample}.SeqDepthNorm.bw", sample=all_samples),
# "results_10M/deeptools/multiBAM_fingerprint.png",
# "results_10M/deeptools/multiBAM_fingerprint_metrics.txt",
# "results_10M/deeptools/multiBAM_fingerprint_rawcounts.txt",
# "results_10M/deeptools/plot_coverage.png",
# "results_10M/deeptools/plot_coverage_rawcounts.tab",
# "results_10M/deeptools/bamPEFragmentSize_hist.png",
# "results_10M/deeptools/bamPEFragmentSize_rawcounts.tab",
# # phantomPeaks
# expand("results/phantomPeaks/{sample}.spp.pdf", sample = IPS),
# expand("results/phantomPeaks/{sample}.spp.Rdata", sample = IPS),
# expand("results/phantomPeaks/{sample}.spp.out", sample = IPS),
# expand("results_10M/phantomPeaks/{sample}.spp.pdf", sample = IPS),
# expand("results_10M/phantomPeaks/{sample}.spp.Rdata", sample = IPS),
# expand("results_10M/phantomPeaks/{sample}.spp.out", sample = IPS),
# # macs2
# expand("results/macs2/{case}_vs_{control}_macs2_peaks.narrowPeak", zip, case=IPS, control=INPUTS),
# expand("results/macs2/{case}_vs_{control}_macs2_peaks.xls", zip, case=IPS, control=INPUTS),
# expand("results/macs2/{case}_vs_{control}_macs2_summits.bed", zip, case=IPS, control=INPUTS),
# expand("results/macs2/{case}-vs-{control}-narrowpeak-count_mqc.json", zip, case=IPS, control=INPUTS),
# expand("results/frip/{case}.frip.txt", case=IPS)
# expand("results_10M/macs2/{case}_vs_{control}_macs2_peaks.narrowPeak", zip, case=IPS, control=INPUTS),
# expand("results_10M/macs2/{case}_vs_{control}_macs2_peaks.xls", zip, case=IPS, control=INPUTS),
# expand("results_10M/macs2/{case}_vs_{control}_macs2_summits.bed", zip, case=IPS, control=INPUTS),
# expand("results_10M/macs2/{case}-vs-{control}-narrowpeak-count_mqc.json", zip, case=IPS, control=INPUTS),
# expand("results_10M/frip/{case}.frip.txt", case=IPS)
# params:
# "results/fastqc/",
# "results/bwa/",
# "logs/",
# "results/deeptools/",
# "results/macs2/",
# "results/phantomPeaks/",
# "results/frip/",
# "results/picard/"
# "results_10M/fastqc/",
# "results_10M/bwa/",
# "results_10M/logs/",
# "results_10M/deeptools/",
# "results_10M/macs2/",
# "results_10M/phantomPeaks/",
# "results_10M/frip/",
# "results_10M/picard/"
# output:
# directory("results/multiqc/multiqc_report_data/"),
# "results/multiqc/multiqc_report.html",
# directory("results_10M/multiqc/multiqc_report_data/"),
# "results_10M/multiqc/multiqc_report.html",
# log:
# "logs/multiqc.log"
# "results_10M/logs/multiqc.log"
# shell:
# "multiqc -f {params} -m fastqc -m samtools -m picard -m preseq -m deeptools -m phantompeakqualtools -m macs2 -o results/multiqc/ \
# "multiqc -f {params} -m fastqc -m samtools -m picard -m preseq -m deeptools -m phantompeakqualtools -m macs2 -o results_10M/multiqc/ \
# -n multiqc_report.html 2> {log}"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment