Skip to content
Snippets Groups Projects
Commit 178ada24 authored by Laura Cook's avatar Laura Cook
Browse files

changed directory name to ouput peak calls for 10M reads per sample

parent c25710d6
No related branches found
No related tags found
No related merge requests found
...@@ -48,69 +48,74 @@ all_samples = IPS + INPUTS ...@@ -48,69 +48,74 @@ all_samples = IPS + INPUTS
rule all: rule all:
input: input:
expand("results/bwa/{sample}_{stage}_{mark}_q30.sorted.bam", zip, sample=all_samples, stage=STAGE, mark=MARK), #expand("results_10M/bwa/{sample}_{stage}_{mark}_q30.sorted.bam", zip, sample=all_samples, stage=STAGE, mark=MARK),
expand("results/bwa/{sample}_{stage}_{mark}_q30.dupmark.bam", zip, sample=all_samples, stage=STAGE, mark=MARK), #expand("results_10M/bwa/{sample}_{stage}_{mark}_q30.dupmark.bam", zip, sample=all_samples, stage=STAGE, mark=MARK),
expand("results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam", zip, sample=all_samples, stage=STAGE, mark=MARK), #expand("results_10M/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam", zip, sample=all_samples, stage=STAGE, mark=MARK),
expand("results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai", zip, sample=all_samples, stage=STAGE, mark=MARK), expand("results_10M/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai", zip, sample=all_samples, stage=STAGE, mark=MARK),
expand("results/qc/{sample}_{stage}_{mark}.dedup.flagstat.qc", zip, sample=all_samples, stage=STAGE, mark=MARK), expand("results_10M/qc/{sample}_{stage}_{mark}.dedup.flagstat.qc", zip, sample=all_samples, stage=STAGE, mark=MARK),
expand("results/qc/{sample}_{stage}_{mark}.dupmark.flagstat.qc", zip, sample=all_samples, stage=STAGE, mark=MARK), #expand("results_10M/qc/{sample}_{stage}_{mark}.dupmark.flagstat.qc", zip, sample=all_samples, stage=STAGE, mark=MARK),
expand("results/qc/{sample}_{stage}_{mark}.q30.flagstat.qc", zip, sample=all_samples, stage=STAGE, mark=MARK), #expand("results_10M/qc/{sample}_{stage}_{mark}.q30.flagstat.qc", zip, sample=all_samples, stage=STAGE, mark=MARK),
expand("results/qc/{sample}_{stage}_{mark}.unfiltered.flagstat.qc", zip, sample=all_samples, stage=STAGE, mark=MARK), #expand("results_10M/qc/{sample}_{stage}_{mark}.unfiltered.flagstat.qc", zip, sample=all_samples, stage=STAGE, mark=MARK),
"results/bwa/E10.5_H3K4me3_q30.sorted.pooled.dedup.bam", "results_10M/bwa/E10.5_H3K4me3_q30.sorted.pooled.dedup.bam",
"results/bwa/E11.5_H3K4me3_q30.sorted.pooled.dedup.bam", "results_10M/bwa/E11.5_H3K4me3_q30.sorted.pooled.dedup.bam",
"results/bwa/E12.5_H3K4me3_q30.sorted.pooled.dedup.bam", "results_10M/bwa/E12.5_H3K4me3_q30.sorted.pooled.dedup.bam",
"results/bwa/E13.5_H3K4me3_q30.sorted.pooled.dedup.bam", "results_10M/bwa/E13.5_H3K4me3_q30.sorted.pooled.dedup.bam",
"results/bwa/E14.5_H3K4me3_q30.sorted.pooled.dedup.bam", "results_10M/bwa/E14.5_H3K4me3_q30.sorted.pooled.dedup.bam",
"results/bwa/E15.5_H3K4me3_q30.sorted.pooled.dedup.bam", "results_10M/bwa/E15.5_H3K4me3_q30.sorted.pooled.dedup.bam",
"results/bwa/input_E10.5_H3K4me3_q30.sorted.dedup.bam", "results_10M/bwa/input_E10.5_H3K4me3_q30.sorted.dedup.bam",
"results/bwa/input_E11.5_H3K4me3_q30.sorted.dedup.bam", "results_10M/bwa/input_E11.5_H3K4me3_q30.sorted.dedup.bam",
"results/bwa/input_E12.5_H3K4me3_q30.sorted.dedup.bam", "results_10M/bwa/input_E12.5_H3K4me3_q30.sorted.dedup.bam",
"results/bwa/input_E13.5_H3K4me3_q30.sorted.dedup.bam", "results_10M/bwa/input_E13.5_H3K4me3_q30.sorted.dedup.bam",
"results/bwa/input_E14.5_H3K4me3_q30.sorted.dedup.bam", "results_10M/bwa/input_E14.5_H3K4me3_q30.sorted.dedup.bam",
"results/bwa/input_E15.5_H3K4me3_q30.sorted.dedup.bam", "results_10M/bwa/input_E15.5_H3K4me3_q30.sorted.dedup.bam",
"logs/E10.5_H3K4me3.mergeBAM", "results_10M/logs/E10.5_H3K4me3.mergeBAM",
"logs/E11.5_H3K4me3.mergeBAM", "results_10M/logs/E11.5_H3K4me3.mergeBAM",
"logs/E12.5_H3K4me3.mergeBAM", "results_10M/logs/E12.5_H3K4me3.mergeBAM",
"logs/E13.5_H3K4me3.mergeBAM", "results_10M/logs/E13.5_H3K4me3.mergeBAM",
"logs/E14.5_H3K4me3.mergeBAM", "results_10M/logs/E14.5_H3K4me3.mergeBAM",
"logs/E15.5_H3K4me3.mergeBAM", "results_10M/logs/E15.5_H3K4me3.mergeBAM",
"logs/input_E10.5_H3K4me3.mergeBAM", "results_10M/logs/input_E10.5_H3K4me3.mergeBAM",
"logs/input_E11.5_H3K4me3.mergeBAM", "results_10M/logs/input_E11.5_H3K4me3.mergeBAM",
"logs/input_E12.5_H3K4me3.mergeBAM", "results_10M/logs/input_E12.5_H3K4me3.mergeBAM",
"logs/input_E13.5_H3K4me3.mergeBAM", "results_10M/logs/input_E13.5_H3K4me3.mergeBAM",
"logs/input_E14.5_H3K4me3.mergeBAM", "results_10M/logs/input_E15.5_H3K4me3.mergeBAM",
"logs/input_E15.5_H3K4me3.mergeBAM", "results_10M/qc/H3K4me3_multibamsum.npz",
# "results/qc/multibamsum.npz", "results_10M/qc/H3K4me3_multibamsum.tab",
# "results/qc/multibamsum.tab", "results_10M/qc/H3K4me3_pearsoncor_multibamsum.png",
# "results/qc/pearsoncor_multibamsum.png", "results_10M/qc/H3K4me3_pearsoncor_multibamsum_matrix.txt",
# "results/qc/pearsoncor_multibamsum_matrix.txt", expand("results_10M/qc/{sample}_{stage}_{mark}.SeqDepthNorm.bw", zip, sample=all_samples, stage=STAGE, mark=MARK),
# expand("results/qc/{sample}_{stage}_{mark}.SeqDepthNorm.bw", zip, sample=all_samples, stage=STAGE, mark=MARK), "results_10M/qc/H3K4me3_multiBAM_fingerprint.png",
# "results/qc/multiBAM_fingerprint.png", "results_10M/qc/H3K4me3_multiBAM_fingerprint_metrics.txt",
# "results/qc/multiBAM_fingerprint_metrics.txt", "results_10M/qc/H3K4me3_multiBAM_fingerprint_rawcounts.txt",
# "results/qc/multiBAM_fingerprint_rawcounts.txt", "results_10M/qc/H3K4me3_plot_coverage.png",
# "results/qc/plot_coverage.png", "results_10M/qc/H3K4me3_plot_coverage_rawcounts.tab",
# "results/qc/plot_coverage_rawcounts.tab", #expand("results_10M/bwa/{sample}_{stage}_{mark}_q30.dupmark.tmp.bam", zip, sample=all_samples, stage=STAGE, mark=MARK),
#expand("results/bwa/{sample}_{stage}_{mark}_q30.dupmark.tmp.bam", zip, sample=all_samples, stage=STAGE, mark=MARK), #expand("results_10M/logs/{sample}_{stage}_{mark}.pbc.sort", zip, sample=all_samples, stage=STAGE, mark=MARK),
#expand("logs/{sample}_{stage}_{mark}.pbc.sort", zip, sample=all_samples, stage=STAGE, mark=MARK), #expand("results_10M/qc/{sample}_{stage}_{mark}.pbc.qc", zip, sample=all_samples, stage=STAGE, mark=MARK),
#expand("results/qc/{sample}_{stage}_{mark}.pbc.qc", zip, sample=all_samples, stage=STAGE, mark=MARK), #expand("results_10M/qc/{sample}_{stage}_{mark}_filt_15Mreads.SE.cc.qc",zip, sample=all_samples, stage=STAGE, mark=MARK),
expand("results/qc/{sample}_{stage}_{mark}_filt_15Mreads.SE.cc.qc",zip, sample=all_samples, stage=STAGE, mark=MARK), #expand("results_10M/qc/{sample}_{stage}_{mark}_filt_15Mreads.SE.cc.plot.pdf", zip, sample=all_samples, stage=STAGE, mark=MARK),
expand("results/qc/{sample}_{stage}_{mark}_filt_15Mreads.SE.cc.plot.pdf", zip, sample=all_samples, stage=STAGE, mark=MARK), expand("results_10M/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
expand("results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK), expand("results_10M/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.xls", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
expand("results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.xls", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK), expand("results_10M/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_summits.bed", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
expand("results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_summits.bed", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK), #expand("results_10M/logs/{case}_vs_{control}_{stage}_{mark}_call_peaks_macs2.log", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
expand("logs/{case}_vs_{control}_{stage}_{mark}_call_peaks_macs2.log", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK), expand("results_10M/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bed", zip, case=IPS, stage=STAGE, mark=MARK),
expand("results/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bed", zip, case=IPS, stage=STAGE, mark=MARK), expand("results_10M/logs/{case}_{stage}_{mark}.bamToBed", zip, case=IPS, stage=STAGE, mark=MARK),
expand("logs/{case}_{stage}_{mark}.bamToBed", zip, case=IPS, stage=STAGE, mark=MARK), expand("results_10M/qc/{case}_vs_{control}_{stage}_{mark}.frip.txt", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
expand("results/qc/{case}_vs_{control}_{stage}_{mark}.frip.txt", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK), directory("results_10M/macs2/pooled/"),
directory("results/macs2/pooled/"), "results_10M/macs2/H3K4me3_E10.5_overlap.narrowPeak",
"results/macs2/H3K4me3_E10.5_overlap.narrowPeak", "results_10M/macs2/H3K4me3_E11.5_overlap.narrowPeak",
"results/macs2/H3K4me3_E11.5_overlap.narrowPeak", "results_10M/macs2/H3K4me3_E12.5_overlap.narrowPeak",
"results/macs2/H3K4me3_E12.5_overlap.narrowPeak", "results_10M/macs2/H3K4me3_E13.5_overlap.narrowPeak",
"results/macs2/H3K4me3_E13.5_overlap.narrowPeak", "results_10M/macs2/H3K4me3_E14.5_overlap.narrowPeak",
"results/macs2/H3K4me3_E14.5_overlap.narrowPeak", "results_10M/macs2/H3K4me3_E15.5_overlap.narrowPeak",
"results/macs2/H3K4me3_E15.5_overlap.narrowPeak" "results_10M/macs2/H3K4me3_E10.5_overlap.frip",
# directory("results/multiqc/multiqc_report_data/"), "results_10M/macs2/H3K4me3_E11.5_overlap.frip",
# "results/multiqc/multiqc_report.html" "results_10M/macs2/H3K4me3_E12.5_overlap.frip",
"results_10M/macs2/H3K4me3_E13.5_overlap.frip",
"results_10M/macs2/H3K4me3_E14.5_overlap.frip",
"results_10M/macs2/H3K4me3_E15.5_overlap.frip"
#directory("results_10M/multiqc/multiqc_report_data/"),
# "results_10M/multiqc/multiqc_report.html"
# =============================================================================================== # ===============================================================================================
# 1. FASTQC # 1. FASTQC
...@@ -149,86 +154,86 @@ rule all: ...@@ -149,86 +154,86 @@ rule all:
rule filter: rule filter:
input: input:
"results/bwa/{sample}_{stage}_{mark}.bam" "results_10M/bwa/{sample}_{stage}_{mark}.bam"
output: output:
"results/bwa/{sample}_{stage}_{mark}_q30.sorted.bam" "results_10M/bwa/{sample}_{stage}_{mark}_q30.sorted.bam"
log: log:
"logs/{sample}_{stage}_{mark}.filter" "results_10M/logs/{sample}_{stage}_{mark}.filter"
shell: shell:
"samtools view -b -F 1804 -q 30 {input} | samtools sort -o {output} - 2> {log}" "samtools view -b -F 1804 -q 30 {input} | samtools sort -o {output} - 2> {log}"
rule markDups: rule markDups:
input: input:
"results/bwa/{sample}_{stage}_{mark}_q30.sorted.bam" "results_10M/bwa/{sample}_{stage}_{mark}_q30.sorted.bam"
output: output:
bam="results/bwa/{sample}_{stage}_{mark}_q30.dupmark.bam", bam="results_10M/bwa/{sample}_{stage}_{mark}_q30.dupmark.bam",
dupQC="results/bwa/{sample}_{stage}_{mark}.dupmark.qc" dupQC="results_10M/bwa/{sample}_{stage}_{mark}.dupmark.qc"
log: log:
"logs/{sample}_{stage}_{mark}.dedup" "results_10M/logs/{sample}_{stage}_{mark}.dedup"
shell: shell:
"picard MarkDuplicates I={input} O={output.bam} \ "picard MarkDuplicates I={input} O={output.bam} \
METRICS_FILE={output.dupQC} REMOVE_DUPLICATES=false ASSUME_SORTED=true 2> {log}" METRICS_FILE={output.dupQC} REMOVE_DUPLICATES=false ASSUME_SORTED=true 2> {log}"
rule dedup: rule dedup:
input: input:
"results/bwa/{sample}_{stage}_{mark}_q30.dupmark.bam" "results_10M/bwa/{sample}_{stage}_{mark}_q30.dupmark.bam"
output: output:
"results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam" "results_10M/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"
log: log:
"logs/{sample}_{stage}_{mark}.dedup" "results_10M/logs/{sample}_{stage}_{mark}.dedup"
shell: shell:
"samtools view -F 1804 -b {input} | samtools sort -o {output} - 2> {log}" "samtools view -F 1804 -b {input} | samtools sort -o {output} - 2> {log}"
rule indexBam: rule indexBam:
input: input:
"results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam" "results_10M/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"
output: output:
"results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai" "results_10M/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai"
log: log:
"logs/{sample}_{stage}_{mark}.indexBam" "results_10M/logs/{sample}_{stage}_{mark}.indexBam"
shell: shell:
"samtools index {input} {output} 2> {log}" "samtools index {input} {output} 2> {log}"
rule mergeBAMreplicates: rule mergeBAMreplicates:
input: input:
E10 = ["results/bwa/ENCFF124UYX_E10.5_{mark}_q30.sorted.dedup.bam", "results/bwa/ENCFF045IPK_E10.5_{mark}_q30.sorted.dedup.bam"], E10 = ["results_10M/bwa/ENCFF124UYX_E10.5_H3K4me3_q30.sorted.dedup.bam", "results_10M/bwa/ENCFF045IPK_E10.5_H3K4me3_q30.sorted.dedup.bam"],
E11 = ["results/bwa/ENCFF760QYZ_E11.5_{mark}_q30.sorted.dedup.bam", "results/bwa/ENCFF717QDV_E11.5_{mark}_q30.sorted.dedup.bam"], E11 = ["results_10M/bwa/ENCFF760QYZ_E11.5_H3K4me3_q30.sorted.dedup.bam", "results_10M/bwa/ENCFF717QDV_E11.5_H3K4me3_q30.sorted.dedup.bam"],
E12 = ["results/bwa/ENCFF182ZPF_E12.5_{mark}_q30.sorted.dedup.bam", "results/bwa/ENCFF941QJZ_E12.5_{mark}_q30.sorted.dedup.bam"], E12 = ["results_10M/bwa/ENCFF182ZPF_E12.5_H3K4me3_q30.sorted.dedup.bam", "results_10M/bwa/ENCFF941QJZ_E12.5_H3K4me3_q30.sorted.dedup.bam"],
E13 = ["results/bwa/ENCFF485UDC_E13.5_{mark}_q30.sorted.dedup.bam", "results/bwa/ENCFF124TAB_E13.5_{mark}_q30.sorted.dedup.bam"], E13 = ["results_10M/bwa/ENCFF485UDC_E13.5_H3K4me3_q30.sorted.dedup.bam", "results_10M/bwa/ENCFF124TAB_E13.5_H3K4me3_q30.sorted.dedup.bam"],
E14 = ["results/bwa/ENCFF724DMU_E14.5_{mark}_q30.sorted.dedup.bam", "results/bwa/ENCFF665QBJ_E14.5_{mark}_q30.sorted.dedup.bam"], E14 = ["results_10M/bwa/ENCFF724DMU_E14.5_H3K4me3_q30.sorted.dedup.bam", "results_10M/bwa/ENCFF665QBJ_E14.5_H3K4me3_q30.sorted.dedup.bam"],
E15 = ["results/bwa/ENCFF258KCR_E15.5_{mark}_q30.sorted.dedup.bam", "results/bwa/ENCFF401BKM_E15.5_{mark}_q30.sorted.dedup.bam"], E15 = ["results_10M/bwa/ENCFF258KCR_E15.5_H3K4me3_q30.sorted.dedup.bam", "results_10M/bwa/ENCFF401BKM_E15.5_H3K4me3_q30.sorted.dedup.bam"],
E10C = ["results/bwa/ENCFF157KEH_E10.5_H3K4me3_q30.sorted.dedup.bam", "results/bwa/ENCFF825AVI_E10.5_H3K4me3_q30.sorted.dedup.bam"], E10C = ["results_10M/bwa/ENCFF157KEH_E10.5_H3K4me3_q30.sorted.dedup.bam", "results_10M/bwa/ENCFF825AVI_E10.5_H3K4me3_q30.sorted.dedup.bam"],
E11C = ["results/bwa/ENCFF184CUE_E11.5_H3K4me3_q30.sorted.dedup.bam", "results/bwa/ENCFF376FGM_E11.5_H3K4me3_q30.sorted.dedup.bam"], E11C = ["results_10M/bwa/ENCFF184CUE_E11.5_H3K4me3_q30.sorted.dedup.bam", "results_10M/bwa/ENCFF376FGM_E11.5_H3K4me3_q30.sorted.dedup.bam"],
E12C = ["results/bwa/ENCFF203JQV_E12.5_H3K4me3_q30.sorted.dedup.bam", "results/bwa/ENCFF058AUT_E12.5_H3K4me3_q30.sorted.dedup.bam"], E12C = ["results_10M/bwa/ENCFF203JQV_E12.5_H3K4me3_q30.sorted.dedup.bam", "results_10M/bwa/ENCFF058AUT_E12.5_H3K4me3_q30.sorted.dedup.bam"],
E13C = ["results/bwa/ENCFF117QRC_E13.5_H3K4me3_q30.sorted.dedup.bam", "results/bwa/ENCFF248PGK_E13.5_H3K4me3_q30.sorted.dedup.bam"], E13C = ["results_10M/bwa/ENCFF117QRC_E13.5_H3K4me3_q30.sorted.dedup.bam", "results_10M/bwa/ENCFF248PGK_E13.5_H3K4me3_q30.sorted.dedup.bam"],
E14C = ["results/bwa/ENCFF784ORI_E14.5_H3K4me3_q30.sorted.dedup.bam", "results/bwa/ENCFF002HZV_E14.5_H3K4me3_q30.sorted.dedup.bam"], E14C = ["results_10M/bwa/ENCFF784ORI_E14.5_H3K4me3_q30.sorted.dedup.bam", "results_10M/bwa/ENCFF002HZV_E14.5_H3K4me3_q30.sorted.dedup.bam"],
E15C = ["results/bwa/ENCFF727QTS_E15.5_H3K4me3_q30.sorted.dedup.bam", "results/bwa/ENCFF182XFG_E15.5_H3K4me3_q30.sorted.dedup.bam"], E15C = ["results_10M/bwa/ENCFF727QTS_E15.5_H3K4me3_q30.sorted.dedup.bam", "results_10M/bwa/ENCFF182XFG_E15.5_H3K4me3_q30.sorted.dedup.bam"],
output: output:
E10 = "results/bwa/E10.5_H3K4me3_q30.sorted.pooled.dedup.bam", E10 = "results_10M/bwa/E10.5_H3K4me3_q30.sorted.pooled.dedup.bam",
E11 = "results/bwa/E11.5_H3K4me3_q30.sorted.pooled.dedup.bam", E11 = "results_10M/bwa/E11.5_H3K4me3_q30.sorted.pooled.dedup.bam",
E12 = "results/bwa/E12.5_H3K4me3_q30.sorted.pooled.dedup.bam", E12 = "results_10M/bwa/E12.5_H3K4me3_q30.sorted.pooled.dedup.bam",
E13 = "results/bwa/E13.5_H3K4me3_q30.sorted.pooled.dedup.bam", E13 = "results_10M/bwa/E13.5_H3K4me3_q30.sorted.pooled.dedup.bam",
E14 = "results/bwa/E14.5_H3K4me3_q30.sorted.pooled.dedup.bam", E14 = "results_10M/bwa/E14.5_H3K4me3_q30.sorted.pooled.dedup.bam",
E15 = "results/bwa/E15.5_H3K4me3_q30.sorted.pooled.dedup.bam", E15 = "results_10M/bwa/E15.5_H3K4me3_q30.sorted.pooled.dedup.bam",
E10C = "results/bwa/input_E10.5_H3K4me3_q30.sorted.dedup.bam", E10C = "results_10M/bwa/input_E10.5_H3K4me3_q30.sorted.dedup.bam",
E11C = "results/bwa/input_E11.5_H3K4me3_q30.sorted.dedup.bam", E11C = "results_10M/bwa/input_E11.5_H3K4me3_q30.sorted.dedup.bam",
E12C = "results/bwa/input_E12.5_H3K4me3_q30.sorted.dedup.bam", E12C = "results_10M/bwa/input_E12.5_H3K4me3_q30.sorted.dedup.bam",
E13C = "results/bwa/input_E13.5_H3K4me3_q30.sorted.dedup.bam", E13C = "results_10M/bwa/input_E13.5_H3K4me3_q30.sorted.dedup.bam",
E14C = "results/bwa/input_E14.5_H3K4me3_q30.sorted.dedup.bam", E14C = "results_10M/bwa/input_E14.5_H3K4me3_q30.sorted.dedup.bam",
E15C = "results/bwa/input_E15.5_H3K4me3_q30.sorted.dedup.bam" E15C = "results_10M/bwa/input_E15.5_H3K4me3_q30.sorted.dedup.bam"
log: log:
E10 = "logs/E10.5_H3K4me3.mergeBAM", E10 = "results_10M/logs/E10.5_H3K4me3.mergeBAM",
E11 = "logs/E11.5_H3K4me3.mergeBAM", E11 = "results_10M/logs/E11.5_H3K4me3.mergeBAM",
E12 = "logs/E12.5_H3K4me3.mergeBAM", E12 = "results_10M/logs/E12.5_H3K4me3.mergeBAM",
E13 = "logs/E13.5_H3K4me3.mergeBAM", E13 = "results_10M/logs/E13.5_H3K4me3.mergeBAM",
E14 = "logs/E14.5_H3K4me3.mergeBAM", E14 = "results_10M/logs/E14.5_H3K4me3.mergeBAM",
E15 = "logs/E15.5_H3K4me3.mergeBAM", E15 = "results_10M/logs/E15.5_H3K4me3.mergeBAM",
E10C = "logs/input_E10.5_H3K4me3.mergeBAM", E10C = "results_10M/logs/input_E10.5_H3K4me3.mergeBAM",
E11C = "logs/input_E11.5_H3K4me3.mergeBAM", E11C = "results_10M/logs/input_E11.5_H3K4me3.mergeBAM",
E12C = "logs/input_E12.5_H3K4me3.mergeBAM", E12C = "results_10M/logs/input_E12.5_H3K4me3.mergeBAM",
E13C = "logs/input_E13.5_H3K4me3.mergeBAM", E13C = "results_10M/logs/input_E13.5_H3K4me3.mergeBAM",
E14C = "logs/input_E14.5_H3K4me3.mergeBAM", E14C = "results_10M/logs/input_E14.5_H3K4me3.mergeBAM",
E15C = "logs/input_E15.5_H3K4me3.mergeBAM" E15C = "results_10M/logs/input_E15.5_H3K4me3.mergeBAM"
run: run:
shell("samtools merge {output.E10} {input.E10} 2> {log.E10}") shell("samtools merge {output.E10} {input.E10} 2> {log.E10}")
shell("samtools merge {output.E11} {input.E11} 2> {log.E11}") shell("samtools merge {output.E11} {input.E11} 2> {log.E11}")
...@@ -253,39 +258,39 @@ rule mergeBAMreplicates: ...@@ -253,39 +258,39 @@ rule mergeBAMreplicates:
rule mappingStats: rule mappingStats:
input: input:
a="results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam", a="results_10M/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam",
b="results/bwa/{sample}_{stage}_{mark}_q30.dupmark.bam", #b="results_10M/bwa/{sample}_{stage}_{mark}_q30.dupmark.bam",
c="results/bwa/{sample}_{stage}_{mark}_q30.sorted.bam", #c="results_10M/bwa/{sample}_{stage}_{mark}_q30.sorted.bam",
d="results/bwa/{sample}_{stage}_{mark}.bam" #d="results_10M/bwa/{sample}_{stage}_{mark}.bam"
output: output:
a="results/qc/{sample}_{stage}_{mark}.dedup.flagstat.qc", a="results_10M/qc/{sample}_{stage}_{mark}.dedup.flagstat.qc",
b="results/qc/{sample}_{stage}_{mark}.dupmark.flagstat.qc", #b="results_10M/qc/{sample}_{stage}_{mark}.dupmark.flagstat.qc",
c="results/qc/{sample}_{stage}_{mark}.q30.flagstat.qc", #c="results_10M/qc/{sample}_{stage}_{mark}.q30.flagstat.qc",
d="results/qc/{sample}_{stage}_{mark}.unfiltered.flagstat.qc", #d="results_10M/qc/{sample}_{stage}_{mark}.unfiltered.flagstat.qc",
run: run:
shell("samtools flagstat {input.a} > {output.a}") shell("samtools flagstat {input.a} > {output.a}")
shell("samtools flagstat {input.b} > {output.b}") #shell("samtools flagstat {input.b} > {output.b}")
shell("samtools flagstat {input.c} > {output.c}") #shell("samtools flagstat {input.c} > {output.c}")
shell("samtools flagstat {input.d} > {output.d}") #shell("samtools flagstat {input.d} > {output.d}")
# rule sort_name: # rule sort_name:
# input: # input:
# "results/bwa/{sample}_{stage}_{mark}_q30.dupmark.bam" # "results_10M/bwa/{sample}_{stage}_{mark}_q30.dupmark.bam"
# output: # output:
# tmp = "results/bwa/{sample}_{stage}_{mark}_q30.dupmark.tmp.bam" # tmp = "results_10M/bwa/{sample}_{stage}_{mark}_q30.dupmark.tmp.bam"
# log: # log:
# "logs/{sample}_{stage}_{mark}.pbc.sort" # "results_10M/logs/{sample}_{stage}_{mark}.pbc.sort"
# run: # run:
# shell("samtools sort -n {input} -o {output.tmp} 2> {log}") # shell("samtools sort -n {input} -o {output.tmp} 2> {log}")
# #
# rule estimate_lib_complexity: # rule estimate_lib_complexity:
# input: # input:
# "results/bwa/{sample}_{stage}_{mark}_q30.dupmark.tmp.bam" # "results_10M/bwa/{sample}_{stage}_{mark}_q30.dupmark.tmp.bam"
# output: # output:
# qc = "results/qc/{sample}_{stage}_{mark}.pbc.qc", # qc = "results_10M/qc/{sample}_{stage}_{mark}.pbc.qc",
# log: # log:
# "logs/{sample}_{stage}_{mark}.pbc" # "results_10M/logs/{sample}_{stage}_{mark}.pbc"
# shell: # shell:
# """ # """
# bedtools bamtobed -i {input} \ # bedtools bamtobed -i {input} \
...@@ -314,13 +319,13 @@ rule mappingStats: ...@@ -314,13 +319,13 @@ rule mappingStats:
rule deeptools_summary: rule deeptools_summary:
input: input:
expand(["results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"], zip, sample=all_samples, stage=STAGE, mark = MARK) expand(["results_10M/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"], zip, sample=all_samples, stage=STAGE, mark = MARK)
output: output:
sum="results/qc/multibamsum.npz", sum="results_10M/qc/H3K4me3_multibamsum.npz",
counts="results/qc/multibamsum.tab" counts="results_10M/qc/H3K4me3_multibamsum.tab"
threads: 32 threads: 32
log: log:
"logs/multisummary.deepTools" "results_10M/logs/H3K4me3_multisummary.deepTools"
shell: shell:
" multiBamSummary bins \ " multiBamSummary bins \
-p {threads} \ -p {threads} \
...@@ -330,12 +335,12 @@ rule deeptools_summary: ...@@ -330,12 +335,12 @@ rule deeptools_summary:
--outRawCounts {output.counts} 2> {log}" --outRawCounts {output.counts} 2> {log}"
rule deeptools_correlation: rule deeptools_correlation:
input: "results/qc/multibamsum.npz" input: "results_10M/qc/H3K4me3_multibamsum.npz"
output: output:
fig="results/qc/pearsoncor_multibamsum.png", fig="results_10M/qc/H3K4me3_pearsoncor_multibamsum.png",
matrix="results/qc/pearsoncor_multibamsum_matrix.txt" matrix="results_10M/qc/H3K4me3_pearsoncor_multibamsum_matrix.txt"
log: log:
"logs/correlation.deepTools" "results_10M/logs/H3K4me3_correlation.deepTools"
shell: shell:
"plotCorrelation \ "plotCorrelation \
--corData {input} \ --corData {input} \
...@@ -349,26 +354,26 @@ rule deeptools_correlation: ...@@ -349,26 +354,26 @@ rule deeptools_correlation:
rule deeptools_coverage: rule deeptools_coverage:
input: input:
bam = "results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam", bam = "results_10M/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam",
bai = "results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai" bai = "results_10M/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai"
output: output:
"results/qc/{sample}_{stage}_{mark}.SeqDepthNorm.bw" "results_10M/qc/{sample}_{stage}_{mark}.SeqDepthNorm.bw"
log: log:
"logs/{sample}_{stage}_{mark}_coverage.deepTools" "results_10M/logs/{sample}_{stage}_{mark}_coverage.deepTools"
shell: shell:
"bamCoverage --bam {input.bam} --binSize 10 --normalizeUsing RPGC --effectiveGenomeSize 2308125349 --numberOfProcessors 4 -o {output} 2> {log}" "bamCoverage --bam {input.bam} --binSize 10 --normalizeUsing RPGC --effectiveGenomeSize 2308125349 --numberOfProcessors 4 -o {output} 2> {log}"
rule deeptools_fingerprint: rule deeptools_fingerprint:
input: input:
bam = expand(["results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"], zip, sample=all_samples, stage=STAGE, mark = MARK), bam = expand(["results_10M/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"], zip, sample=all_samples, stage=STAGE, mark = MARK),
bai = expand(["results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai"], zip, sample=all_samples, stage=STAGE, mark = MARK) bai = expand(["results_10M/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai"], zip, sample=all_samples, stage=STAGE, mark = MARK)
output: output:
fig="results/qc/multiBAM_fingerprint.png", fig="results_10M/qc/H3K4me3_multiBAM_fingerprint.png",
metrics="results/qc/multiBAM_fingerprint_metrics.txt", metrics="results_10M/qc/H3K4me3_multiBAM_fingerprint_metrics.txt",
rawcounts="results/qc/multiBAM_fingerprint_rawcounts.txt" rawcounts="results_10M/qc/H3K4me3_multiBAM_fingerprint_rawcounts.txt"
threads: 32 threads: 32
log: log:
"logs/fingerprint.deepTools" "results_10M/logs/H3K4me3_fingerprint.deepTools"
shell: shell:
"plotFingerprint -p {threads} \ "plotFingerprint -p {threads} \
-b {input.bam} \ -b {input.bam} \
...@@ -381,13 +386,13 @@ rule deeptools_fingerprint: ...@@ -381,13 +386,13 @@ rule deeptools_fingerprint:
rule deeptools_plotCoverage: rule deeptools_plotCoverage:
input: input:
bam = expand(["results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"], zip, sample=all_samples, stage=STAGE, mark=MARK), bam = expand(["results_10M/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"], zip, sample=all_samples, stage=STAGE, mark=MARK),
bai = expand(["results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai"], zip, sample=all_samples, stage=STAGE, mark=MARK) bai = expand(["results_10M/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai"], zip, sample=all_samples, stage=STAGE, mark=MARK)
output: output:
fig="results/qc/plot_coverage.png", fig="results_10M/qc/H3K4me3_plot_coverage.png",
rawcounts="results/qc/plot_coverage_rawcounts.tab" rawcounts="results_10M/qc/H3K4me3_plot_coverage_rawcounts.tab"
log: log:
"logs/plotCoverage.deepTools" "results_10M/logs/H3K4me3_plotCoverage.deepTools"
shell: shell:
"plotCoverage --bamfiles {input.bam} --plotFile {output.fig} \ "plotCoverage --bamfiles {input.bam} --plotFile {output.fig} \
-n 1000000 \ -n 1000000 \
...@@ -395,15 +400,16 @@ rule deeptools_plotCoverage: ...@@ -395,15 +400,16 @@ rule deeptools_plotCoverage:
--ignoreDuplicates 2> {log}" --ignoreDuplicates 2> {log}"
# =============================================================================================== # ===============================================================================================
# 6. Cross-correlation scores - following ENCODE code # 6. Cross-correlation scores - following ENCODE code
# =============================================================================================== # ===============================================================================================
rule bamtobed_crossC: rule bamtobed_crossC:
input: input:
"results/bwa/{sample}_{stage}_{mark}_q30.dupmark.bam" "results_10M/bwa/{sample}_{stage}_{mark}_q30.dupmark.bam"
output: output:
tagAlign = "results/bed/{sample}_{stage}_{mark}_q30_SE.tagAlign.gz" tagAlign = "results_10M/bed/{sample}_{stage}_{mark}_q30_SE.tagAlign.gz"
shell: shell:
""" """
bedtools bamtobed -i {input} | \ bedtools bamtobed -i {input} | \
...@@ -415,10 +421,10 @@ rule bamtobed_crossC: ...@@ -415,10 +421,10 @@ rule bamtobed_crossC:
## Estimate read length from first 100 reads ## Estimate read length from first 100 reads
rule subsample_aligned_reads: rule subsample_aligned_reads:
input: input:
"results/bed/{sample}_{stage}_{mark}_q30_SE.tagAlign.gz" "results_10M/bed/{sample}_{stage}_{mark}_q30_SE.tagAlign.gz"
output: output:
subsample = "results/bed/{sample}_{stage}_{mark}.filt.sample.15Mreads.SE.tagAlign.gz", subsample = "results_10M/bed/{sample}_{stage}_{mark}.filt.sample.15Mreads.SE.tagAlign.gz",
tmp = "results/bed/{sample}_{stage}_{mark}_R1_trimmed_q30_SE.tagAlign.tmp" tmp = "results_10M/bed/{sample}_{stage}_{mark}_R1_trimmed_q30_SE.tagAlign.tmp"
params: params:
nreads= 15000000 nreads= 15000000
run: run:
...@@ -437,12 +443,12 @@ rule subsample_aligned_reads: ...@@ -437,12 +443,12 @@ rule subsample_aligned_reads:
rule cross_correlation_SSP: rule cross_correlation_SSP:
input: input:
"results/bed/{sample}_{stage}_{mark}.filt.sample.15Mreads.SE.tagAlign.gz" "results_10M/bed/{sample}_{stage}_{mark}.filt.sample.15Mreads.SE.tagAlign.gz"
output: output:
CC_SCORES_FILE="results/qc/{sample}_{stage}_{mark}_filt_15Mreads.SE.cc.qc", CC_SCORES_FILE="results_10M/qc/{sample}_{stage}_{mark}_filt_15Mreads.SE.cc.qc",
CC_PLOT_FILE="results/qc/{sample}_{stage}_{mark}_filt_15Mreads.SE.cc.plot.pdf" CC_PLOT_FILE="results_10M/qc/{sample}_{stage}_{mark}_filt_15Mreads.SE.cc.plot.pdf"
log: log:
"logs/{sample}_{stage}_{mark}_filt_15Mreads.SE.spp.log" "results_10M/logs/{sample}_{stage}_{mark}_filt_15Mreads.SE.spp.log"
params: params:
EXCLUSION_RANGE_MIN=-500, EXCLUSION_RANGE_MIN=-500,
EXCLUSION_RANGE_MAX=85 EXCLUSION_RANGE_MAX=85
...@@ -460,46 +466,46 @@ rule cross_correlation_SSP: ...@@ -460,46 +466,46 @@ rule cross_correlation_SSP:
rule call_peaks_macs2: rule call_peaks_macs2:
input: input:
control = "results/bwa/{control}_{stage}_{mark}_q30.sorted.dedup.bam", control = "results_10M/bwa/{control}_{stage}_{mark}_q30.sorted.dedup.bam",
case = "results/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bam" case = "results_10M/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bam"
output: output:
"results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.xls", "results_10M/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.xls",
"results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_summits.bed", "results_10M/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_summits.bed",
"results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak", "results_10M/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak",
log: log:
"logs/{case}_vs_{control}_{stage}_{mark}_call_peaks_macs2.log" "results_10M/logs/{case}_vs_{control}_{stage}_{mark}_call_peaks_macs2.log"
params: params:
name = "{case}_vs_{control}_{stage}_{mark}_macs2", name = "{case}_vs_{control}_{stage}_{mark}_macs2",
shell: shell:
" macs2 callpeak -f BAM -t {input.case} \ " macs2 callpeak -f BAM -t {input.case} \
-c {input.control} --keep-dup all \ -c {input.control} --keep-dup all \
--outdir results/macs2/ -p 0.01 \ --outdir results_10M/macs2/ -p 0.01 \
-n {params.name} \ -n {params.name} \
-g mm 2> {log} " -g mm 2> {log} "
rule call_peaks_macs2_pooled_replicates: rule call_peaks_macs2_pooled_replicates:
input: input:
E10 = "results/bwa/E10.5_H3K4me3_q30.sorted.pooled.dedup.bam", E10 = "results_10M/bwa/E10.5_H3K4me3_q30.sorted.pooled.dedup.bam",
E11 = "results/bwa/E11.5_H3K4me3_q30.sorted.pooled.dedup.bam", E11 = "results_10M/bwa/E11.5_H3K4me3_q30.sorted.pooled.dedup.bam",
E12 = "results/bwa/E12.5_H3K4me3_q30.sorted.pooled.dedup.bam", E12 = "results_10M/bwa/E12.5_H3K4me3_q30.sorted.pooled.dedup.bam",
E13 = "results/bwa/E13.5_H3K4me3_q30.sorted.pooled.dedup.bam", E13 = "results_10M/bwa/E13.5_H3K4me3_q30.sorted.pooled.dedup.bam",
E14 = "results/bwa/E14.5_H3K4me3_q30.sorted.pooled.dedup.bam", E14 = "results_10M/bwa/E14.5_H3K4me3_q30.sorted.pooled.dedup.bam",
E15 = "results/bwa/E15.5_H3K4me3_q30.sorted.pooled.dedup.bam", E15 = "results_10M/bwa/E15.5_H3K4me3_q30.sorted.pooled.dedup.bam",
E10C = "results/bwa/input_E10.5_H3K4me3_q30.sorted.dedup.bam", E10C = "results_10M/bwa/input_E10.5_H3K4me3_q30.sorted.dedup.bam",
E11C = "results/bwa/input_E11.5_H3K4me3_q30.sorted.dedup.bam", E11C = "results_10M/bwa/input_E11.5_H3K4me3_q30.sorted.dedup.bam",
E12C = "results/bwa/input_E12.5_H3K4me3_q30.sorted.dedup.bam", E12C = "results_10M/bwa/input_E12.5_H3K4me3_q30.sorted.dedup.bam",
E13C = "results/bwa/input_E13.5_H3K4me3_q30.sorted.dedup.bam", E13C = "results_10M/bwa/input_E13.5_H3K4me3_q30.sorted.dedup.bam",
E14C = "results/bwa/input_E14.5_H3K4me3_q30.sorted.dedup.bam", E14C = "results_10M/bwa/input_E14.5_H3K4me3_q30.sorted.dedup.bam",
E15C = "results/bwa/input_E15.5_H3K4me3_q30.sorted.dedup.bam" E15C = "results_10M/bwa/input_E15.5_H3K4me3_q30.sorted.dedup.bam"
output: output:
directory("results/macs2/pooled/") directory("results_10M/macs2/pooled/")
log: log:
E10 = "results/qc/E10.5_H3K4me3.pooled.macs2", E10 = "results_10M/qc/E10.5_H3K4me3.pooled.macs2",
E11 = "results/qc/E11.5_H3K4me3.pooled.macs2", E11 = "results_10M/qc/E11.5_H3K4me3.pooled.macs2",
E12 = "results/qc/E12.5_H3K4me3.pooled.macs2", E12 = "results_10M/qc/E12.5_H3K4me3.pooled.macs2",
E13 = "results/qc/E13.5_H3K4me3.pooled.macs2", E13 = "results_10M/qc/E13.5_H3K4me3.pooled.macs2",
E14 = "results/qc/E14.5_H3K4me3.pooled.macs2", E14 = "results_10M/qc/E14.5_H3K4me3.pooled.macs2",
E15 = "results/qc/E15.5_H3K4me3.pooled.macs2" E15 = "results_10M/qc/E15.5_H3K4me3.pooled.macs2"
params: params:
E10 = "E10.5_H3K4me3.pooled.macs2", E10 = "E10.5_H3K4me3.pooled.macs2",
E11 = "E11.5_H3K4me3.pooled.macs2", E11 = "E11.5_H3K4me3.pooled.macs2",
...@@ -510,32 +516,32 @@ rule call_peaks_macs2_pooled_replicates: ...@@ -510,32 +516,32 @@ rule call_peaks_macs2_pooled_replicates:
run: run:
shell("macs2 callpeak -f BAM -t {input.E10} \ shell("macs2 callpeak -f BAM -t {input.E10} \
-c {input.E10C} --keep-dup all \ -c {input.E10C} --keep-dup all \
--outdir results/macs2/ -p 0.01 \ --outdir results_10M/macs2/ -p 0.01 \
-n {params.E10} \ -n {params.E10} \
-g mm 2> {log.E10}" ) -g mm 2> {log.E10}" )
shell("macs2 callpeak -f BAM -t {input.E11} \ shell("macs2 callpeak -f BAM -t {input.E11} \
-c {input.E11C} --keep-dup all \ -c {input.E11C} --keep-dup all \
--outdir results/macs2/ -p 0.01 \ --outdir results_10M/macs2/ -p 0.01 \
-n {params.E11} \ -n {params.E11} \
-g mm 2> {log.E11}" ) -g mm 2> {log.E11}" )
shell("macs2 callpeak -f BAM -t {input.E12} \ shell("macs2 callpeak -f BAM -t {input.E12} \
-c {input.E12C} --keep-dup all \ -c {input.E12C} --keep-dup all \
--outdir results/macs2/ -p 0.01 \ --outdir results_10M/macs2/ -p 0.01 \
-n {params.E12} \ -n {params.E12} \
-g mm 2> {log.E12}" ) -g mm 2> {log.E12}" )
shell("macs2 callpeak -f BAM -t {input.E13} \ shell("macs2 callpeak -f BAM -t {input.E13} \
-c {input.E13C} --keep-dup all \ -c {input.E13C} --keep-dup all \
--outdir results/macs2/ -p 0.01 \ --outdir results_10M/macs2/ -p 0.01 \
-n {params.E13} \ -n {params.E13} \
-g mm 2> {log.E13}" ) -g mm 2> {log.E13}" )
shell("macs2 callpeak -f BAM -t {input.E14} \ shell("macs2 callpeak -f BAM -t {input.E14} \
-c {input.E14C} --keep-dup all \ -c {input.E14C} --keep-dup all \
--outdir results/macs2/ -p 0.01 \ --outdir results_10M/macs2/ -p 0.01 \
-n {params.E14} \ -n {params.E14} \
-g mm 2> {log.E14}" ) -g mm 2> {log.E14}" )
shell("macs2 callpeak -f BAM -t {input.E15} \ shell("macs2 callpeak -f BAM -t {input.E15} \
-c {input.E15C} --keep-dup all \ -c {input.E15C} --keep-dup all \
--outdir results/macs2/ -p 0.01 \ --outdir results_10M/macs2/ -p 0.01 \
-n {params.E15} \ -n {params.E15} \
-g mm 2> {log.E15}" ) -g mm 2> {log.E15}" )
...@@ -548,21 +554,21 @@ rule call_peaks_macs2_pooled_replicates: ...@@ -548,21 +554,21 @@ rule call_peaks_macs2_pooled_replicates:
## Convert BAM to tagAlign file for calculating FRiP QC metric (Fraction of reads in peaks) ## Convert BAM to tagAlign file for calculating FRiP QC metric (Fraction of reads in peaks)
rule bamToBed: rule bamToBed:
input: input:
"results/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bam" "results_10M/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bam"
output: output:
"results/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bed" "results_10M/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bed"
log: log:
"logs/{case}_{stage}_{mark}.bamToBed" "results_10M/logs/{case}_{stage}_{mark}.bamToBed"
shell: shell:
"samtools sort -n {input} | bedtools bamtobed -i - > {output}" "samtools sort -n {input} | bedtools bamtobed -i - > {output}"
# ## Fraction of reads in peaks # ## Fraction of reads in peaks
rule frip: rule frip:
input: input:
bed = "results/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bed", bed = "results_10M/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bed",
peak = "results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak" peak = "results_10M/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak"
output: output:
"results/qc/{case}_vs_{control}_{stage}_{mark}.frip.txt" "results_10M/qc/{case}_vs_{control}_{stage}_{mark}.frip.txt"
shell: shell:
"python2.7 scripts/encode_frip.py {input.bed} {input.peak} > {output}" "python2.7 scripts/encode_frip.py {input.bed} {input.peak} > {output}"
...@@ -576,25 +582,25 @@ rule frip: ...@@ -576,25 +582,25 @@ rule frip:
rule overlap_peaks: rule overlap_peaks:
input: input:
E10= ["results/macs2/ENCFF124UYX_vs_ENCFF157KEH_E10.5_H3K4me3_macs2_peaks.narrowPeak", "results/macs2/ENCFF045IPK_vs_ENCFF825AVI_E10.5_H3K4me3_macs2_peaks.narrowPeak"], E10= ["results_10M/macs2/ENCFF124UYX_vs_ENCFF157KEH_E10.5_H3K4me3_macs2_peaks.narrowPeak", "results_10M/macs2/ENCFF045IPK_vs_ENCFF825AVI_E10.5_H3K4me3_macs2_peaks.narrowPeak"],
E11= ["results/macs2/ENCFF760QYZ_vs_ENCFF184CUE_E11.5_H3K4me3_macs2_peaks.narrowPeak", "results/macs2/ENCFF717QDV_vs_ENCFF376FGM_E11.5_H3K4me3_macs2_peaks.narrowPeak"], E11= ["results_10M/macs2/ENCFF760QYZ_vs_ENCFF184CUE_E11.5_H3K4me3_macs2_peaks.narrowPeak", "results_10M/macs2/ENCFF717QDV_vs_ENCFF376FGM_E11.5_H3K4me3_macs2_peaks.narrowPeak"],
E12= ["results/macs2/ENCFF941QJZ_vs_ENCFF058AUT_E12.5_H3K4me3_macs2_peaks.narrowPeak", "results/macs2/ENCFF182ZPF_vs_ENCFF203JQV_E12.5_H3K4me3_macs2_peaks.narrowPeak"], E12= ["results_10M/macs2/ENCFF941QJZ_vs_ENCFF058AUT_E12.5_H3K4me3_macs2_peaks.narrowPeak", "results_10M/macs2/ENCFF182ZPF_vs_ENCFF203JQV_E12.5_H3K4me3_macs2_peaks.narrowPeak"],
E13= ["results/macs2/ENCFF485UDC_vs_ENCFF117QRC_E13.5_H3K4me3_macs2_peaks.narrowPeak", "results/macs2/ENCFF124TAB_vs_ENCFF248PGK_E13.5_H3K4me3_macs2_peaks.narrowPeak"], E13= ["results_10M/macs2/ENCFF485UDC_vs_ENCFF117QRC_E13.5_H3K4me3_macs2_peaks.narrowPeak", "results_10M/macs2/ENCFF124TAB_vs_ENCFF248PGK_E13.5_H3K4me3_macs2_peaks.narrowPeak"],
E14= ["results/macs2/ENCFF665QBJ_vs_ENCFF002HZV_E14.5_H3K4me3_macs2_peaks.narrowPeak", "results/macs2/ENCFF724DMU_vs_ENCFF784ORI_E14.5_H3K4me3_macs2_peaks.narrowPeak"], E14= ["results_10M/macs2/ENCFF665QBJ_vs_ENCFF002HZV_E14.5_H3K4me3_macs2_peaks.narrowPeak", "results_10M/macs2/ENCFF724DMU_vs_ENCFF784ORI_E14.5_H3K4me3_macs2_peaks.narrowPeak"],
E15= ["results/macs2/ENCFF401BKM_vs_ENCFF182XFG_E15.5_H3K4me3_macs2_peaks.narrowPeak", "results/macs2/ENCFF258KCR_vs_ENCFF727QTS_E15.5_H3K4me3_macs2_peaks.narrowPeak"], E15= ["results_10M/macs2/ENCFF401BKM_vs_ENCFF182XFG_E15.5_H3K4me3_macs2_peaks.narrowPeak", "results_10M/macs2/ENCFF258KCR_vs_ENCFF727QTS_E15.5_H3K4me3_macs2_peaks.narrowPeak"],
E10_pooled="results/macs2/E10.5_H3K4me3.pooled.macs2_peaks.narrowPeak", E10_pooled="results_10M/macs2/E10.5_H3K4me3.pooled.macs2_peaks.narrowPeak",
E11_pooled="results/macs2/E11.5_H3K4me3.pooled.macs2_peaks.narrowPeak", E11_pooled="results_10M/macs2/E11.5_H3K4me3.pooled.macs2_peaks.narrowPeak",
E12_pooled="results/macs2/E12.5_H3K4me3.pooled.macs2_peaks.narrowPeak", E12_pooled="results_10M/macs2/E12.5_H3K4me3.pooled.macs2_peaks.narrowPeak",
E13_pooled="results/macs2/E13.5_H3K4me3.pooled.macs2_peaks.narrowPeak", E13_pooled="results_10M/macs2/E13.5_H3K4me3.pooled.macs2_peaks.narrowPeak",
E14_pooled="results/macs2/E14.5_H3K4me3.pooled.macs2_peaks.narrowPeak", E14_pooled="results_10M/macs2/E14.5_H3K4me3.pooled.macs2_peaks.narrowPeak",
E15_pooled="results/macs2/E15.5_H3K4me3.pooled.macs2_peaks.narrowPeak", E15_pooled="results_10M/macs2/E15.5_H3K4me3.pooled.macs2_peaks.narrowPeak",
output: output:
E10= "results/macs2/H3K4me3_E10.5_overlap.narrowPeak", E10= "results_10M/macs2/H3K4me3_E10.5_overlap.narrowPeak",
E11= "results/macs2/H3K4me3_E11.5_overlap.narrowPeak", E11= "results_10M/macs2/H3K4me3_E11.5_overlap.narrowPeak",
E12= "results/macs2/H3K4me3_E12.5_overlap.narrowPeak", E12= "results_10M/macs2/H3K4me3_E12.5_overlap.narrowPeak",
E13= "results/macs2/H3K4me3_E13.5_overlap.narrowPeak", E13= "results_10M/macs2/H3K4me3_E13.5_overlap.narrowPeak",
E14= "results/macs2/H3K4me3_E14.5_overlap.narrowPeak", E14= "results_10M/macs2/H3K4me3_E14.5_overlap.narrowPeak",
E15= "results/macs2/H3K4me3_E15.5_overlap.narrowPeak" E15= "results_10M/macs2/H3K4me3_E15.5_overlap.narrowPeak"
run: run:
shell("python2.7 scripts/overlap_peaks.py {input.E10} {input.E10_pooled} {output.E10}") shell("python2.7 scripts/overlap_peaks.py {input.E10} {input.E10_pooled} {output.E10}")
shell("python2.7 scripts/overlap_peaks.py {input.E11} {input.E11_pooled} {output.E11}") shell("python2.7 scripts/overlap_peaks.py {input.E11} {input.E11_pooled} {output.E11}")
...@@ -603,6 +609,39 @@ rule overlap_peaks: ...@@ -603,6 +609,39 @@ rule overlap_peaks:
shell("python2.7 scripts/overlap_peaks.py {input.E14} {input.E14_pooled} {output.E14}") shell("python2.7 scripts/overlap_peaks.py {input.E14} {input.E14_pooled} {output.E14}")
shell("python2.7 scripts/overlap_peaks.py {input.E15} {input.E15_pooled} {output.E15}") shell("python2.7 scripts/overlap_peaks.py {input.E15} {input.E15_pooled} {output.E15}")
rule overlap_frip:
input:
E10bam = "results_10M/bwa/E10.5_H3K4me3_q30.sorted.pooled.dedup.bam",
E11bam = "results_10M/bwa/E11.5_H3K4me3_q30.sorted.pooled.dedup.bam",
E12bam = "results_10M/bwa/E12.5_H3K4me3_q30.sorted.pooled.dedup.bam",
E13bam = "results_10M/bwa/E13.5_H3K4me3_q30.sorted.pooled.dedup.bam",
E14bam = "results_10M/bwa/E14.5_H3K4me3_q30.sorted.pooled.dedup.bam",
E15bam = "results_10M/bwa/E15.5_H3K4me3_q30.sorted.pooled.dedup.bam",
E10bed = "results_10M/macs2/H3K4me3_E10.5_overlap.narrowPeak",
E11bed = "results_10M/macs2/H3K4me3_E11.5_overlap.narrowPeak",
E12bed = "results_10M/macs2/H3K4me3_E12.5_overlap.narrowPeak",
E13bed = "results_10M/macs2/H3K4me3_E13.5_overlap.narrowPeak",
E14bed = "results_10M/macs2/H3K4me3_E14.5_overlap.narrowPeak",
E15bed = "results_10M/macs2/H3K4me3_E15.5_overlap.narrowPeak"
output:
E10bed = "results_10M/macs2/H3K4me3_E10.5_overlap.frip",
E11bed = "results_10M/macs2/H3K4me3_E11.5_overlap.frip",
E12bed = "results_10M/macs2/H3K4me3_E12.5_overlap.frip",
E13bed = "results_10M/macs2/H3K4me3_E13.5_overlap.frip",
E14bed = "results_10M/macs2/H3K4me3_E14.5_overlap.frip",
E15bed = "results_10M/macs2/H3K4me3_E15.5_overlap.frip"
run:
shell("python2.7 scripts/encode_frip.py {input.E10bam} {input.E10bed} > {output.E10bed}")
shell("python2.7 scripts/encode_frip.py {input.E11bam} {input.E11bed} > {output.E11bed}")
shell("python2.7 scripts/encode_frip.py {input.E12bam} {input.E12bed} > {output.E12bed}")
shell("python2.7 scripts/encode_frip.py {input.E13bam} {input.E13bed} > {output.E13bed}")
shell("python2.7 scripts/encode_frip.py {input.E14bam} {input.E14bed} > {output.E14bed}")
shell("python2.7 scripts/encode_frip.py {input.E15bam} {input.E15bed} > {output.E15bed}")
# =============================================================================================== # ===============================================================================================
# 10. Combine all QC into multiqc output # 10. Combine all QC into multiqc output
# =============================================================================================== # ===============================================================================================
...@@ -610,54 +649,54 @@ rule overlap_peaks: ...@@ -610,54 +649,54 @@ rule overlap_peaks:
# rule multiqc: # rule multiqc:
# input: # input:
# fastqc # fastqc
# expand("results/fastqc/{sample}_R1_fastqc.html", sample=all_samples), # expand("results_10M/fastqc/{sample}_R1_fastqc.html", sample=all_samples),
# expand("results/fastqc/{sample}_R2_fastqc.html", sample=all_samples), # expand("results_10M/fastqc/{sample}_R2_fastqc.html", sample=all_samples),
# expand("results/fastqc/{sample}_R1_fastqc.zip", sample=all_samples), # expand("results_10M/fastqc/{sample}_R1_fastqc.zip", sample=all_samples),
# expand("results/fastqc/{sample}_R2_fastqc.zip", sample=all_samples), # expand("results_10M/fastqc/{sample}_R2_fastqc.zip", sample=all_samples),
# # bwa # # bwa
# expand("logs/{sample}.align", sample=all_samples), # expand("results_10M/logs/{sample}.align", sample=all_samples),
# expand("logs/{sample}.flagstat.qc", sample=all_samples), # expand("results_10M/logs/{sample}.flagstat.qc", sample=all_samples),
# # preseq # # preseq
# expand("results/preseq/{sample}.ccurve.txt", sample=all_samples), # expand("results_10M/preseq/{sample}.ccurve.txt", sample=all_samples),
# # picard # # picard
# expand("results/picard/{sample}_est_lib_complex_metrics.txt", sample=all_samples), # expand("results_10M/picard/{sample}_est_lib_complex_metrics.txt", sample=all_samples),
# # deepTools # # deepTools
# "results/deeptools/multibamsum.npz", # "results_10M/deeptools/multibamsum.npz",
# "results/deeptools/multibamsum.tab", # "results_10M/deeptools/multibamsum.tab",
# "results/deeptools/pearsoncor_multibamsum.png", # "results_10M/deeptools/pearsoncor_multibamsum.png",
# "results/deeptools/pearsoncor_multibamsum_matrix.txt", # "results_10M/deeptools/pearsoncor_multibamsum_matrix.txt",
# expand("results/deeptools/{sample}.SeqDepthNorm.bw", sample=all_samples), # expand("results_10M/deeptools/{sample}.SeqDepthNorm.bw", sample=all_samples),
# "results/deeptools/multiBAM_fingerprint.png", # "results_10M/deeptools/multiBAM_fingerprint.png",
# "results/deeptools/multiBAM_fingerprint_metrics.txt", # "results_10M/deeptools/multiBAM_fingerprint_metrics.txt",
# "results/deeptools/multiBAM_fingerprint_rawcounts.txt", # "results_10M/deeptools/multiBAM_fingerprint_rawcounts.txt",
# "results/deeptools/plot_coverage.png", # "results_10M/deeptools/plot_coverage.png",
# "results/deeptools/plot_coverage_rawcounts.tab", # "results_10M/deeptools/plot_coverage_rawcounts.tab",
# "results/deeptools/bamPEFragmentSize_hist.png", # "results_10M/deeptools/bamPEFragmentSize_hist.png",
# "results/deeptools/bamPEFragmentSize_rawcounts.tab", # "results_10M/deeptools/bamPEFragmentSize_rawcounts.tab",
# # phantomPeaks # # phantomPeaks
# expand("results/phantomPeaks/{sample}.spp.pdf", sample = IPS), # expand("results_10M/phantomPeaks/{sample}.spp.pdf", sample = IPS),
# expand("results/phantomPeaks/{sample}.spp.Rdata", sample = IPS), # expand("results_10M/phantomPeaks/{sample}.spp.Rdata", sample = IPS),
# expand("results/phantomPeaks/{sample}.spp.out", sample = IPS), # expand("results_10M/phantomPeaks/{sample}.spp.out", sample = IPS),
# # macs2 # # macs2
# expand("results/macs2/{case}_vs_{control}_macs2_peaks.narrowPeak", zip, case=IPS, control=INPUTS), # expand("results_10M/macs2/{case}_vs_{control}_macs2_peaks.narrowPeak", zip, case=IPS, control=INPUTS),
# expand("results/macs2/{case}_vs_{control}_macs2_peaks.xls", zip, case=IPS, control=INPUTS), # expand("results_10M/macs2/{case}_vs_{control}_macs2_peaks.xls", zip, case=IPS, control=INPUTS),
# expand("results/macs2/{case}_vs_{control}_macs2_summits.bed", zip, case=IPS, control=INPUTS), # expand("results_10M/macs2/{case}_vs_{control}_macs2_summits.bed", zip, case=IPS, control=INPUTS),
# expand("results/macs2/{case}-vs-{control}-narrowpeak-count_mqc.json", zip, case=IPS, control=INPUTS), # expand("results_10M/macs2/{case}-vs-{control}-narrowpeak-count_mqc.json", zip, case=IPS, control=INPUTS),
# expand("results/frip/{case}.frip.txt", case=IPS) # expand("results_10M/frip/{case}.frip.txt", case=IPS)
# params: # params:
# "results/fastqc/", # "results_10M/fastqc/",
# "results/bwa/", # "results_10M/bwa/",
# "logs/", # "results_10M/logs/",
# "results/deeptools/", # "results_10M/deeptools/",
# "results/macs2/", # "results_10M/macs2/",
# "results/phantomPeaks/", # "results_10M/phantomPeaks/",
# "results/frip/", # "results_10M/frip/",
# "results/picard/" # "results_10M/picard/"
# output: # output:
# directory("results/multiqc/multiqc_report_data/"), # directory("results_10M/multiqc/multiqc_report_data/"),
# "results/multiqc/multiqc_report.html", # "results_10M/multiqc/multiqc_report.html",
# log: # log:
# "logs/multiqc.log" # "results_10M/logs/multiqc.log"
# shell: # shell:
# "multiqc -f {params} -m fastqc -m samtools -m picard -m preseq -m deeptools -m phantompeakqualtools -m macs2 -o results/multiqc/ \ # "multiqc -f {params} -m fastqc -m samtools -m picard -m preseq -m deeptools -m phantompeakqualtools -m macs2 -o results_10M/multiqc/ \
# -n multiqc_report.html 2> {log}" # -n multiqc_report.html 2> {log}"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment