Skip to content
Snippets Groups Projects
Commit 8e388bed authored by Laura Cook's avatar Laura Cook
Browse files

removed --extend_reads in bamCoverage because mouse data in single-ended...

removed --extend_reads in bamCoverage because mouse data in single-ended unlike dunnart reads. Added indexed bam as an input for deepTools modules because otherwise it will try and run without first doing the index rule
parent c35e2d92
No related branches found
No related tags found
No related merge requests found
...@@ -30,8 +30,8 @@ with open(config['SAMPLES'], "r") as f: ...@@ -30,8 +30,8 @@ with open(config['SAMPLES'], "r") as f:
# skip the header # skip the header
header = next(reader) header = next(reader)
for row in reader: for row in reader:
STAGE.append(row[1]) STAGE.append(row[2])
MARK.append(row[2]) MARK.append(row[1])
IPS.append(row[3]) IPS.append(row[3])
INPUTS.append(row[4]) INPUTS.append(row[4])
...@@ -49,50 +49,48 @@ all_samples = IPS + unique_inputs ...@@ -49,50 +49,48 @@ all_samples = IPS + unique_inputs
rule all: rule all:
input: input:
expand("results/bwa/{sample}_{stage}_{mark}.sorted.bam", zip, sample=all_samples, stage=STAGE, mark=MARK), expand("results/bwa/{sample}_{stage}_{mark}_q30.sorted.bam", zip, sample=all_samples, stage=STAGE, mark=MARK),
expand("results/bwa/{sample}_{stage}_{mark}_PPq30.sorted.bam", zip, sample=all_samples, stage=STAGE, mark=MARK), expand("results/bwa/{sample}_{stage}_{mark}_q30.dedup.bam", zip, sample=all_samples, stage=STAGE, mark=MARK),
expand("results/bwa/{sample}_{stage}_{mark}_PPq30.sorted.dupmark.bam", zip, sample=all_samples, stage=STAGE, mark=MARK), expand("results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam", zip, sample=all_samples, stage=STAGE, mark=MARK),
expand("results/bwa/{sample}_{stage}_{mark}_PPq30.sorted.dedup.bam", zip, sample=all_samples, stage=STAGE, mark=MARK), expand("results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai", zip, sample=all_samples, stage=STAGE, mark=MARK),
expand("results/bwa/{sample}_{stage}_{mark}_PPq30.sorted.dedup.bai", zip, sample=all_samples, stage=STAGE, mark=MARK),
expand("logs/{sample}_{stage}_{mark}.flagstat.qc", zip, sample=all_samples, stage=STAGE, mark=MARK), expand("logs/{sample}_{stage}_{mark}.flagstat.qc", zip, sample=all_samples, stage=STAGE, mark=MARK),
expand("results/preseq/{sample}_{stage}_{mark}.ccurve.txt", zip, sample=all_samples, stage=STAGE, mark=MARK), # #expand("results/preseq/{sample}_{stage}_{mark}.ccurve.txt", zip, sample=all_samples, stage=STAGE, mark=MARK),
expand("results/picard/{sample}_{stage}_{mark}_est_lib_complex_metrics.txt", zip, sample=all_samples, stage=STAGE, mark=MARK), # #expand("results/picard/{sample}_{stage}_{mark}_est_lib_complex_metrics.txt", zip, sample=all_samples, stage=STAGE, mark=MARK),
expand("logs/{sample}_{stage}_{mark}.picardLibComplexity", zip, sample=all_samples, stage=STAGE, mark=MARK), # #expand("logs/{sample}_{stage}_{mark}.picardLibComplexity", zip, sample=all_samples, stage=STAGE, mark=MARK),
"results/deeptools/multibamsum.npz", "results/deeptools/multibamsum.npz",
"results/deeptools/multibamsum.tab", "results/deeptools/multibamsum.tab",
"results/deeptools/pearsoncor_multibamsum.png", "results/deeptools/pearsoncor_multibamsum.png",
"results/deeptools/pearsoncor_multibamsum_matrix.txt", "results/deeptools/pearsoncor_multibamsum_matrix.txt",
expand("results/deeptools/{sample}_{stage}_{mark}.SeqDepthNorm.bw", zip, sample=all_samples, stage=STAGE, mark=MARK), expand("results/deeptools/{sample}_{stage}_{mark}.SeqDepthNorm.bw", zip, sample=all_samples, stage=STAGE, mark=MARK),
"results/deeptools/multiBAM_fingerprint.png", "results/deeptools/multiBAM_fingerprint.png",
"results/deeptools/multiBAM_fingerprint_metrics.txt", "results/deeptools/multiBAM_fingerprint_metrics.txt",
"results/deeptools/multiBAM_fingerprint_rawcounts.txt", "results/deeptools/multiBAM_fingerprint_rawcounts.txt",
"results/deeptools/plot_coverage.png", "results/deeptools/plot_coverage.png",
"results/deeptools/plot_coverage_rawcounts.tab", "results/deeptools/plot_coverage_rawcounts.tab",
"results/deeptools/bamPEFragmentSize_hist.png", expand("results/phantomPeaks/{sample}_{stage}_{mark}.spp.pdf", zip, sample=all_samples, stage=STAGE, mark=MARK),
"results/deeptools/bamPEFragmentSize_rawcounts.tab", expand("results/phantomPeaks/{sample}_{stage}_{mark}.spp.Rdata", zip, sample=all_samples, stage=STAGE, mark=MARK),
expand("results/phantomPeaks/{sample}_{stage}_{mark}.spp.pdf", zip, sample=all_samples, stage=STAGE, mark=MARK), expand("results/phantomPeaks/{sample}_{stage}_{mark}.spp.out", zip, sample=all_samples, stage=STAGE, mark=MARK),
expand("results/phantomPeaks/{sample}_{stage}_{mark}.spp.Rdata", zip, sample=all_samples, stage=STAGE, mark=MARK), expand("results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
expand("results/phantomPeaks/{sample}_{stage}_{mark}.spp.out", zip, sample=all_samples, stage=STAGE, mark=MARK), expand("results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.xls", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
expand("results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK), expand("results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_summits.bed", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
expand("results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.xls", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK), expand("logs/{case}_vs_{control}_{stage}_{mark}_call_peaks_macs2.log", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
expand("results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_summits.bed", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK), #expand("results/macs2/{case}-vs-{control}_{stage}_{mark}-narrowpeak-count_mqc.json", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
expand("results/macs2/{case}-vs-{control}_{stage}_{mark}-narrowpeak-count_mqc.json", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK), # expand("results/bwa/{case}_{stage}_{mark}.bed", zip, case=IPS, stage=STAGE, mark=MARK),
expand("results/bwa/{case}_{stage}_{mark}.bedpe", zip, case=IPS, stage=STAGE, mark=MARK), # expand("logs/{case}_{stage}_{mark}.bamToBed", zip, case=IPS, stage=STAGE, mark=MARK),
expand("logs/{case}_{stage}_{mark}.bamToBed", zip, case=IPS, stage=STAGE, mark=MARK), # expand("results/frip/{case}_vs_{control}_{stage}_{mark}.frip.txt", case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
expand("results/frip/{case}_vs_{control}_{stage}_{mark}.frip.txt", case=IPS, control=INPUTS, stage=STAGE, mark=MARK), # "results/macs2/E10.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
"results/macs2/E10.5_H3K27ac_macs2_pooled_peaks.narrowPeak", # "results/macs2/E11.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
"results/macs2/E11.5_H3K27ac_macs2_pooled_peaks.narrowPeak", # "results/macs2/E12.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
"results/macs2/E12.5_H3K27ac_macs2_pooled_peaks.narrowPeak", # "results/macs2/E13.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
"results/macs2/E13.5_H3K27ac_macs2_pooled_peaks.narrowPeak", # "results/macs2/E14.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
"results/macs2/E14.5_H3K27ac_macs2_pooled_peaks.narrowPeak", # "results/macs2/E15.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
"results/macs2/E15.5_H3K27ac_macs2_pooled_peaks.narrowPeak", # "results/macs2/E10.5_H3K4me3_macs2_pooled_peaks.narrowPeak",
"results/macs2/E10.5_H3K4me3_macs2_pooled_peaks.narrowPeak", # "results/macs2/E11.5_H3K4me3_macs2_pooled_peaks.narrowPeak",
"results/macs2/E11.5_H3K4me3_macs2_pooled_peaks.narrowPeak", # "results/macs2/E12.5_H3K4me3_macs2_pooled_peaks.narrowPeak",
"results/macs2/E12.5_H3K4me3_macs2_pooled_peaks.narrowPeak", # "results/macs2/E13.5_H3K4me3_macs2_pooled_peaks.narrowPeak",
"results/macs2/E13.5_H3K4me3_macs2_pooled_peaks.narrowPeak", # "results/macs2/E14.5_H3K4me3_macs2_pooled_peaks.narrowPeak",
"results/macs2/E14.5_H3K4me3_macs2_pooled_peaks.narrowPeak", # "results/macs2/E15.5_H3K4me3_macs2_pooled_peaks.narrowPeak",
"results/macs2/E15.5_H3K4me3_macs2_pooled_peaks.narrowPeak", # expand("results/macs2/{stage}_{mark}_overlap.narrowPeak", zip, stage= STAGE, mark = MARK)
expand("results/macs2/{stage}_{mark}_overlap.narrowPeak", zip, stage= STAGE, mark = MARK)
# directory("results/multiqc/multiqc_report_data/"), # directory("results/multiqc/multiqc_report_data/"),
# "results/multiqc/multiqc_report.html" # "results/multiqc/multiqc_report.html"
...@@ -120,10 +118,15 @@ rule all: ...@@ -120,10 +118,15 @@ rule all:
# =============================================================================================== # ===============================================================================================
# 4. FILTERING # 4. FILTERING
# > remove unmapped, mate unmapped # > remove unmapped, mate unmapped
# > remove low MAPQ reads (-q) # > remove low MAPQ reads (-q 30)
# > keep proper pairs (-f) # > -F 1804
# -Read unmapped
# -Mate unmapped
# -Not primary alignment
# -Read fails platform/vendor quality checks
# -Read is PCR or optical duplicate
# > mark duplicates & remove duplicates (picard) # > mark duplicates & remove duplicates (picard)
# > index final position sorted BAM # > re-filter, sort and index final BAM
# =============================================================================================== # ===============================================================================================
rule filter: rule filter:
...@@ -132,22 +135,32 @@ rule filter: ...@@ -132,22 +135,32 @@ rule filter:
output: output:
"results/bwa/{sample}_{stage}_{mark}_q30.sorted.bam" "results/bwa/{sample}_{stage}_{mark}_q30.sorted.bam"
log: log:
"logs/{sample}_{stage}_{mark}.dedup" "logs/{sample}_{stage}_{mark}.filter"
shell: shell:
"samtools sort {input} | samtools view -b -q 30 -o {output} - 2> {log}" "samtools view -b -F 1804 -q 30 {input} | samtools sort -o {output} - 2> {log}"
rule markDups: rule markDups:
input: input:
"results/bwa/{sample}_{stage}_{mark}_q30.sorted.bam" "results/bwa/{sample}_{stage}_{mark}_q30.sorted.bam"
output: output:
bam="results/bwa/{sample}_{stage}_{mark}_q30.sorted.dupmark.bam", bam="results/bwa/{sample}_{stage}_{mark}_q30.dedup.bam",
dupQC="results/bwa/{sample}_{stage}_{mark}.dup.qc" dupQC="results/bwa/{sample}_{stage}_{mark}.dedup.qc"
log: log:
"logs/{sample}_{stage}_{mark}.dupmark" "logs/{sample}_{stage}_{mark}.dedup"
shell: shell:
"picard MarkDuplicates I={input} O={output.bam} \ "picard MarkDuplicates I={input} O={output.bam} \
METRICS_FILE={output.dupQC} REMOVE_DUPLICATES=true ASSUME_SORTED=true 2> {log}" METRICS_FILE={output.dupQC} REMOVE_DUPLICATES=true ASSUME_SORTED=true 2> {log}"
rule filter2:
input:
"results/bwa/{sample}_{stage}_{mark}_q30.dedup.bam"
output:
"results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"
log:
"logs/{sample}_{stage}_{mark}.dedup"
shell:
"samtools view -b -F 1804 {input} | samtools sort -o {output} - 2> {log}"
rule indexBam: rule indexBam:
input: input:
"results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam" "results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"
...@@ -158,13 +171,13 @@ rule indexBam: ...@@ -158,13 +171,13 @@ rule indexBam:
shell: shell:
"samtools index {input} {output} 2> {log}" "samtools index {input} {output} 2> {log}"
# =============================================================================================== # # ===============================================================================================
# 4. ALIGNMENT QC # # 4. ALIGNMENT QC
# > SAMtools flagstat statistics # # > SAMtools flagstat statistics
# > CollectMultipleMetrics (picard) # # > CollectMultipleMetrics (picard)
# > Compute Library Complexity (PreSeq) # # > Compute Library Complexity (PreSeq)
# =============================================================================================== # # ===============================================================================================
#
rule mappingStats: rule mappingStats:
input: input:
"results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam" "results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"
...@@ -172,35 +185,35 @@ rule mappingStats: ...@@ -172,35 +185,35 @@ rule mappingStats:
"logs/{sample}_{stage}_{mark}.flagstat.qc" "logs/{sample}_{stage}_{mark}.flagstat.qc"
shell: shell:
"samtools flagstat {input} > {output}" "samtools flagstat {input} > {output}"
#
rule preseq: # # rule preseq:
input: # # input:
"results/bwa/{sample}_{stage}_{mark}.bam" # # "results/bwa/{sample}_{stage}_{mark}.bam"
output: # # output:
"results/preseq/{sample}_{stage}_{mark}.ccurve.txt" # # "results/preseq/{sample}_{stage}_{mark}.ccurve.txt"
log: # # log:
"logs/{sample}_{stage}_{mark}.preseq" # # "logs/{sample}_{stage}_{mark}.preseq"
shell: # # shell:
"preseq lc_extrap -v -output {output} -bam {input} 2> {log}" # # "preseq lc_extrap -v -output {output} -bam {input} 2> {log}"
# #
# #
rule get_picard_complexity_metrics: # # rule get_picard_complexity_metrics:
input: # # input:
"results/bwa/{sample}_{stage}_{mark}.bam" # # "results/bwa/{sample}_{stage}_{mark}.bam"
output: # # output:
"results/picard/{sample}_{stage}_{mark}_est_lib_complex_metrics.txt" # # "results/picard/{sample}_{stage}_{mark}_est_lib_complex_metrics.txt"
log: # # log:
"logs/{sample}_{stage}_{mark}.picardLibComplexity" # # "logs/{sample}_{stage}_{mark}.picardLibComplexity"
shell: # # shell:
"picard -Xmx6G EstimateLibraryComplexity INPUT={input} OUTPUT={output} USE_JDK_DEFLATER=TRUE USE_JDK_INFLATER=TRUE VERBOSITY=ERROR" # # "picard -Xmx6G EstimateLibraryComplexity INPUT={input} OUTPUT={output} USE_JDK_DEFLATER=TRUE USE_JDK_INFLATER=TRUE VERBOSITY=ERROR"
#
# =============================================================================================== # # ===============================================================================================
# 5. deepTools # # 5. deepTools
# > multiBAMsummary # # > multiBAMsummary
# > plotCorrelation # # > plotCorrelation
# > plotFingerprint # # > plotFingerprint
# > bamCoverage (read depth normalised bigWig files) # # > bamCoverage (read depth normalised bigWig files)
# =============================================================================================== # # ===============================================================================================
rule deeptools_summary: rule deeptools_summary:
input: input:
...@@ -239,23 +252,19 @@ rule deeptools_correlation: ...@@ -239,23 +252,19 @@ rule deeptools_correlation:
rule deeptools_coverage: rule deeptools_coverage:
input: input:
"results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam" bam = "results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam",
bai = "results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai"
output: output:
"results/deeptools/{sample}_{stage}_{mark}.SeqDepthNorm.bw" "results/deeptools/{sample}_{stage}_{mark}.SeqDepthNorm.bw"
log: log:
"logs/{sample}_{stage}_{mark}_coverage.deepTools" "logs/{sample}_{stage}_{mark}_coverage.deepTools"
shell: shell:
"bamCoverage \ "bamCoverage --bam {input.bam} --binSize 10 --normalizeUsing RPGC --effectiveGenomeSize 2308125349 --numberOfProcessors 4 -o {output} 2> {log}"
--bam {input} \
--binSize 10 \
--normalizeUsing RPGC \
--effectiveGenomeSize add mm10 \
--extendReads \
-o {output} 2> {log}"
rule deeptools_fingerprint: rule deeptools_fingerprint:
input: input:
expand(["results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"], zip, sample=all_samples, stage=STAGE, mark = MARK) bam = expand(["results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"], zip, sample=all_samples, stage=STAGE, mark = MARK),
bai = expand(["results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai"], zip, sample=all_samples, stage=STAGE, mark = MARK)
output: output:
fig="results/deeptools/multiBAM_fingerprint.png", fig="results/deeptools/multiBAM_fingerprint.png",
metrics="results/deeptools/multiBAM_fingerprint_metrics.txt", metrics="results/deeptools/multiBAM_fingerprint_metrics.txt",
...@@ -265,7 +274,7 @@ rule deeptools_fingerprint: ...@@ -265,7 +274,7 @@ rule deeptools_fingerprint:
"logs/fingerprint.deepTools" "logs/fingerprint.deepTools"
shell: shell:
"plotFingerprint -p {threads} \ "plotFingerprint -p {threads} \
-b {input} \ -b {input.bam} \
--plotFile {output.fig} \ --plotFile {output.fig} \
--outQualityMetrics {output.metrics} \ --outQualityMetrics {output.metrics} \
--outRawCounts {output.rawcounts} \ --outRawCounts {output.rawcounts} \
...@@ -275,22 +284,23 @@ rule deeptools_fingerprint: ...@@ -275,22 +284,23 @@ rule deeptools_fingerprint:
rule deeptools_plotCoverage: rule deeptools_plotCoverage:
input: input:
expand(["results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"], zip, sample=all_samples, stage=STAGE, mark = MARK) bam = expand(["results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"], zip, sample=all_samples, stage=STAGE, mark=MARK),
bai = expand(["results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai"], zip, sample=all_samples, stage=STAGE, mark=MARK)
output: output:
fig="results/deeptools/plot_coverage.png", fig="results/deeptools/plot_coverage.png",
rawcounts="results/deeptools/plot_coverage_rawcounts.tab" rawcounts="results/deeptools/plot_coverage_rawcounts.tab"
log: log:
"logs/plotCoverage.deepTools" "logs/plotCoverage.deepTools"
shell: shell:
"plotCoverage --plotFile {output.fig} \ "plotCoverage --bamfiles {input.bam} --plotFile {output.fig} \
-n 1000000 \ -n 1000000 \
--outRawCounts {output.rawcounts} \ --outRawCounts {output.rawcounts} \
--ignoreDuplicates 2> {log}" --ignoreDuplicates 2> {log}"
# =============================================================================================== # # ===============================================================================================
# 6. phantomPeakQuals # # 6. phantomPeakQuals
# =============================================================================================== # # ===============================================================================================
rule phantomPeakQuals: rule phantomPeakQuals:
input: input:
...@@ -304,20 +314,20 @@ rule phantomPeakQuals: ...@@ -304,20 +314,20 @@ rule phantomPeakQuals:
shell: shell:
"Rscript scripts/run_spp.R -c={input} -savp={output.savp} -savd={output.savd} -out={output.out} 2> {log}" "Rscript scripts/run_spp.R -c={input} -savp={output.savp} -savd={output.savd} -out={output.out} 2> {log}"
# =============================================================================================== # # ===============================================================================================
# 7. Call peaks (MACS2) # # 7. Call peaks (MACS2)
# =============================================================================================== # # ===============================================================================================
rule call_peaks_macs2: rule call_peaks_macs2:
input: input:
control = "results/bwa/{control}_{stage}_{mark}_PPq30.sorted.dedup.bam", control = "results/bwa/{control}_{stage}_{mark}_q30.sorted.dedup.bam",
case = "results/bwa/{control}_{stage}_{mark}_PPq30.sorted.dedup.bam" case = "results/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bam"
output: output:
"results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.xls", "results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.xls",
"results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_summits.bed", "results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_summits.bed",
"results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak", "results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak",
log: log:
"logs/{case}_vs_{control}_call_peaks_macs2.log" "logs/{case}_vs_{control}_{stage}_{mark}_call_peaks_macs2.log"
params: params:
name = "{case}_vs_{control}_{stage}_{mark}_macs2", name = "{case}_vs_{control}_{stage}_{mark}_macs2",
shell: shell:
...@@ -328,116 +338,99 @@ rule call_peaks_macs2: ...@@ -328,116 +338,99 @@ rule call_peaks_macs2:
-g mm 2> {log} " -g mm 2> {log} "
# =============================================================================================== # # ===============================================================================================
# 8. Peak QC # # 8. Peak QC
# > narrow peak counts # # > narrow peak counts
# > fraction of reads in peaks (convert BAM to bed first then do intersect with peaks) # # > fraction of reads in peaks (convert BAM to bed first then do intersect with peaks)
# =============================================================================================== # # ===============================================================================================
# peak counts in a format that multiqc can handle
rule get_narrow_peak_counts_for_multiqc:
input:
peaks = "results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak"
output:
"results/macs2/{case}-vs-{control}_{stage}_{mark}-narrowpeak-count_mqc.json"
params:
peakType = "narrowPeak"
shell:
"python3 scripts/count_peaks.py \
--peak_type {params.peakType} \
--peaks {input.peaks} --sample_name {wildcards.case} > {output}"
## Convert BAM to tagAlign file for calculating FRiP QC metric (Fraction of reads in peaks)
rule bamToBed:
input:
"results/bwa/{case}_{stage}_{mark}_PPq30.sorted.dedup.bam"
output:
"results/bwa/{case}_{stage}_{mark}.bed"
log:
"logs/{case}_{stage}_{mark}_.bamToBed"
shell:
"samtools sort -n {input} | bedtools bamtobed -i - > {output}"
## Fraction of reads in peaks #peak counts in a format that multiqc can handle
rule frip: # rule get_narrow_peak_counts_for_multiqc:
input: # input:
bed = "results/bwa/{case}_{stage}_{mark}.bed", # peaks = "results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak"
peak = "results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak" # output:
output: # "results/macs2/{case}-vs-{control}_{stage}_{mark}-narrowpeak-count_mqc.json"
"results/frip/{case}_vs_{control}_{stage}_{mark}.frip.txt" # params:
shell: # peakType = "narrowPeak"
"python2.7 scripts/encode_frip.py {input.bed} {input.peak} > {output}" # shell:
# "python3 scripts/count_peaks.py \
# --peak_type {params.peakType} \
# =============================================================================================== # --peaks {input.peaks} --sample_name {wildcards.case} > {output}"
# 9. Create consensus peaksets for replicates #
# =============================================================================================== #
# ## Convert BAM to tagAlign file for calculating FRiP QC metric (Fraction of reads in peaks)
# Based on ENCODE `overlap_peaks.py` - recommended for histone marks. # rule bamToBed:
# Need to pool peaks first for each replicate # input:
# "results/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bam"
# output:
# "results/bwa/{case}_{stage}_{mark}.bed"
# log:
# "logs/{case}_{stage}_{mark}.bamToBed"
# shell:
# "samtools sort -n {input} | bedtools bamtobed -i - > {output}"
#
#
# ## Fraction of reads in peaks
# rule frip:
# input:
# bed = "results/bwa/{case}_{stage}_{mark}.bed",
# peak = "results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak"
# output:
# "results/frip/{case}_vs_{control}_{stage}_{mark}.frip.txt"
# shell:
# "python2.7 scripts/encode_frip.py {input.bed} {input.peak} > {output}"
rule pool_peaks:
input:
M1S1 = expand(["results/macs2/{case}_vs_{control}_E10.5_H3K27ac_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS),
M1S2 = expand(["results/macs2/{case}_vs_{control}_E11.5_H3K27ac_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS),
M1S3 = expand(["results/macs2/{case}_vs_{control}_E12.5_H3K27ac_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS),
M1S4 = expand(["results/macs2/{case}_vs_{control}_E13.5_H3K27ac_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS),
M1S5 = expand(["results/macs2/{case}_vs_{control}_E14.5_H3K27ac_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS),
M1S6 = expand(["results/macs2/{case}_vs_{control}_E15.5_H3K27ac_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS),
M2S1 = expand(["results/macs2/{case}_vs_{control}_E10.5_H3K4me3_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS),
M2S2 = expand(["results/macs2/{case}_vs_{control}_E11.5_H3K4me3_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS),
M2S3 = expand(["results/macs2/{case}_vs_{control}_E12.5_H3K4me3_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS),
M2S4 = expand(["results/macs2/{case}_vs_{control}_E13.5_H3K4me3_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS),
M2S5 = expand(["results/macs2/{case}_vs_{control}_E14.5_H3K4me3_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS),
M2S6 = expand(["results/macs2/{case}_vs_{control}_E15.5_H3K4me3_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS)
output:
M1S1 = "results/macs2/E10.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
M1S2 = "results/macs2/E11.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
M1S3 = "results/macs2/E12.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
M1S4 = "results/macs2/E13.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
M1S5 = "results/macs2/E14.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
M1S6 = "results/macs2/E15.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
M2S1 = "results/macs2/E10.5_H3K4me3_macs2_pooled_peaks.narrowPeak",
M2S2 = "results/macs2/E11.5_H3K4me3_macs2_pooled_peaks.narrowPeak",
M2S3 = "results/macs2/E12.5_H3K4me3_macs2_pooled_peaks.narrowPeak",
M2S4 = "results/macs2/E13.5_H3K4me3_macs2_pooled_peaks.narrowPeak",
M2S5 = "results/macs2/E14.5_H3K4me3_macs2_pooled_peaks.narrowPeak",
M2S6 = "results/macs2/E15.5_H3K4me3_macs2_pooled_peaks.narrowPeak"
run:
shell("cat {input.M1S1} > {output.M1S1}")
shell("cat {input.M1S2} > {output.M1S2}")
shell("cat {input.M1S3} > {output.M1S3}")
shell("cat {input.M1S4} > {output.M1S4}")
shell("cat {input.M1S5} > {output.M1S5}")
shell("cat {input.M1S6} > {output.M1S6}")
shell("cat {input.M2S1} > {output.M2S1}")
shell("cat {input.M2S2} > {output.M2S2}")
shell("cat {input.M2S3} > {output.M2S3}")
shell("cat {input.M2S4} > {output.M2S4}")
shell("cat {input.M2S5} > {output.M2S5}")
shell("cat {input.M2S6} > {output.M2S6}")
rule overlap_peaks_H3K4me3:
input:
peaks=expand(["results/macs2/{case}_vs_{control}_E10.5_H3K4me3_macs2_peaks.narrowPeak"], ["results/macs2/{case}_vs_{control}_E11.5_H3K4me3_macs2_peaks.narrowPeak"],["results/macs2/{case}_vs_{control}_E12.5_H3K4me3_macs2_peaks.narrowPeak"], ["results/macs2/{case}_vs_{control}_E13.5_H3K4me3_macs2_peaks.narrowPeak"], ["results/macs2/{case}_vs_{control}_E14.5_H3K4me3_macs2_peaks.narrowPeak"], ["results/macs2/{case}_vs_{control}_E15.5_H3K4me3_macs2_peaks.narrowPeak"],zip, case=IPS, control=INPUTS),
pooled=expand("results/macs2/{stage}_{mark}_macs2_pooled_peaks.narrowPeak", zip, stage = STAGE, mark = MARK)
output:
expand("results/macs2/{stage}_{mark}_overlap.narrowPeak", zip, stage= STAGE, mark = MARK)
shell:
"python2.7 scripts/overlap_peaks.py {input.peaks} {input.pooled} {output}"
# # ===============================================================================================
# # 9. Create consensus peaksets for replicates
# # ===============================================================================================
#
# # Based on ENCODE `overlap_peaks.py` - recommended for histone marks.
# # Need to pool peaks first for each replicate
#
# rule pool_peaks:
# input:
# E10 = expand(["results/macs2/{case}_vs_{control}_E10.5_{mark}_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS, mark=MARK),
# E11 = expand(["results/macs2/{case}_vs_{control}_E11.5_{mark}_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS, mark=MARK),
# E12 = expand(["results/macs2/{case}_vs_{control}_E12.5_{mark}_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS, mark=MARK),
# E13 = expand(["results/macs2/{case}_vs_{control}_E13.5_{mark}_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS, mark=MARK),
# E14 = expand(["results/macs2/{case}_vs_{control}_E14.5_{mark}_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS, mark=MARK),
# E15 = expand(["results/macs2/{case}_vs_{control}_E15.5_{mark}_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS, mark=MARK)
# output:
# E10 = "results/macs2/E10.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
# E11 = "results/macs2/E11.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
# E12 = "results/macs2/E12.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
# E13 = "results/macs2/E13.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
# E14 = "results/macs2/E14.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
# E15 = "results/macs2/E15.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
# run:
# shell("cat {input.E10} > {output.E10}")
# shell("cat {input.E11} > {output.E11}")
# shell("cat {input.E12} > {output.E12}")
# shell("cat {input.E13} > {output.E13}")
# shell("cat {input.E14} > {output.E14}")
# shell("cat {input.E15} > {output.E15}")
#
# rule overlap_peaks:
# input:
# E10= expand(["results/macs2/{case}_vs_{control}_E10.5_{mark}_macs2_peaks.narrowPeak"]),
# E11= expand(["results/macs2/{case}_vs_{control}_E11.5_{mark}_macs2_peaks.narrowPeak"]),
# E12= expand(["results/macs2/{case}_vs_{control}_E12.5_{mark}_macs2_peaks.narrowPeak"]),
# E13= expand(["results/macs2/{case}_vs_{control}_E13.5_{mark}_macs2_peaks.narrowPeak"]),
# E14= expand(["results/macs2/{case}_vs_{control}_E14.5_{mark}_macs2_peaks.narrowPeak"]),
# E15= expand(["results/macs2/{case}_vs_{control}_E15.5_{mark}_macs2_peaks.narrowPeak"]).
# E10_pooled=expand("results/macs2/E10.5_{mark}_macs2_pooled_peaks.narrowPeak")
# E11_pooled=expand("results/macs2/E11.5_{mark}_macs2_pooled_peaks.narrowPeak")
# E12_pooled=
# E13_pooled=
# E14_pooled=
# E15_pooled=
# output:
# expand("results/macs2/{stage}_{mark}_overlap.narrowPeak", zip, stage= STAGE, mark = MARK)
# shell:
# "python2.7 scripts/overlap_peaks.py {input.peaks} {input.pooled} {output}"
#
rule overlap_peaks_H3K27ac:
input:
peaks=expand(["results/macs2/{case}_vs_{control}_E10.5_H3K27ac_macs2_peaks.narrowPeak"], ["results/macs2/{case}_vs_{control}_E11.5_H3K27ac_macs2_peaks.narrowPeak"],["results/macs2/{case}_vs_{control}_E12.5_H3K27ac_macs2_peaks.narrowPeak"], ["results/macs2/{case}_vs_{control}_E13.5_H3K27ac_macs2_peaks.narrowPeak"], ["results/macs2/{case}_vs_{control}_E14.5_H3K27ac_macs2_peaks.narrowPeak"], ["results/macs2/{case}_vs_{control}_E15.5_H3K27ac_macs2_peaks.narrowPeak"],zip, case=IPS, control=INPUTS),
pooled=expand("results/macs2/{stage}_{mark}_macs2_pooled_peaks.narrowPeak", zip, stage = STAGE, mark = MARK)
output:
expand("results/macs2/{stage}_{mark}_overlap.narrowPeak", zip, stage= STAGE, mark = MARK)
shell:
"python2.7 scripts/overlap_peaks.py {input.peaks} {input.pooled} {output}"
# =============================================================================================== # ===============================================================================================
# 10. QC on replicate peaks # 10. QC on replicate peaks
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment