Commit 8e388bed authored by Laura Cook

removed --extendReads in bamCoverage because the mouse data is single-ended, unlike the dunnart reads. Added the indexed BAM as an input for the deepTools modules because otherwise they will try to run without first doing the index rule.
parent c35e2d92
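The second fix works because Snakemake builds its job graph from declared inputs: listing the .bai next to the .bam makes each deepTools rule depend on the index rule, so indexing always runs first. A minimal sketch of the pattern introduced by this commit, with the wildcard pattern simplified to {sample} and the effective genome size taken from the bamCoverage call in the diff below:

# Minimal sketch of the dependency pattern; paths and wildcards are simplified
# relative to the actual Snakefile.
rule indexBam:
    input:
        "results/bwa/{sample}_q30.sorted.dedup.bam"
    output:
        "results/bwa/{sample}_q30.sorted.dedup.bai"
    shell:
        "samtools index {input} {output}"

# Declaring the index as an input forces indexBam to run before coverage is computed.
rule deeptools_coverage:
    input:
        bam = "results/bwa/{sample}_q30.sorted.dedup.bam",
        bai = "results/bwa/{sample}_q30.sorted.dedup.bai"
    output:
        "results/deeptools/{sample}.SeqDepthNorm.bw"
    # --extendReads is omitted because the mouse libraries are single-ended
    shell:
        "bamCoverage --bam {input.bam} --binSize 10 --normalizeUsing RPGC "
        "--effectiveGenomeSize 2308125349 -o {output}"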
@@ -30,8 +30,8 @@ with open(config['SAMPLES'], "r") as f:
# skip the header
header = next(reader)
for row in reader:
STAGE.append(row[1])
MARK.append(row[2])
STAGE.append(row[2])
MARK.append(row[1])
IPS.append(row[3])
INPUTS.append(row[4])
@@ -49,50 +49,48 @@ all_samples = IPS + unique_inputs
rule all:
input:
expand("results/bwa/{sample}_{stage}_{mark}.sorted.bam", zip, sample=all_samples, stage=STAGE, mark=MARK),
expand("results/bwa/{sample}_{stage}_{mark}_PPq30.sorted.bam", zip, sample=all_samples, stage=STAGE, mark=MARK),
expand("results/bwa/{sample}_{stage}_{mark}_PPq30.sorted.dupmark.bam", zip, sample=all_samples, stage=STAGE, mark=MARK),
expand("results/bwa/{sample}_{stage}_{mark}_PPq30.sorted.dedup.bam", zip, sample=all_samples, stage=STAGE, mark=MARK),
expand("results/bwa/{sample}_{stage}_{mark}_PPq30.sorted.dedup.bai", zip, sample=all_samples, stage=STAGE, mark=MARK),
expand("results/bwa/{sample}_{stage}_{mark}_q30.sorted.bam", zip, sample=all_samples, stage=STAGE, mark=MARK),
expand("results/bwa/{sample}_{stage}_{mark}_q30.dedup.bam", zip, sample=all_samples, stage=STAGE, mark=MARK),
expand("results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam", zip, sample=all_samples, stage=STAGE, mark=MARK),
expand("results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai", zip, sample=all_samples, stage=STAGE, mark=MARK),
expand("logs/{sample}_{stage}_{mark}.flagstat.qc", zip, sample=all_samples, stage=STAGE, mark=MARK),
expand("results/preseq/{sample}_{stage}_{mark}.ccurve.txt", zip, sample=all_samples, stage=STAGE, mark=MARK),
expand("results/picard/{sample}_{stage}_{mark}_est_lib_complex_metrics.txt", zip, sample=all_samples, stage=STAGE, mark=MARK),
expand("logs/{sample}_{stage}_{mark}.picardLibComplexity", zip, sample=all_samples, stage=STAGE, mark=MARK),
"results/deeptools/multibamsum.npz",
"results/deeptools/multibamsum.tab",
"results/deeptools/pearsoncor_multibamsum.png",
"results/deeptools/pearsoncor_multibamsum_matrix.txt",
expand("results/deeptools/{sample}_{stage}_{mark}.SeqDepthNorm.bw", zip, sample=all_samples, stage=STAGE, mark=MARK),
"results/deeptools/multiBAM_fingerprint.png",
"results/deeptools/multiBAM_fingerprint_metrics.txt",
"results/deeptools/multiBAM_fingerprint_rawcounts.txt",
"results/deeptools/plot_coverage.png",
"results/deeptools/plot_coverage_rawcounts.tab",
"results/deeptools/bamPEFragmentSize_hist.png",
"results/deeptools/bamPEFragmentSize_rawcounts.tab",
expand("results/phantomPeaks/{sample}_{stage}_{mark}.spp.pdf", zip, sample=all_samples, stage=STAGE, mark=MARK),
expand("results/phantomPeaks/{sample}_{stage}_{mark}.spp.Rdata", zip, sample=all_samples, stage=STAGE, mark=MARK),
expand("results/phantomPeaks/{sample}_{stage}_{mark}.spp.out", zip, sample=all_samples, stage=STAGE, mark=MARK),
expand("results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
expand("results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.xls", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
expand("results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_summits.bed", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
expand("results/macs2/{case}-vs-{control}_{stage}_{mark}-narrowpeak-count_mqc.json", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
expand("results/bwa/{case}_{stage}_{mark}.bedpe", zip, case=IPS, stage=STAGE, mark=MARK),
expand("logs/{case}_{stage}_{mark}.bamToBed", zip, case=IPS, stage=STAGE, mark=MARK),
expand("results/frip/{case}_vs_{control}_{stage}_{mark}.frip.txt", case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
"results/macs2/E10.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
"results/macs2/E11.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
"results/macs2/E12.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
"results/macs2/E13.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
"results/macs2/E14.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
"results/macs2/E15.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
"results/macs2/E10.5_H3K4me3_macs2_pooled_peaks.narrowPeak",
"results/macs2/E11.5_H3K4me3_macs2_pooled_peaks.narrowPeak",
"results/macs2/E12.5_H3K4me3_macs2_pooled_peaks.narrowPeak",
"results/macs2/E13.5_H3K4me3_macs2_pooled_peaks.narrowPeak",
"results/macs2/E14.5_H3K4me3_macs2_pooled_peaks.narrowPeak",
"results/macs2/E15.5_H3K4me3_macs2_pooled_peaks.narrowPeak",
expand("results/macs2/{stage}_{mark}_overlap.narrowPeak", zip, stage= STAGE, mark = MARK)
# #expand("results/preseq/{sample}_{stage}_{mark}.ccurve.txt", zip, sample=all_samples, stage=STAGE, mark=MARK),
# #expand("results/picard/{sample}_{stage}_{mark}_est_lib_complex_metrics.txt", zip, sample=all_samples, stage=STAGE, mark=MARK),
# #expand("logs/{sample}_{stage}_{mark}.picardLibComplexity", zip, sample=all_samples, stage=STAGE, mark=MARK),
"results/deeptools/multibamsum.npz",
"results/deeptools/multibamsum.tab",
"results/deeptools/pearsoncor_multibamsum.png",
"results/deeptools/pearsoncor_multibamsum_matrix.txt",
expand("results/deeptools/{sample}_{stage}_{mark}.SeqDepthNorm.bw", zip, sample=all_samples, stage=STAGE, mark=MARK),
"results/deeptools/multiBAM_fingerprint.png",
"results/deeptools/multiBAM_fingerprint_metrics.txt",
"results/deeptools/multiBAM_fingerprint_rawcounts.txt",
"results/deeptools/plot_coverage.png",
"results/deeptools/plot_coverage_rawcounts.tab",
expand("results/phantomPeaks/{sample}_{stage}_{mark}.spp.pdf", zip, sample=all_samples, stage=STAGE, mark=MARK),
expand("results/phantomPeaks/{sample}_{stage}_{mark}.spp.Rdata", zip, sample=all_samples, stage=STAGE, mark=MARK),
expand("results/phantomPeaks/{sample}_{stage}_{mark}.spp.out", zip, sample=all_samples, stage=STAGE, mark=MARK),
expand("results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
expand("results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.xls", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
expand("results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_summits.bed", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
expand("logs/{case}_vs_{control}_{stage}_{mark}_call_peaks_macs2.log", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
#expand("results/macs2/{case}-vs-{control}_{stage}_{mark}-narrowpeak-count_mqc.json", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
# expand("results/bwa/{case}_{stage}_{mark}.bed", zip, case=IPS, stage=STAGE, mark=MARK),
# expand("logs/{case}_{stage}_{mark}.bamToBed", zip, case=IPS, stage=STAGE, mark=MARK),
# expand("results/frip/{case}_vs_{control}_{stage}_{mark}.frip.txt", case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
# "results/macs2/E10.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
# "results/macs2/E11.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
# "results/macs2/E12.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
# "results/macs2/E13.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
# "results/macs2/E14.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
# "results/macs2/E15.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
# "results/macs2/E10.5_H3K4me3_macs2_pooled_peaks.narrowPeak",
# "results/macs2/E11.5_H3K4me3_macs2_pooled_peaks.narrowPeak",
# "results/macs2/E12.5_H3K4me3_macs2_pooled_peaks.narrowPeak",
# "results/macs2/E13.5_H3K4me3_macs2_pooled_peaks.narrowPeak",
# "results/macs2/E14.5_H3K4me3_macs2_pooled_peaks.narrowPeak",
# "results/macs2/E15.5_H3K4me3_macs2_pooled_peaks.narrowPeak",
# expand("results/macs2/{stage}_{mark}_overlap.narrowPeak", zip, stage= STAGE, mark = MARK)
# directory("results/multiqc/multiqc_report_data/"),
# "results/multiqc/multiqc_report.html"
@@ -120,10 +118,15 @@ rule all:
# ===============================================================================================
# 4. FILTERING
# > remove unmapped, mate unmapped
# > remove low MAPQ reads (-q)
# > keep proper pairs (-f)
# > remove low MAPQ reads (-q 30)
# > -F 1804
# -Read unmapped
# -Mate unmapped
# -Not primary alignment
# -Read fails platform/vendor quality checks
# -Read is PCR or optical duplicate
# > mark duplicates & remove duplicates (picard)
# > index final position sorted BAM
# > re-filter, sort and index final BAM
# ===============================================================================================
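The -F 1804 value is simply the sum of the SAM flag bits listed above; a quick Python check of the arithmetic:

# SAM flag bits dropped by `samtools view -F 1804`
excluded = {
    "read unmapped": 0x4,
    "mate unmapped": 0x8,
    "not primary alignment": 0x100,
    "fails platform/vendor quality checks": 0x200,
    "PCR or optical duplicate": 0x400,
}
assert sum(excluded.values()) == 1804  # 4 + 8 + 256 + 512 + 1024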
rule filter:
@@ -132,22 +135,32 @@ rule filter:
output:
"results/bwa/{sample}_{stage}_{mark}_q30.sorted.bam"
log:
"logs/{sample}_{stage}_{mark}.dedup"
"logs/{sample}_{stage}_{mark}.filter"
shell:
"samtools sort {input} | samtools view -b -q 30 -o {output} - 2> {log}"
"samtools view -b -F 1804 -q 30 {input} | samtools sort -o {output} - 2> {log}"
rule markDups:
input:
"results/bwa/{sample}_{stage}_{mark}_q30.sorted.bam"
output:
bam="results/bwa/{sample}_{stage}_{mark}_q30.sorted.dupmark.bam",
dupQC="results/bwa/{sample}_{stage}_{mark}.dup.qc"
bam="results/bwa/{sample}_{stage}_{mark}_q30.dedup.bam",
dupQC="results/bwa/{sample}_{stage}_{mark}.dedup.qc"
log:
"logs/{sample}_{stage}_{mark}.dupmark"
"logs/{sample}_{stage}_{mark}.dedup"
shell:
"picard MarkDuplicates I={input} O={output.bam} \
METRICS_FILE={output.dupQC} REMOVE_DUPLICATES=true ASSUME_SORTED=true 2> {log}"
rule filter2:
input:
"results/bwa/{sample}_{stage}_{mark}_q30.dedup.bam"
output:
"results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"
log:
"logs/{sample}_{stage}_{mark}.dedup"
shell:
"samtools view -b -F 1804 {input} | samtools sort -o {output} - 2> {log}"
rule indexBam:
input:
"results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"
@@ -158,13 +171,13 @@ rule indexBam:
shell:
"samtools index {input} {output} 2> {log}"
# ===============================================================================================
# 4. ALIGNMENT QC
# > SAMtools flagstat statistics
# > CollectMultipleMetrics (picard)
# > Compute Library Complexity (PreSeq)
# ===============================================================================================
# # ===============================================================================================
# # 4. ALIGNMENT QC
# # > SAMtools flagstat statistics
# # > CollectMultipleMetrics (picard)
# # > Compute Library Complexity (PreSeq)
# # ===============================================================================================
#
rule mappingStats:
input:
"results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"
@@ -172,35 +185,35 @@ rule mappingStats:
"logs/{sample}_{stage}_{mark}.flagstat.qc"
shell:
"samtools flagstat {input} > {output}"
rule preseq:
input:
"results/bwa/{sample}_{stage}_{mark}.bam"
output:
"results/preseq/{sample}_{stage}_{mark}.ccurve.txt"
log:
"logs/{sample}_{stage}_{mark}.preseq"
shell:
"preseq lc_extrap -v -output {output} -bam {input} 2> {log}"
rule get_picard_complexity_metrics:
input:
"results/bwa/{sample}_{stage}_{mark}.bam"
output:
"results/picard/{sample}_{stage}_{mark}_est_lib_complex_metrics.txt"
log:
"logs/{sample}_{stage}_{mark}.picardLibComplexity"
shell:
"picard -Xmx6G EstimateLibraryComplexity INPUT={input} OUTPUT={output} USE_JDK_DEFLATER=TRUE USE_JDK_INFLATER=TRUE VERBOSITY=ERROR"
# ===============================================================================================
# 5. deepTools
# > multiBAMsummary
# > plotCorrelation
# > plotFingerprint
# > bamCoverage (read depth normalised bigWig files)
# ===============================================================================================
#
# # rule preseq:
# # input:
# # "results/bwa/{sample}_{stage}_{mark}.bam"
# # output:
# # "results/preseq/{sample}_{stage}_{mark}.ccurve.txt"
# # log:
# # "logs/{sample}_{stage}_{mark}.preseq"
# # shell:
# # "preseq lc_extrap -v -output {output} -bam {input} 2> {log}"
# #
# #
# # rule get_picard_complexity_metrics:
# # input:
# # "results/bwa/{sample}_{stage}_{mark}.bam"
# # output:
# # "results/picard/{sample}_{stage}_{mark}_est_lib_complex_metrics.txt"
# # log:
# # "logs/{sample}_{stage}_{mark}.picardLibComplexity"
# # shell:
# # "picard -Xmx6G EstimateLibraryComplexity INPUT={input} OUTPUT={output} USE_JDK_DEFLATER=TRUE USE_JDK_INFLATER=TRUE VERBOSITY=ERROR"
#
# # ===============================================================================================
# # 5. deepTools
# # > multiBAMsummary
# # > plotCorrelation
# # > plotFingerprint
# # > bamCoverage (read depth normalised bigWig files)
# # ===============================================================================================
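The bodies of deeptools_summary and deeptools_correlation are collapsed in this view; a hedged sketch of how the two chain together, using the target names from rule all (the repo's exact options may differ):

# Hedged sketch: multiBamSummary scores the filtered BAMs in genome-wide bins and
# plotCorrelation reads the resulting .npz to draw the Pearson heatmap.
rule deeptools_summary_sketch:
    input:
        bam = expand("results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam", zip, sample=all_samples, stage=STAGE, mark=MARK),
        bai = expand("results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai", zip, sample=all_samples, stage=STAGE, mark=MARK)
    output:
        npz = "results/deeptools/multibamsum.npz",
        tab = "results/deeptools/multibamsum.tab"
    shell:
        "multiBamSummary bins -b {input.bam} -o {output.npz} --outRawCounts {output.tab}"

rule deeptools_correlation_sketch:
    input:
        "results/deeptools/multibamsum.npz"
    output:
        fig = "results/deeptools/pearsoncor_multibamsum.png",
        matrix = "results/deeptools/pearsoncor_multibamsum_matrix.txt"
    shell:
        "plotCorrelation -in {input} --corMethod pearson --whatToPlot heatmap "
        "--plotFile {output.fig} --outFileCorMatrix {output.matrix}"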
rule deeptools_summary:
input:
@@ -239,23 +252,19 @@ rule deeptools_correlation:
rule deeptools_coverage:
input:
"results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"
bam = "results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam",
bai = "results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai"
output:
"results/deeptools/{sample}_{stage}_{mark}.SeqDepthNorm.bw"
log:
"logs/{sample}_{stage}_{mark}_coverage.deepTools"
shell:
"bamCoverage \
--bam {input} \
--binSize 10 \
--normalizeUsing RPGC \
--effectiveGenomeSize add mm10 \
--extendReads \
-o {output} 2> {log}"
"bamCoverage --bam {input.bam} --binSize 10 --normalizeUsing RPGC --effectiveGenomeSize 2308125349 --numberOfProcessors 4 -o {output} 2> {log}"
rule deeptools_fingerprint:
input:
expand(["results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"], zip, sample=all_samples, stage=STAGE, mark = MARK)
bam = expand(["results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"], zip, sample=all_samples, stage=STAGE, mark = MARK),
bai = expand(["results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai"], zip, sample=all_samples, stage=STAGE, mark = MARK)
output:
fig="results/deeptools/multiBAM_fingerprint.png",
metrics="results/deeptools/multiBAM_fingerprint_metrics.txt",
@@ -265,7 +274,7 @@ rule deeptools_fingerprint:
"logs/fingerprint.deepTools"
shell:
"plotFingerprint -p {threads} \
-b {input} \
-b {input.bam} \
--plotFile {output.fig} \
--outQualityMetrics {output.metrics} \
--outRawCounts {output.rawcounts} \
@@ -275,22 +284,23 @@ rule deeptools_fingerprint:
rule deeptools_plotCoverage:
input:
expand(["results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"], zip, sample=all_samples, stage=STAGE, mark = MARK)
bam = expand(["results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"], zip, sample=all_samples, stage=STAGE, mark=MARK),
bai = expand(["results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai"], zip, sample=all_samples, stage=STAGE, mark=MARK)
output:
fig="results/deeptools/plot_coverage.png",
rawcounts="results/deeptools/plot_coverage_rawcounts.tab"
log:
"logs/plotCoverage.deepTools"
shell:
"plotCoverage --plotFile {output.fig} \
"plotCoverage --bamfiles {input.bam} --plotFile {output.fig} \
-n 1000000 \
--outRawCounts {output.rawcounts} \
--ignoreDuplicates 2> {log}"
# ===============================================================================================
# 6. phantomPeakQuals
# ===============================================================================================
# # ===============================================================================================
# # 6. phantomPeakQuals
# # ===============================================================================================
rule phantomPeakQuals:
input:
@@ -304,20 +314,20 @@ rule phantomPeakQuals:
shell:
"Rscript scripts/run_spp.R -c={input} -savp={output.savp} -savd={output.savd} -out={output.out} 2> {log}"
# ===============================================================================================
# 7. Call peaks (MACS2)
# ===============================================================================================
# # ===============================================================================================
# # 7. Call peaks (MACS2)
# # ===============================================================================================
rule call_peaks_macs2:
input:
control = "results/bwa/{control}_{stage}_{mark}_PPq30.sorted.dedup.bam",
case = "results/bwa/{control}_{stage}_{mark}_PPq30.sorted.dedup.bam"
control = "results/bwa/{control}_{stage}_{mark}_q30.sorted.dedup.bam",
case = "results/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bam"
output:
"results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.xls",
"results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_summits.bed",
"results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak",
log:
"logs/{case}_vs_{control}_call_peaks_macs2.log"
"logs/{case}_vs_{control}_{stage}_{mark}_call_peaks_macs2.log"
params:
name = "{case}_vs_{control}_{stage}_{mark}_macs2",
shell:
@@ -328,116 +338,99 @@ rule call_peaks_macs2:
-g mm 2> {log} "
# ===============================================================================================
# 8. Peak QC
# > narrow peak counts
# > fraction of reads in peaks (convert BAM to bed first then do intersect with peaks)
# ===============================================================================================
# peak counts in a format that multiqc can handle
rule get_narrow_peak_counts_for_multiqc:
input:
peaks = "results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak"
output:
"results/macs2/{case}-vs-{control}_{stage}_{mark}-narrowpeak-count_mqc.json"
params:
peakType = "narrowPeak"
shell:
"python3 scripts/count_peaks.py \
--peak_type {params.peakType} \
--peaks {input.peaks} --sample_name {wildcards.case} > {output}"
## Convert BAM to tagAlign file for calculating FRiP QC metric (Fraction of reads in peaks)
rule bamToBed:
input:
"results/bwa/{case}_{stage}_{mark}_PPq30.sorted.dedup.bam"
output:
"results/bwa/{case}_{stage}_{mark}.bed"
log:
"logs/{case}_{stage}_{mark}_.bamToBed"
shell:
"samtools sort -n {input} | bedtools bamtobed -i - > {output}"
# # ===============================================================================================
# # 8. Peak QC
# # > narrow peak counts
# # > fraction of reads in peaks (convert BAM to bed first then do intersect with peaks)
# # ===============================================================================================
## Fraction of reads in peaks
rule frip:
input:
bed = "results/bwa/{case}_{stage}_{mark}.bed",
peak = "results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak"
output:
"results/frip/{case}_vs_{control}_{stage}_{mark}.frip.txt"
shell:
"python2.7 scripts/encode_frip.py {input.bed} {input.peak} > {output}"
# ===============================================================================================
# 9. Create consensus peaksets for replicates
# ===============================================================================================
# Based on ENCODE `overlap_peaks.py` - recommended for histone marks.
# Need to pool peaks first for each replicate
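The overlap step keeps a pooled peak only if it is also recovered in the individual replicate peak sets. A hedged bedtools sketch of that idea, assuming two replicates named rep1 and rep2 (the actual overlap criteria live in scripts/overlap_peaks.py and may differ):

# Hedged sketch of naive-overlap consensus, not the ENCODE script itself:
# keep pooled peaks that intersect a peak in every replicate.
rule overlap_peaks_sketch:
    input:
        pooled = "results/macs2/{stage}_{mark}_macs2_pooled_peaks.narrowPeak",
        rep1 = "results/macs2/rep1_{stage}_{mark}_macs2_peaks.narrowPeak",
        rep2 = "results/macs2/rep2_{stage}_{mark}_macs2_peaks.narrowPeak"
    output:
        "results/macs2/{stage}_{mark}_overlap_sketch.narrowPeak"
    shell:
        "bedtools intersect -a {input.pooled} -b {input.rep1} -u | "
        "bedtools intersect -a stdin -b {input.rep2} -u > {output}"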
#peak counts in a format that multiqc can handle
# rule get_narrow_peak_counts_for_multiqc:
# input:
# peaks = "results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak"
# output:
# "results/macs2/{case}-vs-{control}_{stage}_{mark}-narrowpeak-count_mqc.json"
# params:
# peakType = "narrowPeak"
# shell:
# "python3 scripts/count_peaks.py \
# --peak_type {params.peakType} \
# --peaks {input.peaks} --sample_name {wildcards.case} > {output}"
#
#
# ## Convert BAM to tagAlign file for calculating FRiP QC metric (Fraction of reads in peaks)
# rule bamToBed:
# input:
# "results/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bam"
# output:
# "results/bwa/{case}_{stage}_{mark}.bed"
# log:
# "logs/{case}_{stage}_{mark}.bamToBed"
# shell:
# "samtools sort -n {input} | bedtools bamtobed -i - > {output}"
#
#
# ## Fraction of reads in peaks
# rule frip:
# input:
# bed = "results/bwa/{case}_{stage}_{mark}.bed",
# peak = "results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak"
# output:
# "results/frip/{case}_vs_{control}_{stage}_{mark}.frip.txt"
# shell:
# "python2.7 scripts/encode_frip.py {input.bed} {input.peak} > {output}"
rule pool_peaks:
input:
M1S1 = expand(["results/macs2/{case}_vs_{control}_E10.5_H3K27ac_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS),
M1S2 = expand(["results/macs2/{case}_vs_{control}_E11.5_H3K27ac_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS),
M1S3 = expand(["results/macs2/{case}_vs_{control}_E12.5_H3K27ac_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS),
M1S4 = expand(["results/macs2/{case}_vs_{control}_E13.5_H3K27ac_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS),
M1S5 = expand(["results/macs2/{case}_vs_{control}_E14.5_H3K27ac_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS),
M1S6 = expand(["results/macs2/{case}_vs_{control}_E15.5_H3K27ac_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS),
M2S1 = expand(["results/macs2/{case}_vs_{control}_E10.5_H3K4me3_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS),
M2S2 = expand(["results/macs2/{case}_vs_{control}_E11.5_H3K4me3_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS),
M2S3 = expand(["results/macs2/{case}_vs_{control}_E12.5_H3K4me3_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS),
M2S4 = expand(["results/macs2/{case}_vs_{control}_E13.5_H3K4me3_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS),
M2S5 = expand(["results/macs2/{case}_vs_{control}_E14.5_H3K4me3_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS),
M2S6 = expand(["results/macs2/{case}_vs_{control}_E15.5_H3K4me3_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS)
output:
M1S1 = "results/macs2/E10.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
M1S2 = "results/macs2/E11.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
M1S3 = "results/macs2/E12.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
M1S4 = "results/macs2/E13.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
M1S5 = "results/macs2/E14.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
M1S6 = "results/macs2/E15.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
M2S1 = "results/macs2/E10.5_H3K4me3_macs2_pooled_peaks.narrowPeak",
M2S2 = "results/macs2/E11.5_H3K4me3_macs2_pooled_peaks.narrowPeak",
M2S3 = "results/macs2/E12.5_H3K4me3_macs2_pooled_peaks.narrowPeak",
M2S4 = "results/macs2/E13.5_H3K4me3_macs2_pooled_peaks.narrowPeak",
M2S5 = "results/macs2/E14.5_H3K4me3_macs2_pooled_peaks.narrowPeak",
M2S6 = "results/macs2/E15.5_H3K4me3_macs2_pooled_peaks.narrowPeak"
run:
shell("cat {input.M1S1} > {output.M1S1}")
shell("cat {input.M1S2} > {output.M1S2}")
shell("cat {input.M1S3} > {output.M1S3}")
shell("cat {input.M1S4} > {output.M1S4}")
shell("cat {input.M1S5} > {output.M1S5}")
shell("cat {input.M1S6} > {output.M1S6}")
shell("cat {input.M2S1} > {output.M2S1}")
shell("cat {input.M2S2} > {output.M2S2}")
shell("cat {input.M2S3} > {output.M2S3}")
shell("cat {input.M2S4} > {output.M2S4}")
shell("cat {input.M2S5} > {output.M2S5}")
shell("cat {input.M2S6} > {output.M2S6}")
rule overlap_peaks_H3K4me3:
input:
peaks=expand(["results/macs2/{case}_vs_{control}_E10.5_H3K4me3_macs2_peaks.narrowPeak"], ["results/macs2/{case}_vs_{control}_E11.5_H3K4me3_macs2_peaks.narrowPeak"],["results/macs2/{case}_vs_{control}_E12.5_H3K4me3_macs2_peaks.narrowPeak"], ["results/macs2/{case}_vs_{control}_E13.5_H3K4me3_macs2_peaks.narrowPeak"], ["results/macs2/{case}_vs_{control}_E14.5_H3K4me3_macs2_peaks.narrowPeak"], ["results/macs2/{case}_vs_{control}_E15.5_H3K4me3_macs2_peaks.narrowPeak"],zip, case=IPS, control=INPUTS),
pooled=expand("results/macs2/{stage}_{mark}_macs2_pooled_peaks.narrowPeak", zip, stage = STAGE, mark = MARK)
output:
expand("results/macs2/{stage}_{mark}_overlap.narrowPeak", zip, stage= STAGE, mark = MARK)
shell:
"python2.7 scripts/overlap_peaks.py {input.peaks} {input.pooled} {output}"
# # ===============================================================================================
# # 9. Create consensus peaksets for replicates
# # ===============================================================================================
#
# # Based on ENCODE `overlap_peaks.py` - recommended for histone marks.
# # Need to pool peaks first for each replicate
#
# rule pool_peaks:
# input:
# E10 = expand(["results/macs2/{case}_vs_{control}_E10.5_{mark}_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS, mark=MARK),
# E11 = expand(["results/macs2/{case}_vs_{control}_E11.5_{mark}_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS, mark=MARK),
# E12 = expand(["results/macs2/{case}_vs_{control}_E12.5_{mark}_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS, mark=MARK),
# E13 = expand(["results/macs2/{case}_vs_{control}_E13.5_{mark}_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS, mark=MARK),
# E14 = expand(["results/macs2/{case}_vs_{control}_E14.5_{mark}_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS, mark=MARK),
# E15 = expand(["results/macs2/{case}_vs_{control}_E15.5_{mark}_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS, mark=MARK)
# output:
# E10 = "results/macs2/E10.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
# E11 = "results/macs2/E11.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
# E12 = "results/macs2/E12.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
# E13 = "results/macs2/E13.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
# E14 = "results/macs2/E14.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
# E15 = "results/macs2/E15.5_H3K27ac_macs2_pooled_peaks.narrowPeak",
# run:
# shell("cat {input.E10} > {output.E10}")
# shell("cat {input.E11} > {output.E11}")
# shell("cat {input.E12} > {output.E12}")
# shell("cat {input.E13} > {output.E13}")
# shell("cat {input.E14} > {output.E14}")
# shell("cat {input.E15} > {output.E15}")
#
# rule overlap_peaks:
# input:
# E10= expand(["results/macs2/{case}_vs_{control}_E10.5_{mark}_macs2_peaks.narrowPeak"]),
# E11= expand(["results/macs2/{case}_vs_{control}_E11.5_{mark}_macs2_peaks.narrowPeak"]),
# E12= expand(["results/macs2/{case}_vs_{control}_E12.5_{mark}_macs2_peaks.narrowPeak"]),
# E13= expand(["results/macs2/{case}_vs_{control}_E13.5_{mark}_macs2_peaks.narrowPeak"]),
# E14= expand(["results/macs2/{case}_vs_{control}_E14.5_{mark}_macs2_peaks.narrowPeak"]),
# E15= expand(["results/macs2/{case}_vs_{control}_E15.5_{mark}_macs2_peaks.narrowPeak"]).
# E10_pooled=expand("results/macs2/E10.5_{mark}_macs2_pooled_peaks.narrowPeak")
# E11_pooled=expand("results/macs2/E11.5_{mark}_macs2_pooled_peaks.narrowPeak")
# E12_pooled=
# E13_pooled=
# E14_pooled=
# E15_pooled=
# output:
# expand("results/macs2/{stage}_{mark}_overlap.narrowPeak", zip, stage= STAGE, mark = MARK)
# shell:
# "python2.7 scripts/overlap_peaks.py {input.peaks} {input.pooled} {output}"
#
rule overlap_peaks_H3K27ac:
input:
peaks=expand(["results/macs2/{case}_vs_{control}_E10.5_H3K27ac_macs2_peaks.narrowPeak"], ["results/macs2/{case}_vs_{control}_E11.5_H3K27ac_macs2_peaks.narrowPeak"],["results/macs2/{case}_vs_{control}_E12.5_H3K27ac_macs2_peaks.narrowPeak"], ["results/macs2/{case}_vs_{control}_E13.5_H3K27ac_macs2_peaks.narrowPeak"], ["results/macs2/{case}_vs_{control}_E14.5_H3K27ac_macs2_peaks.narrowPeak"], ["results/macs2/{case}_vs_{control}_E15.5_H3K27ac_macs2_peaks.narrowPeak"],zip, case=IPS, control=INPUTS),
pooled=expand("results/macs2/{stage}_{mark}_macs2_pooled_peaks.narrowPeak", zip, stage = STAGE, mark = MARK)
output:
expand("results/macs2/{stage}_{mark}_overlap.narrowPeak", zip, stage= STAGE, mark = MARK)
shell:
"python2.7 scripts/overlap_peaks.py {input.peaks} {input.pooled} {output}"
# ===============================================================================================
# 10. QC on replicate peaks
......