Skip to content
Snippets Groups Projects
Commit 6f7e510a authored by Laura Cook's avatar Laura Cook
Browse files

created separate snakefiles for histone marks as I couldn't get the wildcards...

created separate snakefiles for histone marks as I couldn't get the wildcards to work so had to hardcode one of the rules
parent 8661c846
No related branches found
No related tags found
No related merge requests found
......@@ -12,7 +12,7 @@
# 10. Present QC for raw read, alignment, peak-calling in MultiQC
configfile: "envs/config.yaml"
configfile: "configs/config_H3K427ac.yaml"
# Contains sample and genome information
# calls the SRR.txt file that contains the samples as either IP or control
......@@ -40,7 +40,7 @@ unique_marks = list(set(MARK))
unique_stages = list(set(STAGE))
## combine all samples
all_samples = IPS + unique_inputs
all_samples = IPS + INPUTS
# ===============================================================================================
# Output targets
......@@ -80,17 +80,19 @@ rule all:
"logs/input_E13.5_H3K27ac.mergeBAM",
"logs/input_E14.5_H3K27ac.mergeBAM",
"logs/input_E15.5_H3K27ac.mergeBAM",
"results/qc/multibamsum.npz",
"results/qc/multibamsum.tab",
"results/qc/pearsoncor_multibamsum.png",
"results/qc/pearsoncor_multibamsum_matrix.txt",
expand("results/qc/{sample}_{stage}_{mark}.SeqDepthNorm.bw", zip, sample=all_samples, stage=STAGE, mark=MARK),
"results/qc/multiBAM_fingerprint.png",
"results/qc/multiBAM_fingerprint_metrics.txt",
"results/qc/multiBAM_fingerprint_rawcounts.txt",
"results/qc/plot_coverage.png",
"results/qc/plot_coverage_rawcounts.tab",
expand("results/qc/{sample}_{stage}_{mark}.pbc.qc", zip, sample=all_samples, stage=STAGE, mark=MARK),
# "results/qc/multibamsum.npz",
# "results/qc/multibamsum.tab",
# "results/qc/pearsoncor_multibamsum.png",
# "results/qc/pearsoncor_multibamsum_matrix.txt",
# expand("results/qc/{sample}_{stage}_{mark}.SeqDepthNorm.bw", zip, sample=all_samples, stage=STAGE, mark=MARK),
# "results/qc/multiBAM_fingerprint.png",
# "results/qc/multiBAM_fingerprint_metrics.txt",
# "results/qc/multiBAM_fingerprint_rawcounts.txt",
# "results/qc/plot_coverage.png",
# "results/qc/plot_coverage_rawcounts.tab",
#expand("results/bwa/{sample}_{stage}_{mark}_q30.dupmark.tmp.bam", zip, sample=all_samples, stage=STAGE, mark=MARK),
#expand("logs/{sample}_{stage}_{mark}.pbc.sort", zip, sample=all_samples, stage=STAGE, mark=MARK),
#expand("results/qc/{sample}_{stage}_{mark}.pbc.qc", zip, sample=all_samples, stage=STAGE, mark=MARK),
expand("results/qc/{sample}_{stage}_{mark}_filt_15Mreads.SE.cc.qc",zip, sample=all_samples, stage=STAGE, mark=MARK),
expand("results/qc/{sample}_{stage}_{mark}_filt_15Mreads.SE.cc.plot.pdf", zip, sample=all_samples, stage=STAGE, mark=MARK),
expand("results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
......@@ -100,14 +102,13 @@ rule all:
expand("results/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bed", zip, case=IPS, stage=STAGE, mark=MARK),
expand("logs/{case}_{stage}_{mark}.bamToBed", zip, case=IPS, stage=STAGE, mark=MARK),
expand("results/qc/{case}_vs_{control}_{stage}_{mark}.frip.txt", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
directory("results/macs2/pooled"),
# "results/macs2/H3K27ac_E10.5_overlap.narrowPeak",
# "results/macs2/H3K27ac_E11.5_overlap.narrowPeak",
# "results/macs2/H3K27ac_E12.5_overlap.narrowPeak",
# "results/macs2/H3K27ac_E13.5_overlap.narrowPeak",
# "results/macs2/H3K27ac_E14.5_overlap.narrowPeak",
# "results/macs2/H3K27ac_E15.5_overlap.narrowPeak"
# expand("results/macs2/{stage}_H3K27ac_overlap.narrowPeak", zip, stage= STAGE, mark = MARK)
directory("results/macs2/pooled/"),
"results/macs2/H3K27ac_E10.5_overlap.narrowPeak",
"results/macs2/H3K27ac_E11.5_overlap.narrowPeak",
"results/macs2/H3K27ac_E12.5_overlap.narrowPeak",
"results/macs2/H3K27ac_E13.5_overlap.narrowPeak",
"results/macs2/H3K27ac_E14.5_overlap.narrowPeak",
"results/macs2/H3K27ac_E15.5_overlap.narrowPeak"
# directory("results/multiqc/multiqc_report_data/"),
# "results/multiqc/multiqc_report.html"
......@@ -267,22 +268,41 @@ rule mappingStats:
shell("samtools flagstat {input.c} > {output.c}")
shell("samtools flagstat {input.d} > {output.d}")
# Library complexity metrics (ENCODE PBC/NRF) computed from the
# duplicate-marked BAM.
# Output is a single tab-separated line:
#   TotalReadPairs  DistinctReadPairs  OneReadPair  TwoReadPairs
#   NRF=Distinct/Total  PBC1=OnePair/Distinct  PBC2=OnePair/TwoPair
# NOTE(review): `bedtools bamtobed -bedpe` requires a name-sorted,
# paired-end BAM, but this workflow's header states single-end reads and
# the dupmark BAM is coordinate-sorted -- confirm this rule actually runs
# (the commented-out single-end variant further down suggests it was
# being reworked).
rule estimate_lib_complexity:
    input:
        "results/bwa/{sample}_{stage}_{mark}_q30.dupmark.bam"
    output:
        qc = "results/qc/{sample}_{stage}_{mark}.pbc.qc",
    log:
        "logs/{sample}_{stage}_{mark}.pbc"
    shell:
        """
        bedtools bamtobed -bedpe -i {input} \
        | awk 'BEGIN{{OFS="\\t"}}{{print $1,$2,$4,$6,$9,$10}}' \
        | sort | uniq -c \
        | awk 'BEGIN{{mt=0;m0=0;m1=0;m2=0}} ($1==1){{m1=m1+1}} ($1==2){{m2=m2+1}} \
        {{m0=m0+1}} {{mt=mt+$1}} END{{printf "%d\\t%d\\t%d\\t%d\\t%f\\t%f\\t%f\\n" ,mt,m0,m1,m2,m0/mt,m1/m0,m1/m2}}' \
        > {output.qc}
        """
# rule sort_name:
# input:
# "results/bwa/{sample}_{stage}_{mark}_q30.dupmark.bam"
# output:
# tmp = "results/bwa/{sample}_{stage}_{mark}_q30.dupmark.tmp.bam"
# log:
# "logs/{sample}_{stage}_{mark}.pbc.sort"
# run:
# shell("samtools sort -n {input} -o {output.tmp} 2> {log}")
#
# rule estimate_lib_complexity:
# input:
# "results/bwa/{sample}_{stage}_{mark}_q30.dupmark.tmp.bam"
# output:
# qc = "results/qc/{sample}_{stage}_{mark}.pbc.qc",
# log:
# "logs/{sample}_{stage}_{mark}.pbc"
# shell:
# """
# bedtools bamtobed -i {input} \
# | awk 'BEGIN{{OFS="\\t"}}{{print $1,$2,$4,$6}}' \
# | sort | uniq -c \
# | awk 'BEGIN{{mt=0;m0=0;m1=0;m2=0}} ($1==1){{m1=m1+1}} ($1==2){{m2=m2+1}} \
# {{m0=m0+1}} {{mt=mt+$1}} END{{printf "%d\\t%d\\t%d\\t%d\\t%f\\t%f\\t%f\\n" ,mt,m0,m1,m2,m0/mt,m1/m0,m1/m2}}' \
# > {output.qc}
# """
#
## convert to bedPE
## print read 1 scaffold, read 1 start coordinate, read 2 scaffold, read 2 end coordinate, strand read 1, strand read 2
## remove mitochondrial genome
## sort by position and obtain uniq reads
## Format of file
## TotalReadPairs [tab] DistinctReadPairs [tab] OneReadPair [tab] TwoReadPairs [tab] NRF=Distinct/Total [tab] PBC1=OnePair/Distinct [tab] PBC2=OnePair/TwoPair
# ===============================================================================================
# 5. deepTools
......@@ -470,23 +490,23 @@ rule call_peaks_macs2_pooled_replicates:
E12C = "results/bwa/input_E12.5_H3K27ac_q30.sorted.dedup.bam",
E13C = "results/bwa/input_E13.5_H3K27ac_q30.sorted.dedup.bam",
E14C = "results/bwa/input_E14.5_H3K27ac_q30.sorted.dedup.bam",
E15C = "results/bwa/input_E15.5_H3K27ac_q30.sorted.dedup.bam",
E15C = "results/bwa/input_E15.5_H3K27ac_q30.sorted.dedup.bam"
output:
directory("results/macs2/pooled")
directory("results/macs2/pooled/")
log:
E10 = "results/qc/E10.5_H3K27ac.pooled.macs2",
E11 = "results/qc/E11.5_H3K27ac.pooled.macs2",
E12 = "results/qc/E12.5_H3K27ac.pooled.macs2",
E13 = "results/qc/E13.5_H3K27ac.pooled.macs2",
E14 = "results/qc/E14.5_H3K27ac.pooled.macs2",
E15 = "results/qc/E15.5_H3K27ac.pooled.macs2",
E15 = "results/qc/E15.5_H3K27ac.pooled.macs2"
params:
E10 = "E10.5_H3K27ac.pooled.macs2",
E11 = "E11.5_H3K27ac.pooled.macs2",
E12 = "E12.5_H3K27ac.pooled.macs2",
E13 = "E13.5_H3K27ac.pooled.macs2",
E14 = "E14.5_H3K27ac.pooled.macs2",
E15 = "E15.5_H3K27ac.pooled.macs2",
E15 = "E15.5_H3K27ac.pooled.macs2"
run:
shell("macs2 callpeak -f BAM -t {input.E10} \
-c {input.E10C} --keep-dup all \
......@@ -556,12 +576,12 @@ rule frip:
rule overlap_peaks:
input:
E10= expand(["results/macs2/{case}_vs_{control}_E10.5_H3K27ac_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS, mark=MARK),
E11= expand(["results/macs2/{case}_vs_{control}_E11.5_H3K27ac_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS, mark=MARK),
E12= expand(["results/macs2/{case}_vs_{control}_E12.5_H3K27ac_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS, mark=MARK),
E13= expand(["results/macs2/{case}_vs_{control}_E13.5_H3K27ac_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS, mark=MARK),
E14= expand(["results/macs2/{case}_vs_{control}_E14.5_H3K27ac_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS, mark=MARK),
E15= expand(["results/macs2/{case}_vs_{control}_E15.5_H3K27ac_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS, mark=MARK),
E10= ["results/macs2/ENCFF213EBC_vs_ENCFF157KEH_E10.5_H3K27ac_macs2_peaks.narrowPeak", "results/macs2/ENCFF548BRR_vs_ENCFF825AVI_E10.5_H3K27ac_macs2_peaks.narrowPeak"],
E11= ["results/macs2/ENCFF512SFE_vs_ENCFF184CUE_E11.5_H3K27ac_macs2_peaks.narrowPeak", "results/macs2/ENCFF515PKL_vs_ENCFF376FGM_E11.5_H3K27ac_macs2_peaks.narrowPeak"],
E12= ["results/macs2/ENCFF011NFM_vs_ENCFF058AUT_E12.5_H3K27ac_macs2_peaks.narrowPeak", "results/macs2/ENCFF394TZN_vs_ENCFF203JQV_E12.5_H3K27ac_macs2_peaks.narrowPeak"],
E13= ["results/macs2/ENCFF194ORC_vs_ENCFF117QRC_E13.5_H3K27ac_macs2_peaks.narrowPeak", "results/macs2/ENCFF290ZNF_vs_ENCFF248PGK_E13.5_H3K27ac_macs2_peaks.narrowPeak"],
E14= ["results/macs2/ENCFF902HAR_vs_ENCFF002HZV_E14.5_H3K27ac_macs2_peaks.narrowPeak", "results/macs2/ENCFF327VAO_vs_ENCFF784ORI_E14.5_H3K27ac_macs2_peaks.narrowPeak"],
E15= ["results/macs2/ENCFF707WKL_vs_ENCFF182XFG_E15.5_H3K27ac_macs2_peaks.narrowPeak", "results/macs2/ENCFF584JFB_vs_ENCFF727QTS_E15.5_H3K27ac_macs2_peaks.narrowPeak"],
E10_pooled="results/macs2/E10.5_H3K27ac.pooled.macs2_peaks.narrowPeak",
E11_pooled="results/macs2/E11.5_H3K27ac.pooled.macs2_peaks.narrowPeak",
E12_pooled="results/macs2/E12.5_H3K27ac.pooled.macs2_peaks.narrowPeak",
......
## Author: Laura E Cook, University of Melbourne
## Purpose: ChIP-seq pipeline for histone marks
## This workflow only works for single-end reads
# 1. FastQC on raw reads
# 2. Alignment (downloaded from ENCODE)
# 3. Filtering
# 4. Alignment QC & Library Complexity
# 5. deepTools
# 7. cross correlation (SPP)
# 8. Call narrow peaks (MACS2)
# 9. Create consensus peaksets
# 10. Present QC for raw read, alignment, peak-calling in MultiQC
configfile: "configs/config_H3K4me3.yaml"
# Contains sample and genome information
# calls the SRR.txt file that contains the samples as either IP or control
import csv
import os

# Sample sheet (config['SAMPLES'], tab-separated) columns used here:
# [1] histone mark, [2] developmental stage, [3] IP accession,
# [4] control-input accession.
IPS = []
INPUTS = []
MARK = []
STAGE = []

with open(config['SAMPLES'], "r") as handle:
    rows = csv.reader(handle, delimiter="\t")
    next(rows)  # drop the header line
    for record in rows:
        MARK.append(record[1])
        STAGE.append(record[2])
        IPS.append(record[3])
        INPUTS.append(record[4])

# Multiple IP samples may share the same control input file, so keep
# deduplicated views of each column as well.
unique_inputs = list(set(INPUTS))
unique_marks = list(set(MARK))
unique_stages = list(set(STAGE))

# BAM-level rules run over IP and control samples alike.
all_samples = IPS + INPUTS
# ===============================================================================================
# Output targets
# ===============================================================================================
# Target list: requesting these files drives the entire workflow.
rule all:
    input:
        # Per-sample BAMs at each filtering stage, plus BAM indexes
        expand("results/bwa/{sample}_{stage}_{mark}_q30.sorted.bam", zip, sample=all_samples, stage=STAGE, mark=MARK),
        expand("results/bwa/{sample}_{stage}_{mark}_q30.dupmark.bam", zip, sample=all_samples, stage=STAGE, mark=MARK),
        expand("results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam", zip, sample=all_samples, stage=STAGE, mark=MARK),
        expand("results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai", zip, sample=all_samples, stage=STAGE, mark=MARK),
        # samtools flagstat QC at each filtering stage
        expand("results/qc/{sample}_{stage}_{mark}.dedup.flagstat.qc", zip, sample=all_samples, stage=STAGE, mark=MARK),
        expand("results/qc/{sample}_{stage}_{mark}.dupmark.flagstat.qc", zip, sample=all_samples, stage=STAGE, mark=MARK),
        expand("results/qc/{sample}_{stage}_{mark}.q30.flagstat.qc", zip, sample=all_samples, stage=STAGE, mark=MARK),
        expand("results/qc/{sample}_{stage}_{mark}.unfiltered.flagstat.qc", zip, sample=all_samples, stage=STAGE, mark=MARK),
        # Replicate-pooled IP BAMs, one per stage (hard-coded; see rule mergeBAMreplicates)
        "results/bwa/E10.5_H3K4me3_q30.sorted.pooled.dedup.bam",
        "results/bwa/E11.5_H3K4me3_q30.sorted.pooled.dedup.bam",
        "results/bwa/E12.5_H3K4me3_q30.sorted.pooled.dedup.bam",
        "results/bwa/E13.5_H3K4me3_q30.sorted.pooled.dedup.bam",
        "results/bwa/E14.5_H3K4me3_q30.sorted.pooled.dedup.bam",
        "results/bwa/E15.5_H3K4me3_q30.sorted.pooled.dedup.bam",
        # Pooled control-input BAMs, one per stage
        "results/bwa/input_E10.5_H3K4me3_q30.sorted.dedup.bam",
        "results/bwa/input_E11.5_H3K4me3_q30.sorted.dedup.bam",
        "results/bwa/input_E12.5_H3K4me3_q30.sorted.dedup.bam",
        "results/bwa/input_E13.5_H3K4me3_q30.sorted.dedup.bam",
        "results/bwa/input_E14.5_H3K4me3_q30.sorted.dedup.bam",
        "results/bwa/input_E15.5_H3K4me3_q30.sorted.dedup.bam",
        # Merge logs for the pooled BAMs above
        "logs/E10.5_H3K4me3.mergeBAM",
        "logs/E11.5_H3K4me3.mergeBAM",
        "logs/E12.5_H3K4me3.mergeBAM",
        "logs/E13.5_H3K4me3.mergeBAM",
        "logs/E14.5_H3K4me3.mergeBAM",
        "logs/E15.5_H3K4me3.mergeBAM",
        "logs/input_E10.5_H3K4me3.mergeBAM",
        "logs/input_E11.5_H3K4me3.mergeBAM",
        "logs/input_E12.5_H3K4me3.mergeBAM",
        "logs/input_E13.5_H3K4me3.mergeBAM",
        "logs/input_E14.5_H3K4me3.mergeBAM",
        "logs/input_E15.5_H3K4me3.mergeBAM",
        # deepTools QC targets currently disabled
        # "results/qc/multibamsum.npz",
        # "results/qc/multibamsum.tab",
        # "results/qc/pearsoncor_multibamsum.png",
        # "results/qc/pearsoncor_multibamsum_matrix.txt",
        # expand("results/qc/{sample}_{stage}_{mark}.SeqDepthNorm.bw", zip, sample=all_samples, stage=STAGE, mark=MARK),
        # "results/qc/multiBAM_fingerprint.png",
        # "results/qc/multiBAM_fingerprint_metrics.txt",
        # "results/qc/multiBAM_fingerprint_rawcounts.txt",
        # "results/qc/plot_coverage.png",
        # "results/qc/plot_coverage_rawcounts.tab",
        #expand("results/bwa/{sample}_{stage}_{mark}_q30.dupmark.tmp.bam", zip, sample=all_samples, stage=STAGE, mark=MARK),
        #expand("logs/{sample}_{stage}_{mark}.pbc.sort", zip, sample=all_samples, stage=STAGE, mark=MARK),
        #expand("results/qc/{sample}_{stage}_{mark}.pbc.qc", zip, sample=all_samples, stage=STAGE, mark=MARK),
        # Cross-correlation (phantompeakqualtools) scores and plots
        expand("results/qc/{sample}_{stage}_{mark}_filt_15Mreads.SE.cc.qc",zip, sample=all_samples, stage=STAGE, mark=MARK),
        expand("results/qc/{sample}_{stage}_{mark}_filt_15Mreads.SE.cc.plot.pdf", zip, sample=all_samples, stage=STAGE, mark=MARK),
        # MACS2 per-replicate peak calls and logs
        expand("results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
        expand("results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.xls", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
        expand("results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_summits.bed", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
        expand("logs/{case}_vs_{control}_{stage}_{mark}_call_peaks_macs2.log", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
        # BED conversions of IP BAMs (for FRiP) and their logs
        expand("results/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bed", zip, case=IPS, stage=STAGE, mark=MARK),
        expand("logs/{case}_{stage}_{mark}.bamToBed", zip, case=IPS, stage=STAGE, mark=MARK),
        expand("results/qc/{case}_vs_{control}_{stage}_{mark}.frip.txt", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
        # Pooled-replicate peak calls and per-stage consensus peak sets
        # NOTE(review): directory() is documented for outputs only -- confirm
        # this is accepted as an input target by the Snakemake version in use.
        directory("results/macs2/pooled/"),
        "results/macs2/H3K4me3_E10.5_overlap.narrowPeak",
        "results/macs2/H3K4me3_E11.5_overlap.narrowPeak",
        "results/macs2/H3K4me3_E12.5_overlap.narrowPeak",
        "results/macs2/H3K4me3_E13.5_overlap.narrowPeak",
        "results/macs2/H3K4me3_E14.5_overlap.narrowPeak",
        "results/macs2/H3K4me3_E15.5_overlap.narrowPeak"
        # directory("results/multiqc/multiqc_report_data/"),
        # "results/multiqc/multiqc_report.html"
# ===============================================================================================
# 1. FASTQC
# ===============================================================================================
## Done separately because fastq and BAM files don't share file prefixes so it becomes very messy
# without renaming everything
# ===============================================================================================
# 2. ALIGNMENT
# > ENCODE alignment downloaded from ENCODE database
# ===============================================================================================
## Single-end 36-50nt reads
## bwa-aln used for aligning reads to the mouse genome (mm10 assembly)
## Alignment command: bwa-aln -q 5 -l 32 -k 2 <reference_file> <read_file>
## bwa version version 0.7.7
## -q = parameter for read trimming
## -l = read length
## -k = Maximum edit distance in the seed
# ===============================================================================================
# 4. FILTERING
# > remove unmapped, mate unmapped
# > remove low MAPQ reads (-q 30)
# > -F 1804
# -Read unmapped
# -Mate unmapped
# -Not primary alignment
# -Read fails platform/vendor quality checks
# -Read is PCR or optical duplicate
# > mark duplicates & remove duplicates (picard)
# > re-filter, sort and index final BAM
# ===============================================================================================
# Filter alignments: drop reads with any of the 1804 flag bits set
# (unmapped, mate unmapped, secondary, QC-fail, duplicate) or MAPQ < 30,
# then coordinate-sort the result.
# NOTE(review): `2> {log}` only captures samtools sort's stderr; errors
# from samtools view go to the console.
rule filter:
    input:
        "results/bwa/{sample}_{stage}_{mark}.bam"
    output:
        "results/bwa/{sample}_{stage}_{mark}_q30.sorted.bam"
    log:
        "logs/{sample}_{stage}_{mark}.filter"
    shell:
        "samtools view -b -F 1804 -q 30 {input} | samtools sort -o {output} - 2> {log}"
# Mark (but do not remove) PCR duplicates with Picard; downstream filtering
# removes them via the duplicate flag bit (part of -F 1804).
rule markDups:
    input:
        "results/bwa/{sample}_{stage}_{mark}_q30.sorted.bam"
    output:
        bam="results/bwa/{sample}_{stage}_{mark}_q30.dupmark.bam",
        dupQC="results/bwa/{sample}_{stage}_{mark}.dupmark.qc"
    log:
        # Fixed: was "logs/{sample}_{stage}_{mark}.dedup", the same path that
        # rule dedup logs to, so the two rules clobbered each other's logs.
        # ".dupmark" also matches this rule's QC-file naming.
        "logs/{sample}_{stage}_{mark}.dupmark"
    shell:
        "picard MarkDuplicates I={input} O={output.bam} \
        METRICS_FILE={output.dupQC} REMOVE_DUPLICATES=false ASSUME_SORTED=true 2> {log}"
# Remove the reads Picard flagged as duplicates (-F 1804 includes the 1024
# duplicate bit) and re-sort, producing the final analysis-ready BAM.
rule dedup:
    input:
        "results/bwa/{sample}_{stage}_{mark}_q30.dupmark.bam"
    output:
        "results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"
    log:
        # NOTE(review): verify this log path is distinct from rule markDups's
        # log -- if both rules log to "logs/{...}.dedup" they overwrite each
        # other.
        "logs/{sample}_{stage}_{mark}.dedup"
    shell:
        "samtools view -F 1804 -b {input} | samtools sort -o {output} - 2> {log}"
# Index the final deduplicated BAM (required by deepTools and other
# downstream consumers).
rule indexBam:
    input:
        "results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"
    output:
        "results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai"
    log:
        "logs/{sample}_{stage}_{mark}.indexBam"
    shell:
        "samtools index {input} {output} 2> {log}"
# Pool the two biological replicates per developmental stage into one BAM,
# separately for IPs (E10..E15) and control inputs (E10C..E15C).
# Fixed: the IP inputs used a {mark} wildcard, but this rule's outputs
# contain no wildcards, so Snakemake cannot determine {mark} ("wildcards in
# input files cannot be determined from output files"). Everything is now
# hard-coded to H3K4me3, consistent with the control inputs and outputs.
rule mergeBAMreplicates:
    input:
        # IP replicate pairs, one list per stage
        E10 = ["results/bwa/ENCFF124UYX_E10.5_H3K4me3_q30.sorted.dedup.bam", "results/bwa/ENCFF045IPK_E10.5_H3K4me3_q30.sorted.dedup.bam"],
        E11 = ["results/bwa/ENCFF760QYZ_E11.5_H3K4me3_q30.sorted.dedup.bam", "results/bwa/ENCFF717QDV_E11.5_H3K4me3_q30.sorted.dedup.bam"],
        E12 = ["results/bwa/ENCFF182ZPF_E12.5_H3K4me3_q30.sorted.dedup.bam", "results/bwa/ENCFF941QJZ_E12.5_H3K4me3_q30.sorted.dedup.bam"],
        E13 = ["results/bwa/ENCFF485UDC_E13.5_H3K4me3_q30.sorted.dedup.bam", "results/bwa/ENCFF124TAB_E13.5_H3K4me3_q30.sorted.dedup.bam"],
        E14 = ["results/bwa/ENCFF724DMU_E14.5_H3K4me3_q30.sorted.dedup.bam", "results/bwa/ENCFF665QBJ_E14.5_H3K4me3_q30.sorted.dedup.bam"],
        E15 = ["results/bwa/ENCFF258KCR_E15.5_H3K4me3_q30.sorted.dedup.bam", "results/bwa/ENCFF401BKM_E15.5_H3K4me3_q30.sorted.dedup.bam"],
        # Control-input replicate pairs, one list per stage
        E10C = ["results/bwa/ENCFF157KEH_E10.5_H3K4me3_q30.sorted.dedup.bam", "results/bwa/ENCFF825AVI_E10.5_H3K4me3_q30.sorted.dedup.bam"],
        E11C = ["results/bwa/ENCFF184CUE_E11.5_H3K4me3_q30.sorted.dedup.bam", "results/bwa/ENCFF376FGM_E11.5_H3K4me3_q30.sorted.dedup.bam"],
        E12C = ["results/bwa/ENCFF203JQV_E12.5_H3K4me3_q30.sorted.dedup.bam", "results/bwa/ENCFF058AUT_E12.5_H3K4me3_q30.sorted.dedup.bam"],
        E13C = ["results/bwa/ENCFF117QRC_E13.5_H3K4me3_q30.sorted.dedup.bam", "results/bwa/ENCFF248PGK_E13.5_H3K4me3_q30.sorted.dedup.bam"],
        E14C = ["results/bwa/ENCFF784ORI_E14.5_H3K4me3_q30.sorted.dedup.bam", "results/bwa/ENCFF002HZV_E14.5_H3K4me3_q30.sorted.dedup.bam"],
        E15C = ["results/bwa/ENCFF727QTS_E15.5_H3K4me3_q30.sorted.dedup.bam", "results/bwa/ENCFF182XFG_E15.5_H3K4me3_q30.sorted.dedup.bam"],
    output:
        E10 = "results/bwa/E10.5_H3K4me3_q30.sorted.pooled.dedup.bam",
        E11 = "results/bwa/E11.5_H3K4me3_q30.sorted.pooled.dedup.bam",
        E12 = "results/bwa/E12.5_H3K4me3_q30.sorted.pooled.dedup.bam",
        E13 = "results/bwa/E13.5_H3K4me3_q30.sorted.pooled.dedup.bam",
        E14 = "results/bwa/E14.5_H3K4me3_q30.sorted.pooled.dedup.bam",
        E15 = "results/bwa/E15.5_H3K4me3_q30.sorted.pooled.dedup.bam",
        E10C = "results/bwa/input_E10.5_H3K4me3_q30.sorted.dedup.bam",
        E11C = "results/bwa/input_E11.5_H3K4me3_q30.sorted.dedup.bam",
        E12C = "results/bwa/input_E12.5_H3K4me3_q30.sorted.dedup.bam",
        E13C = "results/bwa/input_E13.5_H3K4me3_q30.sorted.dedup.bam",
        E14C = "results/bwa/input_E14.5_H3K4me3_q30.sorted.dedup.bam",
        E15C = "results/bwa/input_E15.5_H3K4me3_q30.sorted.dedup.bam"
    log:
        E10 = "logs/E10.5_H3K4me3.mergeBAM",
        E11 = "logs/E11.5_H3K4me3.mergeBAM",
        E12 = "logs/E12.5_H3K4me3.mergeBAM",
        E13 = "logs/E13.5_H3K4me3.mergeBAM",
        E14 = "logs/E14.5_H3K4me3.mergeBAM",
        E15 = "logs/E15.5_H3K4me3.mergeBAM",
        E10C = "logs/input_E10.5_H3K4me3.mergeBAM",
        E11C = "logs/input_E11.5_H3K4me3.mergeBAM",
        E12C = "logs/input_E12.5_H3K4me3.mergeBAM",
        E13C = "logs/input_E13.5_H3K4me3.mergeBAM",
        E14C = "logs/input_E14.5_H3K4me3.mergeBAM",
        E15C = "logs/input_E15.5_H3K4me3.mergeBAM"
    run:
        shell("samtools merge {output.E10} {input.E10} 2> {log.E10}")
        shell("samtools merge {output.E11} {input.E11} 2> {log.E11}")
        shell("samtools merge {output.E12} {input.E12} 2> {log.E12}")
        shell("samtools merge {output.E13} {input.E13} 2> {log.E13}")
        shell("samtools merge {output.E14} {input.E14} 2> {log.E14}")
        shell("samtools merge {output.E15} {input.E15} 2> {log.E15}")
        shell("samtools merge {output.E10C} {input.E10C} 2> {log.E10C}")
        shell("samtools merge {output.E11C} {input.E11C} 2> {log.E11C}")
        shell("samtools merge {output.E12C} {input.E12C} 2> {log.E12C}")
        shell("samtools merge {output.E13C} {input.E13C} 2> {log.E13C}")
        shell("samtools merge {output.E14C} {input.E14C} 2> {log.E14C}")
        shell("samtools merge {output.E15C} {input.E15C} 2> {log.E15C}")
# ===============================================================================================
# 4. ALIGNMENT QC
# > SAMtools flagstat statistics
# > CollectMultipleMetrics (picard)
# > Compute Library Complexity (PreSeq)
# ===============================================================================================
# samtools flagstat at every filtering stage, so read counts before/after
# filtering and deduplication can be compared (e.g. in MultiQC).
rule mappingStats:
    input:
        a="results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam",  # final, duplicates removed
        b="results/bwa/{sample}_{stage}_{mark}_q30.dupmark.bam",  # duplicates marked
        c="results/bwa/{sample}_{stage}_{mark}_q30.sorted.bam",  # q30/flag filtered
        d="results/bwa/{sample}_{stage}_{mark}.bam"  # unfiltered alignment
    output:
        a="results/qc/{sample}_{stage}_{mark}.dedup.flagstat.qc",
        b="results/qc/{sample}_{stage}_{mark}.dupmark.flagstat.qc",
        c="results/qc/{sample}_{stage}_{mark}.q30.flagstat.qc",
        d="results/qc/{sample}_{stage}_{mark}.unfiltered.flagstat.qc",
    run:
        shell("samtools flagstat {input.a} > {output.a}")
        shell("samtools flagstat {input.b} > {output.b}")
        shell("samtools flagstat {input.c} > {output.c}")
        shell("samtools flagstat {input.d} > {output.d}")
# rule sort_name:
# input:
# "results/bwa/{sample}_{stage}_{mark}_q30.dupmark.bam"
# output:
# tmp = "results/bwa/{sample}_{stage}_{mark}_q30.dupmark.tmp.bam"
# log:
# "logs/{sample}_{stage}_{mark}.pbc.sort"
# run:
# shell("samtools sort -n {input} -o {output.tmp} 2> {log}")
#
# rule estimate_lib_complexity:
# input:
# "results/bwa/{sample}_{stage}_{mark}_q30.dupmark.tmp.bam"
# output:
# qc = "results/qc/{sample}_{stage}_{mark}.pbc.qc",
# log:
# "logs/{sample}_{stage}_{mark}.pbc"
# shell:
# """
# bedtools bamtobed -i {input} \
# | awk 'BEGIN{{OFS="\\t"}}{{print $1,$2,$4,$6}}' \
# | sort | uniq -c \
# | awk 'BEGIN{{mt=0;m0=0;m1=0;m2=0}} ($1==1){{m1=m1+1}} ($1==2){{m2=m2+1}} \
# {{m0=m0+1}} {{mt=mt+$1}} END{{printf "%d\\t%d\\t%d\\t%d\\t%f\\t%f\\t%f\\n" ,mt,m0,m1,m2,m0/mt,m1/m0,m1/m2}}' \
# > {output.qc}
# """
#
## convert to bedPE
## print read 1 scaffold, read 1 start coordinate, read 2 scaffold, read 2 end coordinate, strand read 1, strand read 2
## remove mitochondrial genome
## sort by position and obtain uniq reads
## Format of file
## TotalReadPairs [tab] DistinctReadPairs [tab] OneReadPair [tab] TwoReadPairs [tab] NRF=Distinct/Total [tab] PBC1=OnePair/Distinct [tab] PBC2=OnePair/TwoPair
# ===============================================================================================
# 5. deepTools
# > multiBAMsummary
# > plotCorrelation
# > plotFingerprint
# > bamCoverage (read depth normalised bigWig files)
# ===============================================================================================
# multiBamSummary: binned genome-wide read counts across all final BAMs;
# the .npz feeds rule deeptools_correlation below.
rule deeptools_summary:
    input:
        expand(["results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"], zip, sample=all_samples, stage=STAGE, mark = MARK)
    output:
        sum="results/qc/multibamsum.npz",
        counts="results/qc/multibamsum.tab"
    threads: 32
    log:
        "logs/multisummary.deepTools"
    shell:
        " multiBamSummary bins \
        -p {threads} \
        -b {input} \
        --centerReads \
        -out {output.sum} \
        --outRawCounts {output.counts} 2> {log}"
# Pearson correlation heatmap between all samples, from the
# multiBamSummary matrix produced above.
rule deeptools_correlation:
    input: "results/qc/multibamsum.npz"
    output:
        fig="results/qc/pearsoncor_multibamsum.png",
        matrix="results/qc/pearsoncor_multibamsum_matrix.txt"
    log:
        "logs/correlation.deepTools"
    shell:
        "plotCorrelation \
        --corData {input} \
        --plotFile {output.fig} \
        --outFileCorMatrix {output.matrix} \
        --corMethod pearson \
        --whatToPlot heatmap \
        --skipZeros \
        --plotNumbers \
        --colorMap RdYlBu 2> {log}"
# Depth-normalised (1x RPGC) bigWig coverage track per sample.
# NOTE(review): effectiveGenomeSize 2308125349 -- presumably the deepTools
# GRCm38/mm10 value for ~50 bp reads; confirm it matches the read length.
rule deeptools_coverage:
    input:
        bam = "results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam",
        bai = "results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai"
    output:
        "results/qc/{sample}_{stage}_{mark}.SeqDepthNorm.bw"
    log:
        "logs/{sample}_{stage}_{mark}_coverage.deepTools"
    shell:
        "bamCoverage --bam {input.bam} --binSize 10 --normalizeUsing RPGC --effectiveGenomeSize 2308125349 --numberOfProcessors 4 -o {output} 2> {log}"
# plotFingerprint: cumulative read-coverage curves over all final BAMs,
# to assess IP enrichment strength versus the inputs.
rule deeptools_fingerprint:
    input:
        bam = expand(["results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"], zip, sample=all_samples, stage=STAGE, mark = MARK),
        bai = expand(["results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai"], zip, sample=all_samples, stage=STAGE, mark = MARK)
    output:
        fig="results/qc/multiBAM_fingerprint.png",
        metrics="results/qc/multiBAM_fingerprint_metrics.txt",
        rawcounts="results/qc/multiBAM_fingerprint_rawcounts.txt"
    threads: 32
    log:
        "logs/fingerprint.deepTools"
    shell:
        "plotFingerprint -p {threads} \
        -b {input.bam} \
        --plotFile {output.fig} \
        --outQualityMetrics {output.metrics} \
        --outRawCounts {output.rawcounts} \
        --minMappingQuality 20 \
        --skipZeros \
        --centerReads 2> {log}"
# plotCoverage: sampled (1M positions) coverage distribution across all
# final BAMs, ignoring duplicates.
rule deeptools_plotCoverage:
    input:
        bam = expand(["results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"], zip, sample=all_samples, stage=STAGE, mark=MARK),
        bai = expand(["results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai"], zip, sample=all_samples, stage=STAGE, mark=MARK)
    output:
        fig="results/qc/plot_coverage.png",
        rawcounts="results/qc/plot_coverage_rawcounts.tab"
    log:
        "logs/plotCoverage.deepTools"
    shell:
        "plotCoverage --bamfiles {input.bam} --plotFile {output.fig} \
        -n 1000000 \
        --outRawCounts {output.rawcounts} \
        --ignoreDuplicates 2> {log}"
# ===============================================================================================
# 6. Cross-correlation scores - following ENCODE code
# ===============================================================================================
# Convert the duplicate-marked BAM to ENCODE tagAlign format: BED6 with the
# read name replaced by the literal "N" and the score fixed at "1000",
# gzipped without a timestamp (-n) for reproducible output.
# BUGFIX: the original awk program used $4='N' and $5='1000' inside a
# single-quoted shell string; the inner single quotes were consumed by the
# shell, so awk actually received $4=N -- an uninitialised awk variable --
# and blanked the name column instead of writing "N". Double quotes
# preserve the literals (matching the ENCODE pipeline's command).
rule bamtobed_crossC:
    input:
        "results/bwa/{sample}_{stage}_{mark}_q30.dupmark.bam"
    output:
        tagAlign = "results/bed/{sample}_{stage}_{mark}_q30_SE.tagAlign.gz"
    shell:
        """
        bedtools bamtobed -i {input} | \
        awk 'BEGIN{{OFS="\\t"}}{{$4="N";$5="1000";print $0}}' |\
        gzip -nc > {output.tagAlign}
        """
## Subsample 15 million reads for cross-correlation analysis
## Estimate read length from first 100 reads
# Subsample 15M reads from the tagAlign for cross-correlation analysis.
rule subsample_aligned_reads:
    input:
        "results/bed/{sample}_{stage}_{mark}_q30_SE.tagAlign.gz"
    output:
        subsample = "results/bed/{sample}_{stage}_{mark}.filt.sample.15Mreads.SE.tagAlign.gz",
        # Uncompressed copy of the full tagAlign.
        # NOTE(review): "_R1_trimmed" in this filename looks like a leftover
        # from a different pipeline -- confirm what consumes this file.
        tmp = "results/bed/{sample}_{stage}_{mark}_R1_trimmed_q30_SE.tagAlign.tmp"
    params:
        nreads= 15000000
    run:
        # shuf is seeded via --random-source with an openssl stream keyed on
        # the input's byte count, so the subsample is deterministic for a
        # given input file (the ENCODE reproducible-subsampling idiom).
        shell("""zcat {input} | shuf -n {params.nreads} --random-source=<(openssl enc -aes-256-ctr -pass pass:$(zcat -f {input} | wc -c) -nosalt </dev/zero 2>/dev/null) | gzip -nc > {output.subsample}""")
        shell("zcat {input} > {output.tmp}")
# Determine exclusion range for fragment length estimation.
## From bamPEFragmentSize (deepTools) average fragment length is ~220bp
## See bamPEFragmentSize.histogram.png
# The exclusion range passed to run_spp.R (-x=min:max) excludes strand
# shifts around zero from the cross-correlation peak search.
# The lower bound is fixed at -500.
# The upper bound (EXCLUSION_RANGE_MAX) for histone ChIP-seq is
# max(read_len + 10, 100); the value 85 used below corresponds to
# read_len + 10 for ~75 bp reads.
# Strand cross-correlation QC (phantompeakqualtools run_spp.R) on the
# 15M-read subsample, following the ENCODE pipeline. Produces the .cc.qc
# metrics table (format documented below) and a cross-correlation plot.
rule cross_correlation_SSP:
    input:
        "results/bed/{sample}_{stage}_{mark}.filt.sample.15Mreads.SE.tagAlign.gz"
    output:
        CC_SCORES_FILE="results/qc/{sample}_{stage}_{mark}_filt_15Mreads.SE.cc.qc",        # tab-separated QC metrics
        CC_PLOT_FILE="results/qc/{sample}_{stage}_{mark}_filt_15Mreads.SE.cc.plot.pdf"     # cross-correlation plot
    log:
        "logs/{sample}_{stage}_{mark}_filt_15Mreads.SE.spp.log"
    params:
        # -x exclusion range for fragment-length estimation (see comments above)
        EXCLUSION_RANGE_MIN=-500,
        # NOTE(review): 85 is below the stated formula max(read_len + 10, 100)
        # — confirm this value matches the actual read length
        EXCLUSION_RANGE_MAX=85
    shell:
        "Rscript scripts/run_spp.R -c={input} -savp={output.CC_PLOT_FILE} -out={output.CC_SCORES_FILE} -x={params.EXCLUSION_RANGE_MIN}:{params.EXCLUSION_RANGE_MAX} 2> {log}"
# CC_SCORE FILE format
# Filename <tab> numReads <tab> estFragLen <tab> corr_estFragLen <tab> PhantomPeak <tab> corr_phantomPeak <tab> argmin_corr <tab> min_corr <tab> phantomPeakCoef <tab> relPhantomPeakCoef <tab> QualityTag
# ===============================================================================================
# 7. Call peaks (MACS2)
# ===============================================================================================
# Per-replicate MACS2 peak calling: one IP ({case}) against its matched
# input ({control}). A relaxed threshold (-p 0.01) is used so that the
# downstream overlap/consensus step can filter; duplicates are kept because
# the BAMs are already deduplicated.
rule call_peaks_macs2:
    input:
        control = "results/bwa/{control}_{stage}_{mark}_q30.sorted.dedup.bam",
        case = "results/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bam"
    output:
        "results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.xls",
        "results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_summits.bed",
        "results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak",
    log:
        "logs/{case}_vs_{control}_{stage}_{mark}_call_peaks_macs2.log"
    params:
        # -n prefix for all files macs2 writes into --outdir
        name = "{case}_vs_{control}_{stage}_{mark}_macs2",
    shell:
        " macs2 callpeak -f BAM -t {input.case} \
        -c {input.control} --keep-dup all \
        --outdir results/macs2/ -p 0.01 \
        -n {params.name} \
        -g mm 2> {log} "
rule call_peaks_macs2_pooled_replicates:
input:
E10 = "results/bwa/E10.5_H3K4me3_q30.sorted.pooled.dedup.bam",
E11 = "results/bwa/E11.5_H3K4me3_q30.sorted.pooled.dedup.bam",
E12 = "results/bwa/E12.5_H3K4me3_q30.sorted.pooled.dedup.bam",
E13 = "results/bwa/E13.5_H3K4me3_q30.sorted.pooled.dedup.bam",
E14 = "results/bwa/E14.5_H3K4me3_q30.sorted.pooled.dedup.bam",
E15 = "results/bwa/E15.5_H3K4me3_q30.sorted.pooled.dedup.bam",
E10C = "results/bwa/input_E10.5_H3K4me3_q30.sorted.dedup.bam",
E11C = "results/bwa/input_E11.5_H3K4me3_q30.sorted.dedup.bam",
E12C = "results/bwa/input_E12.5_H3K4me3_q30.sorted.dedup.bam",
E13C = "results/bwa/input_E13.5_H3K4me3_q30.sorted.dedup.bam",
E14C = "results/bwa/input_E14.5_H3K4me3_q30.sorted.dedup.bam",
E15C = "results/bwa/input_E15.5_H3K4me3_q30.sorted.dedup.bam"
output:
directory("results/macs2/pooled/")
log:
E10 = "results/qc/E10.5_H3K4me3.pooled.macs2",
E11 = "results/qc/E11.5_H3K4me3.pooled.macs2",
E12 = "results/qc/E12.5_H3K4me3.pooled.macs2",
E13 = "results/qc/E13.5_H3K4me3.pooled.macs2",
E14 = "results/qc/E14.5_H3K4me3.pooled.macs2",
E15 = "results/qc/E15.5_H3K4me3.pooled.macs2"
params:
E10 = "E10.5_H3K4me3.pooled.macs2",
E11 = "E11.5_H3K4me3.pooled.macs2",
E12 = "E12.5_H3K4me3.pooled.macs2",
E13 = "E13.5_H3K4me3.pooled.macs2",
E14 = "E14.5_H3K4me3.pooled.macs2",
E15 = "E15.5_H3K4me3.pooled.macs2"
run:
shell("macs2 callpeak -f BAM -t {input.E10} \
-c {input.E10C} --keep-dup all \
--outdir results/macs2/ -p 0.01 \
-n {params.E10} \
-g mm 2> {log.E10}" )
shell("macs2 callpeak -f BAM -t {input.E11} \
-c {input.E11C} --keep-dup all \
--outdir results/macs2/ -p 0.01 \
-n {params.E11} \
-g mm 2> {log.E11}" )
shell("macs2 callpeak -f BAM -t {input.E12} \
-c {input.E12C} --keep-dup all \
--outdir results/macs2/ -p 0.01 \
-n {params.E12} \
-g mm 2> {log.E12}" )
shell("macs2 callpeak -f BAM -t {input.E13} \
-c {input.E13C} --keep-dup all \
--outdir results/macs2/ -p 0.01 \
-n {params.E13} \
-g mm 2> {log.E13}" )
shell("macs2 callpeak -f BAM -t {input.E14} \
-c {input.E14C} --keep-dup all \
--outdir results/macs2/ -p 0.01 \
-n {params.E14} \
-g mm 2> {log.E14}" )
shell("macs2 callpeak -f BAM -t {input.E15} \
-c {input.E15C} --keep-dup all \
--outdir results/macs2/ -p 0.01 \
-n {params.E15} \
-g mm 2> {log.E15}" )
# ===============================================================================================
# 8. Peak QC
# > narrow peak counts
# > fraction of reads in peaks (convert BAM to bed first then do intersect with peaks)
# ===============================================================================================
## Convert BAM to tagAlign file for calculating FRiP QC metric (Fraction of reads in peaks)
# Convert the final deduplicated BAM to BED for the FRiP calculation below.
# The BAM is name-sorted first (samtools sort -n) before bedtools bamtobed.
rule bamToBed:
    input:
        "results/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bam"
    output:
        "results/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bed"
    log:
        "logs/{case}_{stage}_{mark}.bamToBed"
    shell:
        # NOTE(review): {log} is declared but the command never redirects into
        # it — confirm whether a 2> {log} was intended
        "samtools sort -n {input} | bedtools bamtobed -i - > {output}"
# ## Fraction of reads in peaks
# Fraction of Reads in Peaks (FRiP) QC metric: intersect the aligned-read BED
# with the replicate's narrowPeak set using the ENCODE helper script
# (requires python2.7).
rule frip:
    input:
        bed = "results/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bed",
        peak = "results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak"
    output:
        "results/qc/{case}_vs_{control}_{stage}_{mark}.frip.txt"
    shell:
        "python2.7 scripts/encode_frip.py {input.bed} {input.peak} > {output}"
# ===============================================================================================
# 9. Create consensus peaksets for replicates
# ===============================================================================================
# Based on ENCODE `overlap_peaks.py` - recommended for histone marks.
# Need to pool peaks first for each replicate
# Consensus peak sets per stage via the ENCODE overlap_peaks.py approach
# (recommended for histone marks): for each stage, intersect the two
# replicate peak sets with the pooled-replicate peak set.
# NOTE(review): replicate ENCODE accessions are hardcoded per stage — these
# must match the sample sheet referenced in the config; confirm on any
# sample change.
rule overlap_peaks:
    input:
        E10= ["results/macs2/ENCFF124UYX_vs_ENCFF157KEH_E10.5_H3K4me3_macs2_peaks.narrowPeak", "results/macs2/ENCFF045IPK_vs_ENCFF825AVI_E10.5_H3K4me3_macs2_peaks.narrowPeak"],
        E11= ["results/macs2/ENCFF760QYZ_vs_ENCFF184CUE_E11.5_H3K4me3_macs2_peaks.narrowPeak", "results/macs2/ENCFF717QDV_vs_ENCFF376FGM_E11.5_H3K4me3_macs2_peaks.narrowPeak"],
        E12= ["results/macs2/ENCFF941QJZ_vs_ENCFF058AUT_E12.5_H3K4me3_macs2_peaks.narrowPeak", "results/macs2/ENCFF182ZPF_vs_ENCFF203JQV_E12.5_H3K4me3_macs2_peaks.narrowPeak"],
        E13= ["results/macs2/ENCFF485UDC_vs_ENCFF117QRC_E13.5_H3K4me3_macs2_peaks.narrowPeak", "results/macs2/ENCFF124TAB_vs_ENCFF248PGK_E13.5_H3K4me3_macs2_peaks.narrowPeak"],
        E14= ["results/macs2/ENCFF665QBJ_vs_ENCFF002HZV_E14.5_H3K4me3_macs2_peaks.narrowPeak", "results/macs2/ENCFF724DMU_vs_ENCFF784ORI_E14.5_H3K4me3_macs2_peaks.narrowPeak"],
        E15= ["results/macs2/ENCFF401BKM_vs_ENCFF182XFG_E15.5_H3K4me3_macs2_peaks.narrowPeak", "results/macs2/ENCFF258KCR_vs_ENCFF727QTS_E15.5_H3K4me3_macs2_peaks.narrowPeak"],
        # pooled-replicate peak sets produced by call_peaks_macs2_pooled_replicates
        E10_pooled="results/macs2/E10.5_H3K4me3.pooled.macs2_peaks.narrowPeak",
        E11_pooled="results/macs2/E11.5_H3K4me3.pooled.macs2_peaks.narrowPeak",
        E12_pooled="results/macs2/E12.5_H3K4me3.pooled.macs2_peaks.narrowPeak",
        E13_pooled="results/macs2/E13.5_H3K4me3.pooled.macs2_peaks.narrowPeak",
        E14_pooled="results/macs2/E14.5_H3K4me3.pooled.macs2_peaks.narrowPeak",
        E15_pooled="results/macs2/E15.5_H3K4me3.pooled.macs2_peaks.narrowPeak",
    output:
        E10= "results/macs2/H3K4me3_E10.5_overlap.narrowPeak",
        E11= "results/macs2/H3K4me3_E11.5_overlap.narrowPeak",
        E12= "results/macs2/H3K4me3_E12.5_overlap.narrowPeak",
        E13= "results/macs2/H3K4me3_E13.5_overlap.narrowPeak",
        E14= "results/macs2/H3K4me3_E14.5_overlap.narrowPeak",
        E15= "results/macs2/H3K4me3_E15.5_overlap.narrowPeak"
    run:
        # overlap_peaks.py <rep1 peaks> <rep2 peaks> <pooled peaks> <output>
        shell("python2.7 scripts/overlap_peaks.py {input.E10} {input.E10_pooled} {output.E10}")
        shell("python2.7 scripts/overlap_peaks.py {input.E11} {input.E11_pooled} {output.E11}")
        shell("python2.7 scripts/overlap_peaks.py {input.E12} {input.E12_pooled} {output.E12}")
        shell("python2.7 scripts/overlap_peaks.py {input.E13} {input.E13_pooled} {output.E13}")
        shell("python2.7 scripts/overlap_peaks.py {input.E14} {input.E14_pooled} {output.E14}")
        shell("python2.7 scripts/overlap_peaks.py {input.E15} {input.E15_pooled} {output.E15}")
# ===============================================================================================
# 10. Combine all QC into multiqc output
# ===============================================================================================
#
# rule multiqc:
# input:
# fastqc
# expand("results/fastqc/{sample}_R1_fastqc.html", sample=all_samples),
# expand("results/fastqc/{sample}_R2_fastqc.html", sample=all_samples),
# expand("results/fastqc/{sample}_R1_fastqc.zip", sample=all_samples),
# expand("results/fastqc/{sample}_R2_fastqc.zip", sample=all_samples),
# # bwa
# expand("logs/{sample}.align", sample=all_samples),
# expand("logs/{sample}.flagstat.qc", sample=all_samples),
# # preseq
# expand("results/preseq/{sample}.ccurve.txt", sample=all_samples),
# # picard
# expand("results/picard/{sample}_est_lib_complex_metrics.txt", sample=all_samples),
# # deepTools
# "results/deeptools/multibamsum.npz",
# "results/deeptools/multibamsum.tab",
# "results/deeptools/pearsoncor_multibamsum.png",
# "results/deeptools/pearsoncor_multibamsum_matrix.txt",
# expand("results/deeptools/{sample}.SeqDepthNorm.bw", sample=all_samples),
# "results/deeptools/multiBAM_fingerprint.png",
# "results/deeptools/multiBAM_fingerprint_metrics.txt",
# "results/deeptools/multiBAM_fingerprint_rawcounts.txt",
# "results/deeptools/plot_coverage.png",
# "results/deeptools/plot_coverage_rawcounts.tab",
# "results/deeptools/bamPEFragmentSize_hist.png",
# "results/deeptools/bamPEFragmentSize_rawcounts.tab",
# # phantomPeaks
# expand("results/phantomPeaks/{sample}.spp.pdf", sample = IPS),
# expand("results/phantomPeaks/{sample}.spp.Rdata", sample = IPS),
# expand("results/phantomPeaks/{sample}.spp.out", sample = IPS),
# # macs2
# expand("results/macs2/{case}_vs_{control}_macs2_peaks.narrowPeak", zip, case=IPS, control=INPUTS),
# expand("results/macs2/{case}_vs_{control}_macs2_peaks.xls", zip, case=IPS, control=INPUTS),
# expand("results/macs2/{case}_vs_{control}_macs2_summits.bed", zip, case=IPS, control=INPUTS),
# expand("results/macs2/{case}-vs-{control}-narrowpeak-count_mqc.json", zip, case=IPS, control=INPUTS),
# expand("results/frip/{case}.frip.txt", case=IPS)
# params:
# "results/fastqc/",
# "results/bwa/",
# "logs/",
# "results/deeptools/",
# "results/macs2/",
# "results/phantomPeaks/",
# "results/frip/",
# "results/picard/"
# output:
# directory("results/multiqc/multiqc_report_data/"),
# "results/multiqc/multiqc_report.html",
# log:
# "logs/multiqc.log"
# shell:
# "multiqc -f {params} -m fastqc -m samtools -m picard -m preseq -m deeptools -m phantompeakqualtools -m macs2 -o results/multiqc/ \
# -n multiqc_report.html 2> {log}"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment