From 6f7e510a67b3a06c6097e5e2b49b317965565e56 Mon Sep 17 00:00:00 2001 From: Laura Cook <l.cook2@student.unimelb.edu.au> Date: Thu, 3 Sep 2020 10:31:05 +1000 Subject: [PATCH] created separate snakefiles for histone marks as I couldn't get the wildcards to work so had to hardcode one of the rules --- mouse/{Snakefile => Snakefile_H3K27ac} | 114 +++-- mouse/Snakefile_H3K4me3 | 663 +++++++++++++++++++++++++ 2 files changed, 730 insertions(+), 47 deletions(-) rename mouse/{Snakefile => Snakefile_H3K27ac} (87%) create mode 100644 mouse/Snakefile_H3K4me3 diff --git a/mouse/Snakefile b/mouse/Snakefile_H3K27ac similarity index 87% rename from mouse/Snakefile rename to mouse/Snakefile_H3K27ac index f0e2d17..55a1eea 100644 --- a/mouse/Snakefile +++ b/mouse/Snakefile_H3K27ac @@ -12,7 +12,7 @@ # 10. Present QC for raw read, alignment, peak-calling in MultiQC -configfile: "envs/config.yaml" +configfile: "configs/config_H3K27ac.yaml" # Contains sample and genome information # calls the SRR.txt file that contains the samples as either IP or control @@ -40,7 +40,7 @@ unique_marks = list(set(MARK)) unique_stages = list(set(STAGE)) ## combine all samples -all_samples = IPS + unique_inputs +all_samples = IPS + INPUTS # =============================================================================================== # Output targets # =============================================================================================== @@ -80,17 +80,19 @@ rule all: "logs/input_E13.5_H3K27ac.mergeBAM", "logs/input_E14.5_H3K27ac.mergeBAM", "logs/input_E15.5_H3K27ac.mergeBAM", - "results/qc/multibamsum.npz", - "results/qc/multibamsum.tab", - "results/qc/pearsoncor_multibamsum.png", - "results/qc/pearsoncor_multibamsum_matrix.txt", - expand("results/qc/{sample}_{stage}_{mark}.SeqDepthNorm.bw", zip, sample=all_samples, stage=STAGE, mark=MARK), - "results/qc/multiBAM_fingerprint.png", - "results/qc/multiBAM_fingerprint_metrics.txt", - "results/qc/multiBAM_fingerprint_rawcounts.txt", - "results/qc/plot_coverage.png", - "results/qc/plot_coverage_rawcounts.tab", - 
expand("results/qc/{sample}_{stage}_{mark}.pbc.qc", zip, sample=all_samples, stage=STAGE, mark=MARK), + # "results/qc/multibamsum.npz", + # "results/qc/multibamsum.tab", + # "results/qc/pearsoncor_multibamsum.png", + # "results/qc/pearsoncor_multibamsum_matrix.txt", + # expand("results/qc/{sample}_{stage}_{mark}.SeqDepthNorm.bw", zip, sample=all_samples, stage=STAGE, mark=MARK), + # "results/qc/multiBAM_fingerprint.png", + # "results/qc/multiBAM_fingerprint_metrics.txt", + # "results/qc/multiBAM_fingerprint_rawcounts.txt", + # "results/qc/plot_coverage.png", + # "results/qc/plot_coverage_rawcounts.tab", + #expand("results/bwa/{sample}_{stage}_{mark}_q30.dupmark.tmp.bam", zip, sample=all_samples, stage=STAGE, mark=MARK), + #expand("logs/{sample}_{stage}_{mark}.pbc.sort", zip, sample=all_samples, stage=STAGE, mark=MARK), + #expand("results/qc/{sample}_{stage}_{mark}.pbc.qc", zip, sample=all_samples, stage=STAGE, mark=MARK), expand("results/qc/{sample}_{stage}_{mark}_filt_15Mreads.SE.cc.qc",zip, sample=all_samples, stage=STAGE, mark=MARK), expand("results/qc/{sample}_{stage}_{mark}_filt_15Mreads.SE.cc.plot.pdf", zip, sample=all_samples, stage=STAGE, mark=MARK), expand("results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK), @@ -100,14 +102,13 @@ rule all: expand("results/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bed", zip, case=IPS, stage=STAGE, mark=MARK), expand("logs/{case}_{stage}_{mark}.bamToBed", zip, case=IPS, stage=STAGE, mark=MARK), expand("results/qc/{case}_vs_{control}_{stage}_{mark}.frip.txt", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK), - directory("results/macs2/pooled"), - # "results/macs2/H3K27ac_E10.5_overlap.narrowPeak", - # "results/macs2/H3K27ac_E11.5_overlap.narrowPeak", - # "results/macs2/H3K27ac_E12.5_overlap.narrowPeak", - # "results/macs2/H3K27ac_E13.5_overlap.narrowPeak", - # "results/macs2/H3K27ac_E14.5_overlap.narrowPeak", - # 
"results/macs2/H3K27ac_E15.5_overlap.narrowPeak" - # expand("results/macs2/{stage}_H3K27ac_overlap.narrowPeak", zip, stage= STAGE, mark = MARK) + directory("results/macs2/pooled/"), + "results/macs2/H3K27ac_E10.5_overlap.narrowPeak", + "results/macs2/H3K27ac_E11.5_overlap.narrowPeak", + "results/macs2/H3K27ac_E12.5_overlap.narrowPeak", + "results/macs2/H3K27ac_E13.5_overlap.narrowPeak", + "results/macs2/H3K27ac_E14.5_overlap.narrowPeak", + "results/macs2/H3K27ac_E15.5_overlap.narrowPeak" # directory("results/multiqc/multiqc_report_data/"), # "results/multiqc/multiqc_report.html" @@ -267,22 +268,41 @@ rule mappingStats: shell("samtools flagstat {input.c} > {output.c}") shell("samtools flagstat {input.d} > {output.d}") -rule estimate_lib_complexity: - input: - "results/bwa/{sample}_{stage}_{mark}_q30.dupmark.bam" - output: - qc = "results/qc/{sample}_{stage}_{mark}.pbc.qc", - log: - "logs/{sample}_{stage}_{mark}.pbc" - shell: - """ - bedtools bamtobed -bedpe -i {input} \ - | awk 'BEGIN{{OFS="\\t"}}{{print $1,$2,$4,$6,$9,$10}}' \ - | sort | uniq -c \ - | awk 'BEGIN{{mt=0;m0=0;m1=0;m2=0}} ($1==1){{m1=m1+1}} ($1==2){{m2=m2+1}} \ - {{m0=m0+1}} {{mt=mt+$1}} END{{printf "%d\\t%d\\t%d\\t%d\\t%f\\t%f\\t%f\\n" ,mt,m0,m1,m2,m0/mt,m1/m0,m1/m2}}' \ - > {output.qc} - """ + +# rule sort_name: +# input: +# "results/bwa/{sample}_{stage}_{mark}_q30.dupmark.bam" +# output: +# tmp = "results/bwa/{sample}_{stage}_{mark}_q30.dupmark.tmp.bam" +# log: +# "logs/{sample}_{stage}_{mark}.pbc.sort" +# run: +# shell("samtools sort -n {input} -o {output.tmp} 2> {log}") +# +# rule estimate_lib_complexity: +# input: +# "results/bwa/{sample}_{stage}_{mark}_q30.dupmark.tmp.bam" +# output: +# qc = "results/qc/{sample}_{stage}_{mark}.pbc.qc", +# log: +# "logs/{sample}_{stage}_{mark}.pbc" +# shell: +# """ +# bedtools bamtobed -i {input} \ +# | awk 'BEGIN{{OFS="\\t"}}{{print $1,$2,$4,$6}}' \ +# | sort | uniq -c \ +# | awk 'BEGIN{{mt=0;m0=0;m1=0;m2=0}} ($1==1){{m1=m1+1}} ($1==2){{m2=m2+1}} \ +# 
{{m0=m0+1}} {{mt=mt+$1}} END{{printf "%d\\t%d\\t%d\\t%d\\t%f\\t%f\\t%f\\n" ,mt,m0,m1,m2,m0/mt,m1/m0,m1/m2}}' \ +# > {output.qc} +# """ +# +## convert to bedPE +## print read 1 scaffold, read 1 start coordinate, read 2 scaffold, read 2 end coordinate, strand read 1, strand read 2 +## remove mitochondrial genome +## sort by position and obtain uniq reads +## Format of file +## TotalReadPairs [tab] DistinctReadPairs [tab] OneReadPair [tab] TwoReadPairs [tab] NRF=Distinct/Total [tab] PBC1=OnePair/Distinct [tab] PBC2=OnePair/TwoPair + # =============================================================================================== # 5. deepTools @@ -470,23 +490,23 @@ rule call_peaks_macs2_pooled_replicates: E12C = "results/bwa/input_E12.5_H3K27ac_q30.sorted.dedup.bam", E13C = "results/bwa/input_E13.5_H3K27ac_q30.sorted.dedup.bam", E14C = "results/bwa/input_E14.5_H3K27ac_q30.sorted.dedup.bam", - E15C = "results/bwa/input_E15.5_H3K27ac_q30.sorted.dedup.bam", + E15C = "results/bwa/input_E15.5_H3K27ac_q30.sorted.dedup.bam" output: - directory("results/macs2/pooled") + directory("results/macs2/pooled/") log: E10 = "results/qc/E10.5_H3K27ac.pooled.macs2", E11 = "results/qc/E11.5_H3K27ac.pooled.macs2", E12 = "results/qc/E12.5_H3K27ac.pooled.macs2", E13 = "results/qc/E13.5_H3K27ac.pooled.macs2", E14 = "results/qc/E14.5_H3K27ac.pooled.macs2", - E15 = "results/qc/E15.5_H3K27ac.pooled.macs2", + E15 = "results/qc/E15.5_H3K27ac.pooled.macs2" params: E10 = "E10.5_H3K27ac.pooled.macs2", E11 = "E11.5_H3K27ac.pooled.macs2", E12 = "E12.5_H3K27ac.pooled.macs2", E13 = "E13.5_H3K27ac.pooled.macs2", E14 = "E14.5_H3K27ac.pooled.macs2", - E15 = "E15.5_H3K27ac.pooled.macs2", + E15 = "E15.5_H3K27ac.pooled.macs2" run: shell("macs2 callpeak -f BAM -t {input.E10} \ -c {input.E10C} --keep-dup all \ @@ -556,12 +576,12 @@ rule frip: rule overlap_peaks: input: - E10= expand(["results/macs2/{case}_vs_{control}_E10.5_H3K27ac_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS, mark=MARK), - E11= 
expand(["results/macs2/{case}_vs_{control}_E11.5_H3K27ac_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS, mark=MARK), - E12= expand(["results/macs2/{case}_vs_{control}_E12.5_H3K27ac_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS, mark=MARK), - E13= expand(["results/macs2/{case}_vs_{control}_E13.5_H3K27ac_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS, mark=MARK), - E14= expand(["results/macs2/{case}_vs_{control}_E14.5_H3K27ac_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS, mark=MARK), - E15= expand(["results/macs2/{case}_vs_{control}_E15.5_H3K27ac_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS, mark=MARK), + E10= ["results/macs2/ENCFF213EBC_vs_ENCFF157KEH_E10.5_H3K27ac_macs2_peaks.narrowPeak", "results/macs2/ENCFF548BRR_vs_ENCFF825AVI_E10.5_H3K27ac_macs2_peaks.narrowPeak"], + E11= ["results/macs2/ENCFF512SFE_vs_ENCFF184CUE_E11.5_H3K27ac_macs2_peaks.narrowPeak", "results/macs2/ENCFF515PKL_vs_ENCFF376FGM_E11.5_H3K27ac_macs2_peaks.narrowPeak"], + E12= ["results/macs2/ENCFF011NFM_vs_ENCFF058AUT_E12.5_H3K27ac_macs2_peaks.narrowPeak", "results/macs2/ENCFF394TZN_vs_ENCFF203JQV_E12.5_H3K27ac_macs2_peaks.narrowPeak"], + E13= ["results/macs2/ENCFF194ORC_vs_ENCFF117QRC_E13.5_H3K27ac_macs2_peaks.narrowPeak", "results/macs2/ENCFF290ZNF_vs_ENCFF248PGK_E13.5_H3K27ac_macs2_peaks.narrowPeak"], + E14= ["results/macs2/ENCFF902HAR_vs_ENCFF002HZV_E14.5_H3K27ac_macs2_peaks.narrowPeak", "results/macs2/ENCFF327VAO_vs_ENCFF784ORI_E14.5_H3K27ac_macs2_peaks.narrowPeak"], + E15= ["results/macs2/ENCFF707WKL_vs_ENCFF182XFG_E15.5_H3K27ac_macs2_peaks.narrowPeak", "results/macs2/ENCFF584JFB_vs_ENCFF727QTS_E15.5_H3K27ac_macs2_peaks.narrowPeak"], E10_pooled="results/macs2/E10.5_H3K27ac.pooled.macs2_peaks.narrowPeak", E11_pooled="results/macs2/E11.5_H3K27ac.pooled.macs2_peaks.narrowPeak", E12_pooled="results/macs2/E12.5_H3K27ac.pooled.macs2_peaks.narrowPeak", diff --git a/mouse/Snakefile_H3K4me3 b/mouse/Snakefile_H3K4me3 new file mode 100644 index 
0000000..6e37aac --- /dev/null +++ b/mouse/Snakefile_H3K4me3 @@ -0,0 +1,663 @@ +## Author: Laura E Cook, University of Melbourne +## Purpose: ChIP-seq pipeline for histone marks +## This workflow only works for single-end reads + # 1. FastQC on raw reads + # 2. Alignment (downloaded from ENCODE) + # 3. Filtering + # 4. Alignment QC & Library Complexity + # 5. deepTools + # 7. cross correlation (SPP) + # 8. Call narrow peaks (MACS2) + # 9. Create consensus peaksets + # 10. Present QC for raw read, alignment, peak-calling in MultiQC + + +configfile: "configs/config_H3K4me3.yaml" +# Contains sample and genome information +# calls the SRR.txt file that contains the samples as either IP or control + +import csv +import os + +IPS = [] +INPUTS = [] +MARK = [] +STAGE = [] + +with open(config['SAMPLES'], "r") as f: + reader = csv.reader(f, delimiter = "\t") + # skip the header + header = next(reader) + for row in reader: + STAGE.append(row[2]) + MARK.append(row[1]) + IPS.append(row[3]) + INPUTS.append(row[4]) + +## multiple samples may use the same control input files +unique_inputs = list(set(INPUTS)) +unique_marks = list(set(MARK)) +unique_stages = list(set(STAGE)) + +## combine all samples +all_samples = IPS + INPUTS + +# =============================================================================================== +# Output targets +# =============================================================================================== + +rule all: + input: + expand("results/bwa/{sample}_{stage}_{mark}_q30.sorted.bam", zip, sample=all_samples, stage=STAGE, mark=MARK), + expand("results/bwa/{sample}_{stage}_{mark}_q30.dupmark.bam", zip, sample=all_samples, stage=STAGE, mark=MARK), + expand("results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam", zip, sample=all_samples, stage=STAGE, mark=MARK), + expand("results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai", zip, sample=all_samples, stage=STAGE, mark=MARK), + 
expand("results/qc/{sample}_{stage}_{mark}.dedup.flagstat.qc", zip, sample=all_samples, stage=STAGE, mark=MARK), + expand("results/qc/{sample}_{stage}_{mark}.dupmark.flagstat.qc", zip, sample=all_samples, stage=STAGE, mark=MARK), + expand("results/qc/{sample}_{stage}_{mark}.q30.flagstat.qc", zip, sample=all_samples, stage=STAGE, mark=MARK), + expand("results/qc/{sample}_{stage}_{mark}.unfiltered.flagstat.qc", zip, sample=all_samples, stage=STAGE, mark=MARK), + "results/bwa/E10.5_H3K4me3_q30.sorted.pooled.dedup.bam", + "results/bwa/E11.5_H3K4me3_q30.sorted.pooled.dedup.bam", + "results/bwa/E12.5_H3K4me3_q30.sorted.pooled.dedup.bam", + "results/bwa/E13.5_H3K4me3_q30.sorted.pooled.dedup.bam", + "results/bwa/E14.5_H3K4me3_q30.sorted.pooled.dedup.bam", + "results/bwa/E15.5_H3K4me3_q30.sorted.pooled.dedup.bam", + "results/bwa/input_E10.5_H3K4me3_q30.sorted.dedup.bam", + "results/bwa/input_E11.5_H3K4me3_q30.sorted.dedup.bam", + "results/bwa/input_E12.5_H3K4me3_q30.sorted.dedup.bam", + "results/bwa/input_E13.5_H3K4me3_q30.sorted.dedup.bam", + "results/bwa/input_E14.5_H3K4me3_q30.sorted.dedup.bam", + "results/bwa/input_E15.5_H3K4me3_q30.sorted.dedup.bam", + "logs/E10.5_H3K4me3.mergeBAM", + "logs/E11.5_H3K4me3.mergeBAM", + "logs/E12.5_H3K4me3.mergeBAM", + "logs/E13.5_H3K4me3.mergeBAM", + "logs/E14.5_H3K4me3.mergeBAM", + "logs/E15.5_H3K4me3.mergeBAM", + "logs/input_E10.5_H3K4me3.mergeBAM", + "logs/input_E11.5_H3K4me3.mergeBAM", + "logs/input_E12.5_H3K4me3.mergeBAM", + "logs/input_E13.5_H3K4me3.mergeBAM", + "logs/input_E14.5_H3K4me3.mergeBAM", + "logs/input_E15.5_H3K4me3.mergeBAM", + # "results/qc/multibamsum.npz", + # "results/qc/multibamsum.tab", + # "results/qc/pearsoncor_multibamsum.png", + # "results/qc/pearsoncor_multibamsum_matrix.txt", + # expand("results/qc/{sample}_{stage}_{mark}.SeqDepthNorm.bw", zip, sample=all_samples, stage=STAGE, mark=MARK), + # "results/qc/multiBAM_fingerprint.png", + # "results/qc/multiBAM_fingerprint_metrics.txt", + # 
"results/qc/multiBAM_fingerprint_rawcounts.txt", + # "results/qc/plot_coverage.png", + # "results/qc/plot_coverage_rawcounts.tab", + #expand("results/bwa/{sample}_{stage}_{mark}_q30.dupmark.tmp.bam", zip, sample=all_samples, stage=STAGE, mark=MARK), + #expand("logs/{sample}_{stage}_{mark}.pbc.sort", zip, sample=all_samples, stage=STAGE, mark=MARK), + #expand("results/qc/{sample}_{stage}_{mark}.pbc.qc", zip, sample=all_samples, stage=STAGE, mark=MARK), + expand("results/qc/{sample}_{stage}_{mark}_filt_15Mreads.SE.cc.qc",zip, sample=all_samples, stage=STAGE, mark=MARK), + expand("results/qc/{sample}_{stage}_{mark}_filt_15Mreads.SE.cc.plot.pdf", zip, sample=all_samples, stage=STAGE, mark=MARK), + expand("results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK), + expand("results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.xls", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK), + expand("results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_summits.bed", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK), + expand("logs/{case}_vs_{control}_{stage}_{mark}_call_peaks_macs2.log", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK), + expand("results/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bed", zip, case=IPS, stage=STAGE, mark=MARK), + expand("logs/{case}_{stage}_{mark}.bamToBed", zip, case=IPS, stage=STAGE, mark=MARK), + expand("results/qc/{case}_vs_{control}_{stage}_{mark}.frip.txt", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK), + directory("results/macs2/pooled/"), + "results/macs2/H3K4me3_E10.5_overlap.narrowPeak", + "results/macs2/H3K4me3_E11.5_overlap.narrowPeak", + "results/macs2/H3K4me3_E12.5_overlap.narrowPeak", + "results/macs2/H3K4me3_E13.5_overlap.narrowPeak", + "results/macs2/H3K4me3_E14.5_overlap.narrowPeak", + "results/macs2/H3K4me3_E15.5_overlap.narrowPeak" + # directory("results/multiqc/multiqc_report_data/"), + # 
"results/multiqc/multiqc_report.html" + +# =============================================================================================== +# 1. FASTQC +# =============================================================================================== + +## Done separately because fastq and BAM files don't share file prefixes so it becomes very messy +# without renaming everything + +# =============================================================================================== +# 2. ALIGNMENT +# > ENCODE alignment downloaded from ENCODE database +# =============================================================================================== + +## Singled-end 36-50nt reads +## bwa-aln used for aligning reads to the mouse genome (mm10 assembly) +## Alignment command: bwa-aln -q 5 -l 32 -k 2 <reference_file> <read_file> +## bwa version version 0.7.7 + +## -q = parameter for read trimming +## -l = read length +## -k = Maximum edit distance in the seed + +# =============================================================================================== +# 4. 
FILTERING +# > remove unmapped, mate unmapped +# > remove low MAPQ reads (-q 30) +# > -F 1804 +# -Read unmapped +# -Mate unmapped +# -Not primary alignment +# -Read fails platform/vendor quality checks +# -Read is PCR or optical duplicate +# > mark duplicates & remove duplicates (picard) +# > re-filter, sort and index final BAM +# =============================================================================================== + +rule filter: + input: + "results/bwa/{sample}_{stage}_{mark}.bam" + output: + "results/bwa/{sample}_{stage}_{mark}_q30.sorted.bam" + log: + "logs/{sample}_{stage}_{mark}.filter" + shell: + "samtools view -b -F 1804 -q 30 {input} | samtools sort -o {output} - 2> {log}" + +rule markDups: + input: + "results/bwa/{sample}_{stage}_{mark}_q30.sorted.bam" + output: + bam="results/bwa/{sample}_{stage}_{mark}_q30.dupmark.bam", + dupQC="results/bwa/{sample}_{stage}_{mark}.dupmark.qc" + log: + "logs/{sample}_{stage}_{mark}.dedup" + shell: + "picard MarkDuplicates I={input} O={output.bam} \ + METRICS_FILE={output.dupQC} REMOVE_DUPLICATES=false ASSUME_SORTED=true 2> {log}" + +rule dedup: + input: + "results/bwa/{sample}_{stage}_{mark}_q30.dupmark.bam" + output: + "results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam" + log: + "logs/{sample}_{stage}_{mark}.dedup" + shell: + "samtools view -F 1804 -b {input} | samtools sort -o {output} - 2> {log}" + +rule indexBam: + input: + "results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam" + output: + "results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai" + log: + "logs/{sample}_{stage}_{mark}.indexBam" + shell: + "samtools index {input} {output} 2> {log}" + +rule mergeBAMreplicates: + input: + E10 = ["results/bwa/ENCFF124UYX_E10.5_{mark}_q30.sorted.dedup.bam", "results/bwa/ENCFF045IPK_E10.5_{mark}_q30.sorted.dedup.bam"], + E11 = ["results/bwa/ENCFF760QYZ_E11.5_{mark}_q30.sorted.dedup.bam", "results/bwa/ENCFF717QDV_E11.5_{mark}_q30.sorted.dedup.bam"], + E12 = 
["results/bwa/ENCFF182ZPF_E12.5_{mark}_q30.sorted.dedup.bam", "results/bwa/ENCFF941QJZ_E12.5_{mark}_q30.sorted.dedup.bam"], + E13 = ["results/bwa/ENCFF485UDC_E13.5_{mark}_q30.sorted.dedup.bam", "results/bwa/ENCFF124TAB_E13.5_{mark}_q30.sorted.dedup.bam"], + E14 = ["results/bwa/ENCFF724DMU_E14.5_{mark}_q30.sorted.dedup.bam", "results/bwa/ENCFF665QBJ_E14.5_{mark}_q30.sorted.dedup.bam"], + E15 = ["results/bwa/ENCFF258KCR_E15.5_{mark}_q30.sorted.dedup.bam", "results/bwa/ENCFF401BKM_E15.5_{mark}_q30.sorted.dedup.bam"], + E10C = ["results/bwa/ENCFF157KEH_E10.5_H3K4me3_q30.sorted.dedup.bam", "results/bwa/ENCFF825AVI_E10.5_H3K4me3_q30.sorted.dedup.bam"], + E11C = ["results/bwa/ENCFF184CUE_E11.5_H3K4me3_q30.sorted.dedup.bam", "results/bwa/ENCFF376FGM_E11.5_H3K4me3_q30.sorted.dedup.bam"], + E12C = ["results/bwa/ENCFF203JQV_E12.5_H3K4me3_q30.sorted.dedup.bam", "results/bwa/ENCFF058AUT_E12.5_H3K4me3_q30.sorted.dedup.bam"], + E13C = ["results/bwa/ENCFF117QRC_E13.5_H3K4me3_q30.sorted.dedup.bam", "results/bwa/ENCFF248PGK_E13.5_H3K4me3_q30.sorted.dedup.bam"], + E14C = ["results/bwa/ENCFF784ORI_E14.5_H3K4me3_q30.sorted.dedup.bam", "results/bwa/ENCFF002HZV_E14.5_H3K4me3_q30.sorted.dedup.bam"], + E15C = ["results/bwa/ENCFF727QTS_E15.5_H3K4me3_q30.sorted.dedup.bam", "results/bwa/ENCFF182XFG_E15.5_H3K4me3_q30.sorted.dedup.bam"], + output: + E10 = "results/bwa/E10.5_H3K4me3_q30.sorted.pooled.dedup.bam", + E11 = "results/bwa/E11.5_H3K4me3_q30.sorted.pooled.dedup.bam", + E12 = "results/bwa/E12.5_H3K4me3_q30.sorted.pooled.dedup.bam", + E13 = "results/bwa/E13.5_H3K4me3_q30.sorted.pooled.dedup.bam", + E14 = "results/bwa/E14.5_H3K4me3_q30.sorted.pooled.dedup.bam", + E15 = "results/bwa/E15.5_H3K4me3_q30.sorted.pooled.dedup.bam", + E10C = "results/bwa/input_E10.5_H3K4me3_q30.sorted.dedup.bam", + E11C = "results/bwa/input_E11.5_H3K4me3_q30.sorted.dedup.bam", + E12C = "results/bwa/input_E12.5_H3K4me3_q30.sorted.dedup.bam", + E13C = "results/bwa/input_E13.5_H3K4me3_q30.sorted.dedup.bam", + E14C = 
"results/bwa/input_E14.5_H3K4me3_q30.sorted.dedup.bam", + E15C = "results/bwa/input_E15.5_H3K4me3_q30.sorted.dedup.bam" + log: + E10 = "logs/E10.5_H3K4me3.mergeBAM", + E11 = "logs/E11.5_H3K4me3.mergeBAM", + E12 = "logs/E12.5_H3K4me3.mergeBAM", + E13 = "logs/E13.5_H3K4me3.mergeBAM", + E14 = "logs/E14.5_H3K4me3.mergeBAM", + E15 = "logs/E15.5_H3K4me3.mergeBAM", + E10C = "logs/input_E10.5_H3K4me3.mergeBAM", + E11C = "logs/input_E11.5_H3K4me3.mergeBAM", + E12C = "logs/input_E12.5_H3K4me3.mergeBAM", + E13C = "logs/input_E13.5_H3K4me3.mergeBAM", + E14C = "logs/input_E14.5_H3K4me3.mergeBAM", + E15C = "logs/input_E15.5_H3K4me3.mergeBAM" + run: + shell("samtools merge {output.E10} {input.E10} 2> {log.E10}") + shell("samtools merge {output.E11} {input.E11} 2> {log.E11}") + shell("samtools merge {output.E12} {input.E12} 2> {log.E12}") + shell("samtools merge {output.E13} {input.E13} 2> {log.E13}") + shell("samtools merge {output.E14} {input.E14} 2> {log.E14}") + shell("samtools merge {output.E15} {input.E15} 2> {log.E15}") + shell("samtools merge {output.E10C} {input.E10C} 2> {log.E10C}") + shell("samtools merge {output.E11C} {input.E11C} 2> {log.E11C}") + shell("samtools merge {output.E12C} {input.E12C} 2> {log.E12C}") + shell("samtools merge {output.E13C} {input.E13C} 2> {log.E13C}") + shell("samtools merge {output.E14C} {input.E14C} 2> {log.E14C}") + shell("samtools merge {output.E15C} {input.E15C} 2> {log.E15C}") + + +# =============================================================================================== +# 4. 
ALIGNMENT QC +# > SAMtools flagstat statistics +# > CollectMultipleMetrics (picard) +# > Compute Library Complexity (PreSeq) +# =============================================================================================== + +rule mappingStats: + input: + a="results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam", + b="results/bwa/{sample}_{stage}_{mark}_q30.dupmark.bam", + c="results/bwa/{sample}_{stage}_{mark}_q30.sorted.bam", + d="results/bwa/{sample}_{stage}_{mark}.bam" + output: + a="results/qc/{sample}_{stage}_{mark}.dedup.flagstat.qc", + b="results/qc/{sample}_{stage}_{mark}.dupmark.flagstat.qc", + c="results/qc/{sample}_{stage}_{mark}.q30.flagstat.qc", + d="results/qc/{sample}_{stage}_{mark}.unfiltered.flagstat.qc", + run: + shell("samtools flagstat {input.a} > {output.a}") + shell("samtools flagstat {input.b} > {output.b}") + shell("samtools flagstat {input.c} > {output.c}") + shell("samtools flagstat {input.d} > {output.d}") + + +# rule sort_name: +# input: +# "results/bwa/{sample}_{stage}_{mark}_q30.dupmark.bam" +# output: +# tmp = "results/bwa/{sample}_{stage}_{mark}_q30.dupmark.tmp.bam" +# log: +# "logs/{sample}_{stage}_{mark}.pbc.sort" +# run: +# shell("samtools sort -n {input} -o {output.tmp} 2> {log}") +# +# rule estimate_lib_complexity: +# input: +# "results/bwa/{sample}_{stage}_{mark}_q30.dupmark.tmp.bam" +# output: +# qc = "results/qc/{sample}_{stage}_{mark}.pbc.qc", +# log: +# "logs/{sample}_{stage}_{mark}.pbc" +# shell: +# """ +# bedtools bamtobed -i {input} \ +# | awk 'BEGIN{{OFS="\\t"}}{{print $1,$2,$4,$6}}' \ +# | sort | uniq -c \ +# | awk 'BEGIN{{mt=0;m0=0;m1=0;m2=0}} ($1==1){{m1=m1+1}} ($1==2){{m2=m2+1}} \ +# {{m0=m0+1}} {{mt=mt+$1}} END{{printf "%d\\t%d\\t%d\\t%d\\t%f\\t%f\\t%f\\n" ,mt,m0,m1,m2,m0/mt,m1/m0,m1/m2}}' \ +# > {output.qc} +# """ +# +## convert to bedPE +## print read 1 scaffold, read 1 start coordinate, read 2 scaffold, read 2 end coordinate, strand read 1, strand read 2 +## remove mitochondrial genome +## sort by position 
and obtain uniq reads +## Format of file +## TotalReadPairs [tab] DistinctReadPairs [tab] OneReadPair [tab] TwoReadPairs [tab] NRF=Distinct/Total [tab] PBC1=OnePair/Distinct [tab] PBC2=OnePair/TwoPair + + +# =============================================================================================== +# 5. deepTools +# > multiBAMsummary +# > plotCorrelation +# > plotFingerprint +# > bamCoverage (read depth normalised bigWig files) +# =============================================================================================== + +rule deeptools_summary: + input: + expand(["results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"], zip, sample=all_samples, stage=STAGE, mark = MARK) + output: + sum="results/qc/multibamsum.npz", + counts="results/qc/multibamsum.tab" + threads: 32 + log: + "logs/multisummary.deepTools" + shell: + " multiBamSummary bins \ + -p {threads} \ + -b {input} \ + --centerReads \ + -out {output.sum} \ + --outRawCounts {output.counts} 2> {log}" + +rule deeptools_correlation: + input: "results/qc/multibamsum.npz" + output: + fig="results/qc/pearsoncor_multibamsum.png", + matrix="results/qc/pearsoncor_multibamsum_matrix.txt" + log: + "logs/correlation.deepTools" + shell: + "plotCorrelation \ + --corData {input} \ + --plotFile {output.fig} \ + --outFileCorMatrix {output.matrix} \ + --corMethod pearson \ + --whatToPlot heatmap \ + --skipZeros \ + --plotNumbers \ + --colorMap RdYlBu 2> {log}" + +rule deeptools_coverage: + input: + bam = "results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam", + bai = "results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai" + output: + "results/qc/{sample}_{stage}_{mark}.SeqDepthNorm.bw" + log: + "logs/{sample}_{stage}_{mark}_coverage.deepTools" + shell: + "bamCoverage --bam {input.bam} --binSize 10 --normalizeUsing RPGC --effectiveGenomeSize 2308125349 --numberOfProcessors 4 -o {output} 2> {log}" + +rule deeptools_fingerprint: + input: + bam = 
expand(["results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"], zip, sample=all_samples, stage=STAGE, mark = MARK), + bai = expand(["results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai"], zip, sample=all_samples, stage=STAGE, mark = MARK) + output: + fig="results/qc/multiBAM_fingerprint.png", + metrics="results/qc/multiBAM_fingerprint_metrics.txt", + rawcounts="results/qc/multiBAM_fingerprint_rawcounts.txt" + threads: 32 + log: + "logs/fingerprint.deepTools" + shell: + "plotFingerprint -p {threads} \ + -b {input.bam} \ + --plotFile {output.fig} \ + --outQualityMetrics {output.metrics} \ + --outRawCounts {output.rawcounts} \ + --minMappingQuality 20 \ + --skipZeros \ + --centerReads 2> {log}" + +rule deeptools_plotCoverage: + input: + bam = expand(["results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bam"], zip, sample=all_samples, stage=STAGE, mark=MARK), + bai = expand(["results/bwa/{sample}_{stage}_{mark}_q30.sorted.dedup.bai"], zip, sample=all_samples, stage=STAGE, mark=MARK) + output: + fig="results/qc/plot_coverage.png", + rawcounts="results/qc/plot_coverage_rawcounts.tab" + log: + "logs/plotCoverage.deepTools" + shell: + "plotCoverage --bamfiles {input.bam} --plotFile {output.fig} \ + -n 1000000 \ + --outRawCounts {output.rawcounts} \ + --ignoreDuplicates 2> {log}" + + +# =============================================================================================== +# 6. 
Cross-correlation scores - following ENCODE code +# =============================================================================================== + +rule bamtobed_crossC: + input: + "results/bwa/{sample}_{stage}_{mark}_q30.dupmark.bam" + output: + tagAlign = "results/bed/{sample}_{stage}_{mark}_q30_SE.tagAlign.gz" + shell: + """ + bedtools bamtobed -i {input} | \ + awk 'BEGIN{{OFS="\\t"}}{{$4='N';$5='1000';print $0}}' |\ + gzip -nc > {output.tagAlign} + """ + +## Subsample 15 million reads for cross-correlation analysis +## Estimate read length from first 100 reads +rule subsample_aligned_reads: + input: + "results/bed/{sample}_{stage}_{mark}_q30_SE.tagAlign.gz" + output: + subsample = "results/bed/{sample}_{stage}_{mark}.filt.sample.15Mreads.SE.tagAlign.gz", + tmp = "results/bed/{sample}_{stage}_{mark}_R1_trimmed_q30_SE.tagAlign.tmp" + params: + nreads= 15000000 + run: + shell("""zcat {input} | shuf -n {params.nreads} --random-source=<(openssl enc -aes-256-ctr -pass pass:$(zcat -f {input} | wc -c) -nosalt </dev/zero 2>/dev/null) | gzip -nc > {output.subsample}""") + shell("zcat {input} > {output.tmp}") + +# Determine exclusion range for fragment length estimation. +## From bamPEFragmentSize (deepTools) average fragment length is ~220bp +## See bamPEFragmentSize.histogram.png +# Use a fixed lowerbound at -500. 
+# Upperbound: +# EXCLUSION_RANGE_MAX is + # Histone ChIP-seq: max(read_len + 10, 100) + # lowerbound is fixed at -500 for both + + +rule cross_correlation_SSP: + input: + "results/bed/{sample}_{stage}_{mark}.filt.sample.15Mreads.SE.tagAlign.gz" + output: + CC_SCORES_FILE="results/qc/{sample}_{stage}_{mark}_filt_15Mreads.SE.cc.qc", + CC_PLOT_FILE="results/qc/{sample}_{stage}_{mark}_filt_15Mreads.SE.cc.plot.pdf" + log: + "logs/{sample}_{stage}_{mark}_filt_15Mreads.SE.spp.log" + params: + EXCLUSION_RANGE_MIN=-500, + EXCLUSION_RANGE_MAX=85 + shell: + "Rscript scripts/run_spp.R -c={input} -savp={output.CC_PLOT_FILE} -out={output.CC_SCORES_FILE} -x={params.EXCLUSION_RANGE_MIN}:{params.EXCLUSION_RANGE_MAX} 2> {log}" + + +# CC_SCORE FILE format +# Filename <tab> numReads <tab> estFragLen <tab> corr_estFragLen <tab> PhantomPeak <tab> corr_phantomPeak <tab> argmin_corr <tab> min_corr <tab> phantomPeakCoef <tab> relPhantomPeakCoef <tab> QualityTag + + +# =============================================================================================== +# 7. 
rule call_peaks_macs2:
    # Per-replicate peak calling: one IP sample against its matched input
    # control. The relaxed threshold (-p 0.01) is deliberate: replicate
    # agreement is enforced downstream by the ENCODE overlap step
    # (rule overlap_peaks), which expects over-called per-replicate peaks.
    input:
        control = "results/bwa/{control}_{stage}_{mark}_q30.sorted.dedup.bam",
        case = "results/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bam"
    output:
        "results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.xls",
        "results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_summits.bed",
        "results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak",
    log:
        "logs/{case}_vs_{control}_{stage}_{mark}_call_peaks_macs2.log"
    params:
        name = "{case}_vs_{control}_{stage}_{mark}_macs2",
    shell:
        " macs2 callpeak -f BAM -t {input.case} \
        -c {input.control} --keep-dup all \
        --outdir results/macs2/ -p 0.01 \
        -n {params.name} \
        -g mm 2> {log} "


rule call_peaks_macs2_pooled_replicates:
    # Pooled-replicate peak calling, one macs2 run per developmental stage
    # (pooled IP BAM vs the stage's input control). Stage names are
    # hardcoded because the {stage} wildcard could not be threaded through
    # the paired pooled/control inputs.
    input:
        E10 = "results/bwa/E10.5_H3K4me3_q30.sorted.pooled.dedup.bam",
        E11 = "results/bwa/E11.5_H3K4me3_q30.sorted.pooled.dedup.bam",
        E12 = "results/bwa/E12.5_H3K4me3_q30.sorted.pooled.dedup.bam",
        E13 = "results/bwa/E13.5_H3K4me3_q30.sorted.pooled.dedup.bam",
        E14 = "results/bwa/E14.5_H3K4me3_q30.sorted.pooled.dedup.bam",
        E15 = "results/bwa/E15.5_H3K4me3_q30.sorted.pooled.dedup.bam",
        E10C = "results/bwa/input_E10.5_H3K4me3_q30.sorted.dedup.bam",
        E11C = "results/bwa/input_E11.5_H3K4me3_q30.sorted.dedup.bam",
        E12C = "results/bwa/input_E12.5_H3K4me3_q30.sorted.dedup.bam",
        E13C = "results/bwa/input_E13.5_H3K4me3_q30.sorted.dedup.bam",
        E14C = "results/bwa/input_E14.5_H3K4me3_q30.sorted.dedup.bam",
        E15C = "results/bwa/input_E15.5_H3K4me3_q30.sorted.dedup.bam"
    output:
        # BUG FIX: this rule previously declared directory("results/macs2/pooled/")
        # as its output, but every macs2 call below writes into
        # --outdir results/macs2/, so the declared directory was never created
        # and Snakemake flagged the rule as failed (missing output). Declare
        # the narrowPeak files macs2 actually produces -- the same paths that
        # rule overlap_peaks lists as its *_pooled inputs, which also lets
        # Snakemake resolve the DAG between the two rules.
        expand("results/macs2/{stage}_H3K4me3.pooled.macs2_peaks.narrowPeak",
               stage=["E10.5", "E11.5", "E12.5", "E13.5", "E14.5", "E15.5"])
    log:
        E10 = "results/qc/E10.5_H3K4me3.pooled.macs2",
        E11 = "results/qc/E11.5_H3K4me3.pooled.macs2",
        E12 = "results/qc/E12.5_H3K4me3.pooled.macs2",
        E13 = "results/qc/E13.5_H3K4me3.pooled.macs2",
        E14 = "results/qc/E14.5_H3K4me3.pooled.macs2",
        E15 = "results/qc/E15.5_H3K4me3.pooled.macs2"
    params:
        E10 = "E10.5_H3K4me3.pooled.macs2",
        E11 = "E11.5_H3K4me3.pooled.macs2",
        E12 = "E12.5_H3K4me3.pooled.macs2",
        E13 = "E13.5_H3K4me3.pooled.macs2",
        E14 = "E14.5_H3K4me3.pooled.macs2",
        E15 = "E15.5_H3K4me3.pooled.macs2"
    run:
        # One callpeak invocation per stage; the named input/params/log
        # entries share the stage prefix, so look them up by name instead
        # of repeating the shell command six times.
        for s in ("E10", "E11", "E12", "E13", "E14", "E15"):
            treat = getattr(input, s)         # pooled IP BAM
            ctrl = getattr(input, s + "C")    # matched input-control BAM
            name = getattr(params, s)         # macs2 -n output prefix
            logfile = getattr(log, s)
            shell("macs2 callpeak -f BAM -t {treat} "
                  "-c {ctrl} --keep-dup all "
                  "--outdir results/macs2/ -p 0.01 "
                  "-n {name} "
                  "-g mm 2> {logfile}")


# ===============================================================================================
# 8. Peak QC
# > narrow peak counts
# > fraction of reads in peaks (convert BAM to bed first then do intersect with peaks)
# ===============================================================================================

## Convert BAM to tagAlign file for calculating FRiP QC metric (Fraction of reads in peaks)
rule bamToBed:
    # Name-sort the deduplicated alignments (samtools sort with no -o writes
    # to stdout) and stream them through bedtools bamtobed to get BED records.
    input:
        "results/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bam"
    output:
        "results/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bed"
    log:
        "logs/{case}_{stage}_{mark}.bamToBed"
    shell:
        "samtools sort -n {input} | bedtools bamtobed -i - > {output}"

## Fraction of reads in peaks
rule frip:
    # FRiP score: intersect the per-sample BED reads with the matching
    # macs2 narrowPeak set using the ENCODE helper script.
    input:
        bed = "results/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bed",
        peak = "results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak"
    output:
        "results/qc/{case}_vs_{control}_{stage}_{mark}.frip.txt"
    shell:
        "python2.7 scripts/encode_frip.py {input.bed} {input.peak} > {output}"


# ===============================================================================================
# 9. Create consensus peaksets for replicates
# ===============================================================================================

# Based on ENCODE `overlap_peaks.py` - recommended for histone marks.
# Need to pool peaks first

rule overlap_peaks:
    # Consensus peakset per stage: the two replicate peak sets plus the
    # pooled peak set are handed to the ENCODE overlap script, which keeps
    # pooled peaks supported by the replicates. Replicate/pooled file names
    # are hardcoded per stage.
    input:
        E10= ["results/macs2/ENCFF124UYX_vs_ENCFF157KEH_E10.5_H3K4me3_macs2_peaks.narrowPeak", "results/macs2/ENCFF045IPK_vs_ENCFF825AVI_E10.5_H3K4me3_macs2_peaks.narrowPeak"],
        E11= ["results/macs2/ENCFF760QYZ_vs_ENCFF184CUE_E11.5_H3K4me3_macs2_peaks.narrowPeak", "results/macs2/ENCFF717QDV_vs_ENCFF376FGM_E11.5_H3K4me3_macs2_peaks.narrowPeak"],
        E12= ["results/macs2/ENCFF941QJZ_vs_ENCFF058AUT_E12.5_H3K4me3_macs2_peaks.narrowPeak", "results/macs2/ENCFF182ZPF_vs_ENCFF203JQV_E12.5_H3K4me3_macs2_peaks.narrowPeak"],
        E13= ["results/macs2/ENCFF485UDC_vs_ENCFF117QRC_E13.5_H3K4me3_macs2_peaks.narrowPeak", "results/macs2/ENCFF124TAB_vs_ENCFF248PGK_E13.5_H3K4me3_macs2_peaks.narrowPeak"],
        E14= ["results/macs2/ENCFF665QBJ_vs_ENCFF002HZV_E14.5_H3K4me3_macs2_peaks.narrowPeak", "results/macs2/ENCFF724DMU_vs_ENCFF784ORI_E14.5_H3K4me3_macs2_peaks.narrowPeak"],
        E15= ["results/macs2/ENCFF401BKM_vs_ENCFF182XFG_E15.5_H3K4me3_macs2_peaks.narrowPeak", "results/macs2/ENCFF258KCR_vs_ENCFF727QTS_E15.5_H3K4me3_macs2_peaks.narrowPeak"],
        E10_pooled="results/macs2/E10.5_H3K4me3.pooled.macs2_peaks.narrowPeak",
        E11_pooled="results/macs2/E11.5_H3K4me3.pooled.macs2_peaks.narrowPeak",
        E12_pooled="results/macs2/E12.5_H3K4me3.pooled.macs2_peaks.narrowPeak",
        E13_pooled="results/macs2/E13.5_H3K4me3.pooled.macs2_peaks.narrowPeak",
        E14_pooled="results/macs2/E14.5_H3K4me3.pooled.macs2_peaks.narrowPeak",
        E15_pooled="results/macs2/E15.5_H3K4me3.pooled.macs2_peaks.narrowPeak",
    output:
        E10= "results/macs2/H3K4me3_E10.5_overlap.narrowPeak",
        E11= "results/macs2/H3K4me3_E11.5_overlap.narrowPeak",
        E12= "results/macs2/H3K4me3_E12.5_overlap.narrowPeak",
        E13= "results/macs2/H3K4me3_E13.5_overlap.narrowPeak",
        E14= "results/macs2/H3K4me3_E14.5_overlap.narrowPeak",
        E15= "results/macs2/H3K4me3_E15.5_overlap.narrowPeak"
    run:
        # Named entries share the stage prefix, so resolve replicates,
        # pooled set and output by name; Snakemake's shell() joins list
        # values with spaces, matching the original {input.E10} expansion.
        for s in ("E10", "E11", "E12", "E13", "E14", "E15"):
            reps = getattr(input, s)
            pooled = getattr(input, s + "_pooled")
            out = getattr(output, s)
            shell("python2.7 scripts/overlap_peaks.py {reps} {pooled} {out}")

# ===============================================================================================
# 10. Combine all QC into multiqc output
# ===============================================================================================
#
# rule multiqc:
#     input:
#         # fastqc
#         expand("results/fastqc/{sample}_R1_fastqc.html", sample=all_samples),
#         expand("results/fastqc/{sample}_R2_fastqc.html", sample=all_samples),
#         expand("results/fastqc/{sample}_R1_fastqc.zip", sample=all_samples),
#         expand("results/fastqc/{sample}_R2_fastqc.zip", sample=all_samples),
#         # bwa
#         expand("logs/{sample}.align", sample=all_samples),
#         expand("logs/{sample}.flagstat.qc", sample=all_samples),
#         # preseq
#         expand("results/preseq/{sample}.ccurve.txt", sample=all_samples),
#         # picard
#         expand("results/picard/{sample}_est_lib_complex_metrics.txt", sample=all_samples),
#         # deepTools
#         "results/deeptools/multibamsum.npz",
#         "results/deeptools/multibamsum.tab",
#         "results/deeptools/pearsoncor_multibamsum.png",
#         "results/deeptools/pearsoncor_multibamsum_matrix.txt",
#         expand("results/deeptools/{sample}.SeqDepthNorm.bw", sample=all_samples),
#         "results/deeptools/multiBAM_fingerprint.png",
#         "results/deeptools/multiBAM_fingerprint_metrics.txt",
#         "results/deeptools/multiBAM_fingerprint_rawcounts.txt",
#         "results/deeptools/plot_coverage.png",
#         "results/deeptools/plot_coverage_rawcounts.tab",
#         "results/deeptools/bamPEFragmentSize_hist.png",
#         "results/deeptools/bamPEFragmentSize_rawcounts.tab",
#         #
phantomPeaks + # expand("results/phantomPeaks/{sample}.spp.pdf", sample = IPS), + # expand("results/phantomPeaks/{sample}.spp.Rdata", sample = IPS), + # expand("results/phantomPeaks/{sample}.spp.out", sample = IPS), + # # macs2 + # expand("results/macs2/{case}_vs_{control}_macs2_peaks.narrowPeak", zip, case=IPS, control=INPUTS), + # expand("results/macs2/{case}_vs_{control}_macs2_peaks.xls", zip, case=IPS, control=INPUTS), + # expand("results/macs2/{case}_vs_{control}_macs2_summits.bed", zip, case=IPS, control=INPUTS), + # expand("results/macs2/{case}-vs-{control}-narrowpeak-count_mqc.json", zip, case=IPS, control=INPUTS), + # expand("results/frip/{case}.frip.txt", case=IPS) + # params: + # "results/fastqc/", + # "results/bwa/", + # "logs/", + # "results/deeptools/", + # "results/macs2/", + # "results/phantomPeaks/", + # "results/frip/", + # "results/picard/" + # output: + # directory("results/multiqc/multiqc_report_data/"), + # "results/multiqc/multiqc_report.html", + # log: + # "logs/multiqc.log" + # shell: + # "multiqc -f {params} -m fastqc -m samtools -m picard -m preseq -m deeptools -m phantompeakqualtools -m macs2 -o results/multiqc/ \ + # -n multiqc_report.html 2> {log}" -- GitLab