From 5f3ac1e34da2c828c26bc0ad74cf05fdf6dca6ef Mon Sep 17 00:00:00 2001 From: Laura Cook <l.cook2@student.unimelb.edu.au> Date: Fri, 24 Jul 2020 17:05:11 +1000 Subject: [PATCH] rule to compute NRF, PBC1, PBC2 and rule to compute NSC and RSC --- dunnart/Snakefile | 416 +++++++++++++++++++++++++++++----------------- 1 file changed, 263 insertions(+), 153 deletions(-) diff --git a/dunnart/Snakefile b/dunnart/Snakefile index 2adb620..41cccb9 100644 --- a/dunnart/Snakefile +++ b/dunnart/Snakefile @@ -52,14 +52,20 @@ rule all: expand("results/qc/{sample}_R1_fastqc.zip", sample=all_samples), expand("results/qc/{sample}_R2_fastqc.zip", sample=all_samples), expand("results/bowtie2/{sample}.sorted.bam", sample=all_samples), + expand("results/bowtie2/{sample}.sorted.bai", sample=all_samples), expand("results/bowtie2/{sample}_PPq30.sorted.bam", sample=all_samples), expand("results/bowtie2/{sample}_PPq30.sorted.dupmark.bam", sample=all_samples), expand("results/bowtie2/{sample}_PPq30.sorted.dedup.bam", sample=all_samples), expand("results/bowtie2/{sample}_PPq30.sorted.dedup.bai", sample=all_samples), - expand("results/qc/{sample}.flagstat.qc", sample=all_samples), + expand("results/qc/{sample}.sorted.flagstat.qc", sample=all_samples), + expand("results/qc/{sample}.dedup.flagstat.qc", sample=all_samples), + expand("results/qc/{sample}.dupmark.flagstat.qc", sample=all_samples), + expand("results/qc/{sample}.PPq30.flagstat.qc", sample=all_samples), expand("results/qc/{sample}.ccurve.txt", sample=all_samples), expand("results/qc/{sample}_est_lib_complex_metrics.txt", sample=all_samples), expand("logs/{sample}.picardLibComplexity", sample=all_samples), + expand("results/qc/{sample}.pbc.qc", sample=all_samples), + expand("results/bowtie2/{sample}_PPq30.sorted.tmp.bam",sample=all_samples), "results/qc/multibamsum.npz", "results/qc/multibamsum.tab", "results/qc/pearsoncor_multibamsum.png", @@ -68,26 +74,25 @@ rule all: "results/qc/multiBAM_fingerprint.png", "results/qc/multiBAM_fingerprint_metrics.txt", "results/qc/multiBAM_fingerprint_rawcounts.txt", - "results/qc/plot_coverage.png", - "results/qc/plot_coverage_rawcounts.tab", "results/qc/bamPEFragmentSize_hist.png", "results/qc/bamPEFragmentSize_rawcounts.tab", - expand("results/qc/{sample}.spp.pdf", sample = all_samples), - expand("results/qc/{sample}.spp.Rdata", sample = all_samples), - expand("results/qc/{sample}.spp.out", sample = all_samples), - expand("results/macs2/{case}_vs_{control}_macs2_peaks.narrowPeak", zip, case=IPS, control=INPUTS), - expand("results/macs2/{case}_vs_{control}_macs2_peaks.xls", zip, case=IPS, control=INPUTS), - expand("results/macs2/{case}_vs_{control}_macs2_summits.bed", zip, case=IPS, control=INPUTS), - expand("results/qxc{case}-vs-{control}-narrowpeak-count_mqc.json", zip, case=IPS, control=INPUTS), - expand("results/bowtie2/{case}.bedpe", case=IPS), - expand("logs/{case}.bamToBed", case=IPS), - expand("results/qc/{case}_vs_{control}.frip.txt", case=IPS, control=INPUTS), - "results/macs2/H3K4me3_pooled_macs2_peaks.narrowPeak", - "results/macs2/H3K27ac_pooled_macs2_peaks.narrowPeak", - "results/macs2/H3K4me3_overlap.narrowPeak", - "results/macs2/H3K27ac_overlap.narrowPeak", - directory("results/multiqc/multiqc_report_data/"), - "results/multiqc/multiqc_report.html" + expand("results/qc/{sample}_R1_trimmed.flagstat.qc", sample=all_samples), + expand("results/bowtie2/{sample}_R1_trimmed_q30.bam", sample=all_samples), + expand("{sample}_filt_15Mreads.SE.cc.qc", sample=all_samples), + 
expand("{sample}_filt_15Mreads.SE.cc.plot.pdf", sample=all_samples) + # expand("results/macs2/{case}_vs_{control}_macs2_peaks.narrowPeak", zip, case=IPS, control=INPUTS), + # expand("results/macs2/{case}_vs_{control}_macs2_peaks.xls", zip, case=IPS, control=INPUTS), + # expand("results/macs2/{case}_vs_{control}_macs2_summits.bed", zip, case=IPS, control=INPUTS), + # expand("results/qxc{case}-vs-{control}-narrowpeak-count_mqc.json", zip, case=IPS, control=INPUTS), + # expand("results/bowtie2/{case}.bedpe", case=IPS), + # expand("logs/{case}.bamToBed", case=IPS), + # expand("results/qc/{case}_vs_{control}.frip.txt", case=IPS, control=INPUTS) + # "results/macs2/H3K4me3_pooled_macs2_peaks.narrowPeak", + # "results/macs2/H3K27ac_pooled_macs2_peaks.narrowPeak", + # "results/macs2/H3K4me3_overlap.narrowPeak", + # "results/macs2/H3K27ac_overlap.narrowPeak", + # directory("results/multiqc/multiqc_report_data/"), + # "results/multiqc/multiqc_report.html" # =============================================================================================== # 1. FASTQC # =============================================================================================== @@ -103,7 +108,7 @@ rule fastqc: log: "logs/{sample}.fastqc" shell: - "fastqc {input} -t 6 --extract --outdir=results/fastqc/ 2> {log}" + "fastqc {input} -t 6 --extract --outdir=results/qc/ 2> {log}" # =============================================================================================== # 2. ALIGNMENT @@ -117,12 +122,12 @@ rule align: output: "results/bowtie2/{sample}.sorted.bam" params: - index="genomes/dunnart_pseudochr_vs_mSarHar1.11_v1" + index="genomes/Scras_dunnart_assem1.0_pb-ont-illsr_flyeassem_red-rd-scfitr2_pil2xwgs2_60chr" log: "logs/{sample}.align" shell: - "bowtie2 --threads 8 -q -X 1000 --very-sensitive -x {params.index} -1 {input.R1} -2 {input.R2} \ - | samtools view -u -h - | samtools sort -o {output} - 2> {log}" + "bowtie2 --threads 8 -q -X 2000 --very-sensitive -x {params.index} -1 {input.R1} -2 {input.R2} \ + | samtools view -u -h - | samtools sort -o {output} - 2> {log}" # =============================================================================================== @@ -150,6 +155,16 @@ rule filter: shell: "samtools view -b -q 30 -F 1804 -f 2 {input} | samtools sort -o {output} - 2> {log}" +rule indexUnfiltBam: + input: + "results/bowtie2/{sample}.sorted.bam" + output: + "results/bowtie2/{sample}.sorted.bai" + log: + "logs/{sample}.indexUnfiltBam" + shell: + "samtools index -c {input} {output} 2> {log}" + rule markDups: input: "results/bowtie2/{sample}_PPq30.sorted.bam" @@ -160,9 +175,9 @@ rule markDups: "logs/{sample}.dupmark" shell: "picard MarkDuplicates I={input} O={output.bam} \ - METRICS_FILE={output.dupQC} REMOVE_DUPLICATES=true ASSUME_SORTED=true 2> {log}" + METRICS_FILE={output.dupQC} REMOVE_DUPLICATES=false ASSUME_SORTED=true 2> {log}" -rule sort: +rule dedup: input: "results/bowtie2/{sample}_PPq30.sorted.dupmark.bam" output: @@ -170,7 +185,7 @@ rule sort: log: "logs/{sample}.dedup" shell: - "samtools view -b -f 2 {input} | samtools sort -o {output} - 2> {log}" + "samtools view -F 1804 -f 2 -b {input} | samtools sort -o {output} - 2> {log}" rule indexBam: input: @@ -191,11 +206,20 @@ rule indexBam: rule mappingStats: input: - "results/bowtie2/{sample}_PPq30.sorted.dedup.bam" + a="results/bowtie2/{sample}_PPq30.sorted.dedup.bam", + b="results/bowtie2/{sample}_PPq30.sorted.dupmark.bam", + c="results/bowtie2/{sample}_PPq30.sorted.bam", + d="results/bowtie2/{sample}.sorted.bam" output: - 
"results/qc/{sample}.flagstat.qc" - shell: - "samtools flagstat {input} > {output}" + a="results/qc/{sample}.dedup.flagstat.qc", + b="results/qc/{sample}.dupmark.flagstat.qc", + c="results/qc/{sample}.PPq30.flagstat.qc", + d="results/qc/{sample}.unfiltered.flagstat.qc", + run: + shell("samtools flagstat {input.a} > {output.a}") + shell("samtools flagstat {input.b} > {output.b}") + shell("samtools flagstat {input.c} > {output.c}") + shell("samtools flagstat {input.d} > {output.d}") rule preseq: input: @@ -207,7 +231,6 @@ rule preseq: shell: "preseq lc_extrap -v -output {output} -pe -bam {input} 2> {log}" - rule get_picard_complexity_metrics: input: "results/bowtie2/{sample}.sorted.bam" @@ -218,18 +241,39 @@ rule get_picard_complexity_metrics: shell: "picard -Xmx6G EstimateLibraryComplexity INPUT={input} OUTPUT={output} USE_JDK_DEFLATER=TRUE USE_JDK_INFLATER=TRUE VERBOSITY=ERROR" - # # Extract the actual estimated library size - # header_seen = False - # est_library_size = 0 - # with open(out_file, 'r') as fp: - # for line in fp: - # if header_seen: - # est_library_size = int(float(line.strip().split()[-1])) - # break - # if 'ESTIMATED_LIBRARY_SIZE' in line: - # header_seen = True - # - # return est_library_size +rule sort_name: + input: + "results/bowtie2/{sample}_PPq30.sorted.dupmark.bam" + output: + tmp = "results/bowtie2/{sample}_PPq30.sorted.dupmark.tmp.bam" + log: + "logs/{sample}.pbc.sort" + run: + shell("samtools sort -n {input} -o {output.tmp} 2> {log}") + +rule estimate_lib_complexity: + input: + "results/bowtie2/{sample}_PPq30.sorted.dupmark.tmp.bam" + output: + qc = "results/qc/{sample}.pbc.qc", + log: + "logs/{sample}.pbc" + shell: + """ + bedtools bamtobed -bedpe -i {input} \ + | awk 'BEGIN{{OFS="\\t"}}{{print $1,$2,$4,$6,$9,$10}}' \ + | grep -v 'chrM' | sort | uniq -c \ + | awk 'BEGIN{{mt=0;m0=0;m1=0;m2=0}} ($1==1){{m1=m1+1}} ($1==2){{m2=m2+1}} \ + {{m0=m0+1}} {{mt=mt+$1}} END{{printf "%d\\t%d\\t%d\\t%d\\t%f\\t%f\\t%f\\n" ,mt,m0,m1,m2,m0/mt,m1/m0,m1/m2}}' \ + > {output.qc} + """ + +## convert to bedPE +## print read 1 scaffold, read 1 start coordinate, read 2 scaffold, read 2 end coordinate, strand read 1, strand read 2 +## remove mitochondrial genome +## sort by position and obtain uniq counts +## Format of file +## TotalReadPairs [tab] DistinctReadPairs [tab] OneReadPair [tab] TwoReadPairs [tab] NRF=Distinct/Total [tab] PBC1=OnePair/Distinct [tab] PBC2=OnePair/TwoPair # =============================================================================================== # 5. 
deepTools @@ -260,7 +304,7 @@ rule deeptools_summary: --outRawCounts {output.counts} 2> {log}" rule deeptools_correlation: - input: "results/deeptools/multibamsum.npz" + input: "results/qc/multibamsum.npz" output: fig="results/qc/pearsoncor_multibamsum.png", matrix="results/qc/pearsoncor_multibamsum_matrix.txt" @@ -279,8 +323,8 @@ rule deeptools_correlation: rule deeptools_coverage: input: - bam ="results/bowtie2/{sample}_PPq30.sorted.dedup.bam", - bai ="results/bowtie2/{sample}_PPq30.sorted.dedup.bai" + bam ="results/bowtie2/{sample}.sorted.bam", + bai ="results/bowtie2/{sample}.sorted.bai" output: "results/qc/{sample}.SeqDepthNorm.bw" log: @@ -290,7 +334,7 @@ rule deeptools_coverage: --bam {input.bam} \ --binSize 10 \ --normalizeUsing RPGC \ - --effectiveGenomeSize 3074798085 \ + --effectiveGenomeSize 2740338543 \ --extendReads \ -o {output} 2> {log}" @@ -311,25 +355,10 @@ rule deeptools_fingerprint: --plotFile {output.fig} \ --outQualityMetrics {output.metrics} \ --outRawCounts {output.rawcounts} \ - --minMappingQuality 20 \ + --minMappingQuality 30 \ --skipZeros \ --centerReads 2> {log}" -rule deeptools_plotCoverage: - input: - bam = expand(["results/bowtie2/{sample}_PPq30.sorted.dedup.bam"], sample=all_samples), - bai = expand(["results/bowtie2/{sample}_PPq30.sorted.dedup.bai"], sample=all_samples) - output: - fig="results/qc/plot_coverage.png", - rawcounts="results/qc/plot_coverage_rawcounts.tab" - log: - "logs/plotCoverage.deepTools" - shell: - "plotCoverage --bamfiles {input.bam} --plotFile {output.fig} \ - -n 1000000 \ - --outRawCounts {output.rawcounts} \ - --ignoreDuplicates 2> {log}" - rule deeptools_bamPEFragmentSize: input: bam = expand(["results/bowtie2/{sample}_PPq30.sorted.dedup.bam"], sample=all_samples), @@ -347,24 +376,101 @@ rule deeptools_bamPEFragmentSize: # =============================================================================================== -# 6. phantomPeakQuals +# 6. Cross-correlation scores - following ENCODE code # =============================================================================================== -rule phantomPeakQuals: +# Using only 1 of the read pairs, trim to 50 bp read +# trimfastq.py is a script from the ENCODE pipeline + +rule trim_read1: input: - "results/bowtie2/{sample}_PPq30.sorted.dedup.bam", + "rawdata/{sample}_R1.fastq.gz" output: - savp="results/qc/{sample}.spp.pdf", - savd="results/qc/{sample}.spp.Rdata", - out="results/qc/{sample}.spp.out" + "results/qc/{sample}_R1_trimmed.fastq.gz" + run: + shell("python scripts/trimfastq.py {input} 50 | gzip -nc > {output}") + +## Align trimmed read to the genome + +rule align_trimmed_read1: + input: + "results/qc/{sample}_R1_trimmed.fastq.gz" + output: + "results/bowtie2/{sample}_R1_trimmed.bam" + params: + index="genomes/Scras_dunnart_assem1.0_pb-ont-illsr_flyeassem_red-rd-scfitr2_pil2xwgs2_60chr" log: - "logs/{sample}.phantomPeak" + "logs/{sample}_align_trimmed_read1.log" shell: - "Rscript scripts/run_spp.R -c={input} -savp={output.savp} -savd={output.savd} -out={output.out} 2> {log}" + "bowtie2 -x {params.index} -U {input} 2> {log} | \ + samtools view -Su - | samtools sort -o {output} - 2> {log}" -# =============================================================================================== -# 7. 
Call peaks (MACS2)
-# ===============================================================================================
+## Filter alignment but don't dedup
+
+rule filter_sort_trimmed_alignment:
+    input:
+        "results/bowtie2/{sample}_R1_trimmed.bam"
+    output:
+        flagstat = "results/qc/{sample}_R1_trimmed.flagstat.qc",
+        bam = "results/bowtie2/{sample}_R1_trimmed_q30.bam"
+    log:
+        "logs/{sample}_align_trimmed_read1_filter.log"
+    run:
+        shell("samtools sort -n {input} -O SAM | SAMstats --sorted_sam_file - --outf {output.flagstat}")
+        shell("samtools view -F 1804 -q 30 -b {input} -o {output.bam}")
+
+rule bamtobed_crossC:
+    input:
+        "results/bowtie2/{sample}_R1_trimmed_q30.bam"
+    output:
+        tagAlign = "results/bed/{sample}_R1_trimmed_q30_SE.tagAlign.gz"
+    shell:
+        """
+        bedtools bamtobed -i {input} | \
+        awk 'BEGIN{{OFS="\\t"}}{{$4="N";$5="1000";print $0}}' |\
+        gzip -nc > {output.tagAlign}
+        """
+
+## Subsample 15 million reads for cross-correlation analysis
+## Estimate read length from first 100 reads
+rule subsample_aligned_reads:
+    input:
+        "results/bed/{sample}_R1_trimmed_q30_SE.tagAlign.gz"
+    output:
+        subsample = "results/bed/{sample}.filt.sample.15Mreads.SE.tagAlign.gz",
+        tmp = "results/bed/{sample}_R1_trimmed_q30_SE.tagAlign.tmp"
+    params:
+        nreads= 15000000
+    run:
+        shell("""zcat {input} | grep -v 'chrM' | shuf -n {params.nreads} --random-source=<(openssl enc -aes-256-ctr -pass pass:$(zcat -f {input} | wc -c) -nosalt </dev/zero 2>/dev/null) | gzip -nc > {output.subsample}""")
+        shell("zcat {input} > {output.tmp}")
+        shell("""READ_LEN=$(head -n 100 {output.tmp} | awk 'function abs(v) {{return v < 0 ? -v : v}} BEGIN{{sum=0}} {{sum+=abs($3-$2)}} END{{print int(sum/NR)}}'); echo "estimated read length: $READ_LEN" """)
+
+# Determine the exclusion range for fragment length estimation.
+# The lower bound is fixed at -500.
+# The upper bound EXCLUSION_RANGE_MAX for histone ChIP-seq is max(read_len + 10, 100).
+# Reads were trimmed to 50 bp above, so read_len is taken as 50 here.
+
+rule cross_correlation_SSP:
+    input:
+        "results/bed/{sample}.filt.sample.15Mreads.SE.tagAlign.gz"
+    output:
+        CC_SCORES_FILE="{sample}_filt_15Mreads.SE.cc.qc",
+        CC_PLOT_FILE="{sample}_filt_15Mreads.SE.cc.plot.pdf"
+    params:
+        EXCLUSION_RANGE_MIN=-500,
+        EXCLUSION_RANGE_MAX=110
+    log:
+        "logs/{sample}_cross_correlation.log"
+    run:
+        shell("Rscript scripts/run_spp.R -c={input} -filtchr=chrM -savp={output.CC_PLOT_FILE} -out={output.CC_SCORES_FILE} -x={params.EXCLUSION_RANGE_MIN}:{params.EXCLUSION_RANGE_MAX} 2> {log}")
+
+# # CC_SCORE FILE format
+# # Filename <tab> numReads <tab> estFragLen <tab> corr_estFragLen <tab> PhantomPeak <tab> corr_phantomPeak <tab> argmin_corr <tab> min_corr <tab> phantomPeakCoef <tab> relPhantomPeakCoef <tab> QualityTag
+#
+# # ===============================================================================================
+# # 7. Call peaks (MACS2)
+# # ===============================================================================================

 # peak calling for pair-end peaks
 # effective genome size for dunnart genome calculated with khmer program (see README.md)
@@ -386,14 +492,13 @@ rule call_peaks_macs2:
         -c {input.control} \
         --outdir results/macs2/ \
         -n {params.name} \
-        -g 3074798085 2> {log} "
+        -g 2740338543 -B 2> {log} "

-
 # # # ===============================================================================================
 # # # 8. 
Peak QC -# > narrow peak counts -# > fraction of reads in peaks (convert BAM to bed first then do intersect with peaks) -# =============================================================================================== +# # # =============================================================================================== +# # # 8. Peak QC +# # # > narrow peak counts +# # # > fraction of reads in peaks (convert BAM to bed first then do intersect with peaks) +# # # =============================================================================================== # peak counts in a format that multiqc can handle rule get_narrow_peak_counts_for_multiqc: @@ -432,9 +537,9 @@ rule frip: "python2.7 scripts/encode_frip.py {input.bed} {input.peak} > {output}" -# =============================================================================================== -# 9. Create consensus peaksets for replicates -# =============================================================================================== +# # =============================================================================================== +# # 9. Create consensus peaksets for replicates +# # =============================================================================================== # Based on ENCODE `overlap_peaks.py` - recommended for histone marks. # Need to pool peaks first for each replicate @@ -472,72 +577,77 @@ rule overlap_peaks_H3K27ac: shell: "python2.7 scripts/overlap_peaks.py {input.peak1} {input.peak2} {input.pooled} {output}" -# =============================================================================================== -# 10. QC on replicate peaks -# =============================================================================================== - -# FRiP for overlapped peaks -rule frip: - input: - bed = "results/bowtie2/{case}.bedpe", - peak = "results/macs2/{}_macs2_peaks.narrowPeak" - output: - "results/qc/{case}.frip.txt" - shell: - "python2.7 scripts/encode_frip.py {input.bed} {input.peak} > {output}" - - -# =============================================================================================== -# 11. Annotate peaks relative to gene features (HOMER) -# =============================================================================================== - - - -# =============================================================================================== -# 12. 
Combine all QC into multiqc output -# =============================================================================================== - -rule multiqc: - input: - # fastqc - expand("results/fastqc/{sample}_R1_fastqc.html", sample=all_samples), - expand("results/fastqc/{sample}_R2_fastqc.html", sample=all_samples), - expand("results/fastqc/{sample}_R1_fastqc.zip", sample=all_samples), - expand("results/fastqc/{sample}_R2_fastqc.zip", sample=all_samples), - # bowtie2 - expand("logs/{sample}.align", sample=all_samples), - expand("logs/{sample}.flagstat.qc", sample=all_samples), - # preseq - expand("results/preseq/{sample}.ccurve.txt", sample=all_samples), - # picard - expand("results/picard/{sample}_est_lib_complex_metrics.txt", sample=all_samples), - # deepTools - "results/deeptools/multibamsum.npz", - "results/deeptools/multibamsum.tab", - "results/deeptools/pearsoncor_multibamsum.png", - "results/deeptools/pearsoncor_multibamsum_matrix.txt", - expand("results/deeptools/{sample}.SeqDepthNorm.bw", sample=all_samples), - "results/deeptools/multiBAM_fingerprint.png", - "results/deeptools/multiBAM_fingerprint_metrics.txt", - "results/deeptools/multiBAM_fingerprint_rawcounts.txt", - "results/deeptools/plot_coverage.png", - "results/deeptools/plot_coverage_rawcounts.tab", - "results/deeptools/bamPEFragmentSize_hist.png", - "results/deeptools/bamPEFragmentSize_rawcounts.tab", - # phantomPeaks - expand("results/phantomPeaks/{sample}.spp.pdf", sample = IPS), - expand("results/phantomPeaks/{sample}.spp.Rdata", sample = IPS), - expand("results/phantomPeaks/{sample}.spp.out", sample = IPS), - # macs2 - expand("results/macs2/{case}_vs_{control}_macs2_peaks.narrowPeak", zip, case=IPS, control=INPUTS), - expand("results/macs2/{case}_vs_{control}_macs2_peaks.xls", zip, case=IPS, control=INPUTS), - expand("results/macs2/{case}_vs_{control}_macs2_summits.bed", zip, case=IPS, control=INPUTS), - expand("results/macs2/{case}-vs-{control}-narrowpeak-count_mqc.json", zip, case=IPS, control=INPUTS), - expand("results/frip/{case}_vs_{control}.frip.txt", case=IPS, control=INPUTS) - output: - directory("results/multiqc/multiqc_report_data/"), - "results/multiqc/multiqc_report.html", - log: - "logs/multiqc.log" - wrapper: - "0.31.1/bio/multiqc" +#rule consensus_peaks: + + +#rule enhancer_promoter_peaks: +# enhancer = peaks with only H3K27ac +# Promoter = peaks with both H3K27ac & H3K4me3, or just H3K4me3 +# Make a bargraph with the amounts for each + + +# # =============================================================================================== +# # 10. QC on replicate peaks +# # =============================================================================================== +# +# # FRiP for overlapped peaks +# rule frip: +# input: +# bed = "results/bowtie2/{case}.bedpe", +# peak = "results/macs2/{}_macs2_peaks.narrowPeak" +# output: +# "results/qc/{case}.frip.txt" +# shell: +# "python2.7 scripts/encode_frip.py {input.bed} {input.peak} > {output}" +# +# +# # =============================================================================================== +# # 11. 
Combine all QC into multiqc output +# # =============================================================================================== +# +# rule multiqc: +# input: +# # fastqc +# expand("results/fastqc/{sample}_R1_fastqc.html", sample=all_samples), +# expand("results/fastqc/{sample}_R2_fastqc.html", sample=all_samples), +# expand("results/fastqc/{sample}_R1_fastqc.zip", sample=all_samples), +# expand("results/fastqc/{sample}_R2_fastqc.zip", sample=all_samples), +# # bowtie2 +# expand("logs/{sample}.align", sample=all_samples), +# expand("logs/{sample}.flagstat.qc", sample=all_samples), +# # preseq +# expand("results/preseq/{sample}.ccurve.txt", sample=all_samples), +# # picard +# expand("results/picard/{sample}_est_lib_complex_metrics.txt", sample=all_samples), +# # deepTools +# "results/deeptools/multibamsum.npz", +# "results/deeptools/multibamsum.tab", +# "results/deeptools/pearsoncor_multibamsum.png", +# "results/deeptools/pearsoncor_multibamsum_matrix.txt", +# expand("results/deeptools/{sample}.SeqDepthNorm.bw", sample=all_samples), +# "results/deeptools/multiBAM_fingerprint.png", +# "results/deeptools/multiBAM_fingerprint_metrics.txt", +# "results/deeptools/multiBAM_fingerprint_rawcounts.txt", +# "results/deeptools/plot_coverage.png", +# "results/deeptools/plot_coverage_rawcounts.tab", +# "results/deeptools/bamPEFragmentSize_hist.png", +# "results/deeptools/bamPEFragmentSize_rawcounts.tab", +# # phantomPeaks +# expand("results/phantomPeaks/{sample}.spp.pdf", sample = IPS), +# expand("results/phantomPeaks/{sample}.spp.Rdata", sample = IPS), +# expand("results/phantomPeaks/{sample}.spp.out", sample = IPS), +# # macs2 +# expand("results/macs2/{case}_vs_{control}_macs2_peaks.narrowPeak", zip, case=IPS, control=INPUTS), +# expand("results/macs2/{case}_vs_{control}_macs2_peaks.xls", zip, case=IPS, control=INPUTS), +# expand("results/macs2/{case}_vs_{control}_macs2_summits.bed", zip, case=IPS, control=INPUTS), +# expand("results/macs2/{case}-vs-{control}-narrowpeak-count_mqc.json", zip, case=IPS, control=INPUTS), +# expand("results/frip/{case}_vs_{control}.frip.txt", case=IPS, control=INPUTS) +# output: +# directory("results/multiqc/multiqc_report_data/"), +# "results/multiqc/multiqc_report.html", +# conda: +# "envs/multiqc.yaml" +# log: +# "logs/multiqc.log" +# wrapper: +# "0.31.1/bio/multiqc" -- GitLab
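For reference, the pbc.qc file written by the new estimate_lib_complexity rule holds, per the comment block in the patch, TotalReadPairs, DistinctReadPairs, OneReadPair, TwoReadPairs, NRF=Distinct/Total, PBC1=OnePair/Distinct and PBC2=OnePair/TwoPair. A minimal Python sketch of the same counting logic as the awk one-liner (illustrative only; the function name and the pre-extracted position tuples are assumptions, not part of the pipeline):

    from collections import Counter

    def pbc_metrics(pair_positions):
        # Mirror the awk pipeline in rule estimate_lib_complexity: count how often each
        # read-pair position tuple (chrom1, start1, chrom2, end2, strand1, strand2) occurs,
        # then derive the ENCODE library-complexity metrics. Assumes a non-empty input.
        counts = Counter(pair_positions)
        mt = sum(counts.values())                        # TotalReadPairs
        m0 = len(counts)                                 # DistinctReadPairs
        m1 = sum(1 for c in counts.values() if c == 1)   # positions seen exactly once
        m2 = sum(1 for c in counts.values() if c == 2)   # positions seen exactly twice
        nrf = m0 / mt                                    # NRF = Distinct/Total
        pbc1 = m1 / m0                                   # PBC1 = OnePair/Distinct
        pbc2 = m1 / m2 if m2 else float("inf")           # PBC2 = OnePair/TwoPair
        return mt, m0, m1, m2, nrf, pbc1, pbc2

Unlike the awk version, the sketch guards the PBC2 division, which awk aborts on when no position occurs exactly twice.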
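The cc.qc files produced by cross_correlation_SSP follow the CC_SCORE FILE format documented in the patch, and the NSC and RSC named in the commit subject are its phantomPeakCoef and relPhantomPeakCoef columns: run_spp.R reports NSC as the cross-correlation at the estimated fragment length over the background minimum, and RSC as the fragment-length peak relative to the read-length "phantom" peak after subtracting that background. A small sketch for pulling the two scores out of one of those files (hypothetical helper; column indices taken from the format comment):

    def read_nsc_rsc(cc_qc_path):
        # Tab-separated columns: Filename, numReads, estFragLen, corr_estFragLen,
        # PhantomPeak, corr_phantomPeak, argmin_corr, min_corr,
        # phantomPeakCoef (NSC), relPhantomPeakCoef (RSC), QualityTag
        with open(cc_qc_path) as fh:
            fields = fh.readline().rstrip("\n").split("\t")
        nsc = float(fields[8])         # NSC
        rsc = float(fields[9])         # RSC
        quality_tag = int(fields[10])  # quality flag assigned by run_spp.R
        return nsc, rsc, quality_tag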