Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
C
ChIP-seq
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
IGR-lab
ChIP-seq
Commits
5f3ac1e3
Commit
5f3ac1e3
authored
4 years ago
by
Laura Cook
Browse files
Options
Downloads
Patches
Plain Diff
rule to compute NRF, PBC1, PBC2 and rule to compute NSC and RSC
parent
9f47e352
Branches
Branches containing commit
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
dunnart/Snakefile
+263
-153
263 additions, 153 deletions
dunnart/Snakefile
with
263 additions
and
153 deletions
dunnart/Snakefile
+
263
−
153
View file @
5f3ac1e3
...
...
@@ -52,14 +52,20 @@ rule all:
expand("results/qc/{sample}_R1_fastqc.zip", sample=all_samples),
expand("results/qc/{sample}_R2_fastqc.zip", sample=all_samples),
expand("results/bowtie2/{sample}.sorted.bam", sample=all_samples),
expand("results/bowtie2/{sample}.sorted.bai", sample=all_samples),
expand("results/bowtie2/{sample}_PPq30.sorted.bam", sample=all_samples),
expand("results/bowtie2/{sample}_PPq30.sorted.dupmark.bam", sample=all_samples),
expand("results/bowtie2/{sample}_PPq30.sorted.dedup.bam", sample=all_samples),
expand("results/bowtie2/{sample}_PPq30.sorted.dedup.bai", sample=all_samples),
expand("results/qc/{sample}.flagstat.qc", sample=all_samples),
expand("results/qc/{sample}.sorted.flagstat.qc", sample=all_samples),
expand("results/qc/{sample}.dedup.flagstat.qc", sample=all_samples),
expand("results/qc/{sample}.dupmark.flagstat.qc", sample=all_samples),
expand("results/qc/{sample}.PPq30.flagstat.qc", sample=all_samples),
expand("results/qc/{sample}.ccurve.txt", sample=all_samples),
expand("results/qc/{sample}_est_lib_complex_metrics.txt", sample=all_samples),
expand("logs/{sample}.picardLibComplexity", sample=all_samples),
expand("results/qc/{sample}.pbc.qc", sample=all_samples),
expand("results/bowtie2/{sample}_PPq30.sorted.tmp.bam",sample=all_samples),
"results/qc/multibamsum.npz",
"results/qc/multibamsum.tab",
"results/qc/pearsoncor_multibamsum.png",
...
...
@@ -68,26 +74,25 @@ rule all:
"results/qc/multiBAM_fingerprint.png",
"results/qc/multiBAM_fingerprint_metrics.txt",
"results/qc/multiBAM_fingerprint_rawcounts.txt",
"results/qc/plot_coverage.png",
"results/qc/plot_coverage_rawcounts.tab",
"results/qc/bamPEFragmentSize_hist.png",
"results/qc/bamPEFragmentSize_rawcounts.tab",
expand("results/qc/{sample}.spp.pdf", sample = all_samples),
expand("results/qc/{sample}.spp.Rdata", sample = all_samples),
expand("results/qc/{sample}.spp.out", sample = all_samples),
expand("results/macs2/{case}_vs_{control}_macs2_peaks.narrowPeak", zip, case=IPS, control=INPUTS),
expand("results/macs2/{case}_vs_{control}_macs2_peaks.xls", zip, case=IPS, control=INPUTS),
expand("results/macs2/{case}_vs_{control}_macs2_summits.bed", zip, case=IPS, control=INPUTS),
expand("results/qxc{case}-vs-{control}-narrowpeak-count_mqc.json", zip, case=IPS, control=INPUTS),
expand("results/bowtie2/{case}.bedpe", case=IPS),
expand("logs/{case}.bamToBed", case=IPS),
expand("results/qc/{case}_vs_{control}.frip.txt", case=IPS, control=INPUTS),
"results/macs2/H3K4me3_pooled_macs2_peaks.narrowPeak",
"results/macs2/H3K27ac_pooled_macs2_peaks.narrowPeak",
"results/macs2/H3K4me3_overlap.narrowPeak",
"results/macs2/H3K27ac_overlap.narrowPeak",
directory("results/multiqc/multiqc_report_data/"),
"results/multiqc/multiqc_report.html"
expand("results/qc/{sample}_R1_trimmed.flagstat.qc", sample=all_samples),
expand("results/bowtie2/{sample}_R1_trimmed_q30.bam", sample=all_samples),
expand("{sample}_filt_15Mreads.SE.cc.qc", sample=all_samples),
expand("{sample}_filt_15Mreads.SE.cc.plot.pdf", sample=all_samples)
# expand("results/macs2/{case}_vs_{control}_macs2_peaks.narrowPeak", zip, case=IPS, control=INPUTS),
# expand("results/macs2/{case}_vs_{control}_macs2_peaks.xls", zip, case=IPS, control=INPUTS),
# expand("results/macs2/{case}_vs_{control}_macs2_summits.bed", zip, case=IPS, control=INPUTS),
# expand("results/qxc{case}-vs-{control}-narrowpeak-count_mqc.json", zip, case=IPS, control=INPUTS),
# expand("results/bowtie2/{case}.bedpe", case=IPS),
# expand("logs/{case}.bamToBed", case=IPS),
# expand("results/qc/{case}_vs_{control}.frip.txt", case=IPS, control=INPUTS)
# "results/macs2/H3K4me3_pooled_macs2_peaks.narrowPeak",
# "results/macs2/H3K27ac_pooled_macs2_peaks.narrowPeak",
# "results/macs2/H3K4me3_overlap.narrowPeak",
# "results/macs2/H3K27ac_overlap.narrowPeak",
# directory("results/multiqc/multiqc_report_data/"),
# "results/multiqc/multiqc_report.html"
# ===============================================================================================
# 1. FASTQC
# ===============================================================================================
...
...
@@ -103,7 +108,7 @@ rule fastqc:
log:
"logs/{sample}.fastqc"
shell:
"fastqc {input} -t 6 --extract --outdir=results/
fast
qc/ 2> {log}"
"fastqc {input} -t 6 --extract --outdir=results/qc/ 2> {log}"
# ===============================================================================================
# 2. ALIGNMENT
...
...
@@ -117,12 +122,12 @@ rule align:
output:
"results/bowtie2/{sample}.sorted.bam"
params:
index="genomes/dunnart_
pseudochr_vs_mSarHar1.11_v1
"
index="genomes/
Scras_
dunnart_
assem1.0_pb-ont-illsr_flyeassem_red-rd-scfitr2_pil2xwgs2_60chr
"
log:
"logs/{sample}.align"
shell:
"bowtie2 --threads 8 -q -X
1
000 --very-sensitive -x {params.index} -1 {input.R1} -2 {input.R2} \
| samtools view
-u -h - | samtools sort -o {output} - 2> {log}"
"bowtie2 --threads 8 -q -X
2
000 --very-sensitive -x {params.index} -1 {input.R1} -2 {input.R2} \
| samtools view -u -h - | samtools sort -o {output} - 2> {log}"
# ===============================================================================================
...
...
@@ -150,6 +155,16 @@ rule filter:
shell:
"samtools view -b -q 30 -F 1804 -f 2 {input} | samtools sort -o {output} - 2> {log}"
rule indexUnfiltBam:
input:
"results/bowtie2/{sample}.sorted.bam"
output:
"results/bowtie2/{sample}.sorted.bai"
log:
"logs/{sample}.indexUnfiltBam"
shell:
"samtools index -c {input} {output} 2> {log}"
rule markDups:
input:
"results/bowtie2/{sample}_PPq30.sorted.bam"
...
...
@@ -160,9 +175,9 @@ rule markDups:
"logs/{sample}.dupmark"
shell:
"picard MarkDuplicates I={input} O={output.bam} \
METRICS_FILE={output.dupQC} REMOVE_DUPLICATES=
tru
e ASSUME_SORTED=true 2> {log}"
METRICS_FILE={output.dupQC} REMOVE_DUPLICATES=
fals
e ASSUME_SORTED=true 2> {log}"
rule
sort
:
rule
dedup
:
input:
"results/bowtie2/{sample}_PPq30.sorted.dupmark.bam"
output:
...
...
@@ -170,7 +185,7 @@ rule sort:
log:
"logs/{sample}.dedup"
shell:
"samtools view -
b
-f 2 {input} | samtools sort -o {output} - 2> {log}"
"samtools view -
F 1804
-f 2
-b
{input} | samtools sort -o {output} - 2> {log}"
rule indexBam:
input:
...
...
@@ -191,11 +206,20 @@ rule indexBam:
rule mappingStats:
input:
"results/bowtie2/{sample}_PPq30.sorted.dedup.bam"
a="results/bowtie2/{sample}_PPq30.sorted.dedup.bam",
b="results/bowtie2/{sample}_PPq30.sorted.dupmark.bam",
c="results/bowtie2/{sample}_PPq30.sorted.bam",
d="results/bowtie2/{sample}.sorted.bam"
output:
"results/qc/{sample}.flagstat.qc"
shell:
"samtools flagstat {input} > {output}"
a="results/qc/{sample}.dedup.flagstat.qc",
b="results/qc/{sample}.dupmark.flagstat.qc",
c="results/qc/{sample}.PPq30.flagstat.qc",
d="results/qc/{sample}.unfiltered.flagstat.qc",
run:
shell("samtools flagstat {input.a} > {output.a}")
shell("samtools flagstat {input.b} > {output.b}")
shell("samtools flagstat {input.c} > {output.c}")
shell("samtools flagstat {input.d} > {output.d}")
rule preseq:
input:
...
...
@@ -207,7 +231,6 @@ rule preseq:
shell:
"preseq lc_extrap -v -output {output} -pe -bam {input} 2> {log}"
rule get_picard_complexity_metrics:
input:
"results/bowtie2/{sample}.sorted.bam"
...
...
@@ -218,18 +241,39 @@ rule get_picard_complexity_metrics:
shell:
"picard -Xmx6G EstimateLibraryComplexity INPUT={input} OUTPUT={output} USE_JDK_DEFLATER=TRUE USE_JDK_INFLATER=TRUE VERBOSITY=ERROR"
# # Extract the actual estimated library size
# header_seen = False
# est_library_size = 0
# with open(out_file, 'r') as fp:
# for line in fp:
# if header_seen:
# est_library_size = int(float(line.strip().split()[-1]))
# break
# if 'ESTIMATED_LIBRARY_SIZE' in line:
# header_seen = True
#
# return est_library_size
rule sort_name:
input:
"results/bowtie2/{sample}_PPq30.sorted.dupmark.bam"
output:
tmp = "results/bowtie2/{sample}_PPq30.sorted.dupmark.tmp.bam"
log:
"logs/{sample}.pbc.sort"
run:
shell("samtools sort -n {input} -o {output.tmp} 2> {log}")
rule estimate_lib_complexity:
input:
"results/bowtie2/{sample}_PPq30.sorted.dupmark.tmp.bam"
output:
qc = "results/qc/{sample}.pbc.qc",
log:
"logs/{sample}.pbc"
shell:
"""
bedtools bamtobed -bedpe -i {input} \
| awk 'BEGIN{{OFS="\\t"}}{{print $1,$2,$4,$6,$9,$10}}' \
| grep -v 'chrM' | sort | uniq -c \
| awk 'BEGIN{{mt=0;m0=0;m1=0;m2=0}} ($1==1){{m1=m1+1}} ($1==2){{m2=m2+1}} \
{{m0=m0+1}} {{mt=mt+$1}} END{{printf "%d\\t%d\\t%d\\t%d\\t%f\\t%f\\t%f\\n" ,mt,m0,m1,m2,m0/mt,m1/m0,m1/m2}}' \
> {output.qc}
"""
## convert to bedPE
## print read 1 scaffold, read 1 start coordinate, read 2 scaffold, read 2 end coordinate, strand read 1, strand read 2
## remove mitochondrial genome
## sort by position and obtain uniq counts
## Format of file
## TotalReadPairs [tab] DistinctReadPairs [tab] OneReadPair [tab] TwoReadPairs [tab] NRF=Distinct/Total [tab] PBC1=OnePair/Distinct [tab] PBC2=OnePair/TwoPair
# ===============================================================================================
# 5. deepTools
...
...
@@ -260,7 +304,7 @@ rule deeptools_summary:
--outRawCounts {output.counts} 2> {log}"
rule deeptools_correlation:
input: "results/
deeptools
/multibamsum.npz"
input: "results/
qc
/multibamsum.npz"
output:
fig="results/qc/pearsoncor_multibamsum.png",
matrix="results/qc/pearsoncor_multibamsum_matrix.txt"
...
...
@@ -279,8 +323,8 @@ rule deeptools_correlation:
rule deeptools_coverage:
input:
bam ="results/bowtie2/{sample}
_PPq30
.sorted.
dedup.
bam",
bai ="results/bowtie2/{sample}
_PPq30
.sorted.
dedup.
bai"
bam ="results/bowtie2/{sample}.sorted.bam",
bai ="results/bowtie2/{sample}.sorted.bai"
output:
"results/qc/{sample}.SeqDepthNorm.bw"
log:
...
...
@@ -290,7 +334,7 @@ rule deeptools_coverage:
--bam {input.bam} \
--binSize 10 \
--normalizeUsing RPGC \
--effectiveGenomeSize
3074798085
\
--effectiveGenomeSize
2740338543
\
--extendReads \
-o {output} 2> {log}"
...
...
@@ -311,25 +355,10 @@ rule deeptools_fingerprint:
--plotFile {output.fig} \
--outQualityMetrics {output.metrics} \
--outRawCounts {output.rawcounts} \
--minMappingQuality
2
0 \
--minMappingQuality
3
0 \
--skipZeros \
--centerReads 2> {log}"
rule deeptools_plotCoverage:
input:
bam = expand(["results/bowtie2/{sample}_PPq30.sorted.dedup.bam"], sample=all_samples),
bai = expand(["results/bowtie2/{sample}_PPq30.sorted.dedup.bai"], sample=all_samples)
output:
fig="results/qc/plot_coverage.png",
rawcounts="results/qc/plot_coverage_rawcounts.tab"
log:
"logs/plotCoverage.deepTools"
shell:
"plotCoverage --bamfiles {input.bam} --plotFile {output.fig} \
-n 1000000 \
--outRawCounts {output.rawcounts} \
--ignoreDuplicates 2> {log}"
rule deeptools_bamPEFragmentSize:
input:
bam = expand(["results/bowtie2/{sample}_PPq30.sorted.dedup.bam"], sample=all_samples),
...
...
@@ -347,24 +376,101 @@ rule deeptools_bamPEFragmentSize:
# ===============================================================================================
# 6.
phantomPeakQuals
# 6.
Cross-correlation scores - following ENCODE code
# ===============================================================================================
rule phantomPeakQuals:
# Using only 1 of the read pairs, trim to 50 bp read
# trimfastq.py is a script from the ENCODE pipeline
rule trim_read1:
input:
"r
esults/bowtie2/{sample}_PPq30.sorted.dedup.bam",
"r
awdata/{sample}_R1.fastq.gz"
output:
savp="results/qc/{sample}.spp.pdf",
savd="results/qc/{sample}.spp.Rdata",
out="results/qc/{sample}.spp.out"
"results/qc/{sample}_R1_trimmed.fastq.gz"
run:
shell("python scripts/trimfastq.py {input} 50 | gzip -nc > {output}")
## Align trimmed read to the genome
rule align_trimmed_read1:
input:
"results/qc/{sample}_R1_trimmed.fastq.gz"
output:
"results/bowtie2/{sample}_R1_trimmed.bam"
params:
index="genomes/Scras_dunnart_assem1.0_pb-ont-illsr_flyeassem_red-rd-scfitr2_pil2xwgs2_60chr"
log:
"logs/{sample}
.phantomPeak
"
"logs/{sample}
_align_trimmed_read1.log
"
shell:
"Rscript scripts/run_spp.R -c={input} -savp={output.savp} -savd={output.savd} -out={output.out} 2> {log}"
"bowtie2 -x {params.index} -U {input} 2> {log} | \
samtools view -Su - | samtools sort -o {output} - 2> {log}"
# ===============================================================================================
# 7. Call peaks (MACS2)
# ===============================================================================================
## Filter alignment but don't dedup
rule filter_sort_trimmed_alignment:
input:
"results/bowtie2/{sample}_R1_trimmed.bam"
output:
flagstat = "results/qc/{sample}_R1_trimmed.flagstat.qc",
bam = "results/bowtie2/{sample}_R1_trimmed_q30.bam"
log:
"logs/{sample}_align_trimmed_read1_filter.log"
run:
shell("samtools sort -n {input} -O SAM | SAMstats --sorted_sam_file - --outf {output.flagstat}")
shell("samtools view -F 1804 -q 30 -b {input} -o {output.bam}")
rule bamtobed_crossC:
input:
"results/bowtie2/{sample}_R1_trimmed_q30.bam"
output:
tagAlign = "results/bed/{sample}_R1_trimmed_q30_SE.tagAlign.gz"
shell:
"""
bedtools bamtobed -i {input} | \
awk 'BEGIN{{OFS="\\t"}}{{$4='N';$5='1000';print $0}}' |\
gzip -nc > {output.tagAlign}
"""
## Subsample 15 million reads for cross-correlation analysis
## Estimate read length from first 100 reads
rule subsample_aligned_reads:
input:
"results/bed/{sample}_R1_trimmed_q30_SE.tagAlign.gz"
output:
subsample = "results/bed/{sample}.filt.sample.15Mreads.SE.tagAlign.gz",
tmp = "results/bed/{sample}_R1_trimmed_q30_SE.tagAlign.tmp"
params:
nreads= 15000000
run:
shell("""zcat {input} | grep -v 'chrM' | shuf -n {params.nreads} --random-source=<(openssl enc -aes-256-ctr -pass pass:$(zcat -f {input} | wc -c) -nosalt </dev/zero 2>/dev/null) | gzip -nc > {output.subsample}""")
shell("zcat {input} > {output.tmp}")
shell("""READ_LEN=\$(head -n 100 {output.tmp} | awk 'function abs(v) {{return v < 0 ? -v : v}} BEGIN{{sum=0}} {{sum+=abs($3-$2)}} END{{print int(sum/NR)}}')""")
# Determine exclusion range for fragment length estimation.
# Use a fixed lowerbound at -500.
# Upperbound EXCLUSION_RANGE_MAX is
# Histone ChIP-seq: max(read_len + 10, 100)
# lowerbound is fixed at 500 for both
# read length is 50 because that is what we trimmed it to??
rule cross_correlation_SSP:
input:
"results/bed/{sample}.filt.sample.15Mreads.SE.tagAlign.gz"
output:
CC_SCORES_FILE="{sample}_filt_15Mreads.SE.cc.qc",
CC_PLOT_FILE="{sample}_filt_15Mreads.SE.cc.plot.pdf"
params:
EXCLUSION_RANGE_MIN=-500,
EXCLUSION_RANGE_MAX=110
run:
shell("Rscript scripts/run_spp.R -c={input} -filtchr=chrM -savp={output.CC_PLOT_FILE} -out={output.CC_SCORES_FILE} -x={params.EXCLUSION_RANGE_MIN}:{params.EXCLUSION_RANGE_MAX} 2> {log}")
# # CC_SCORE FILE format
# # Filename <tab> numReads <tab> estFragLen <tab> corr_estFragLen <tab> PhantomPeak <tab> corr_phantomPeak <tab> argmin_corr <tab> min_corr <tab> phantomPeakCoef <tab> relPhantomPeakCoef <tab> QualityTag
#
# # ===============================================================================================
# # 7. Call peaks (MACS2)
# # ===============================================================================================
# peak calling for pair-end peaks
# effective genome size for dunnart genome calculated with khmer program (see README.md)
...
...
@@ -386,14 +492,13 @@ rule call_peaks_macs2:
-c {input.control} \
--outdir results/macs2/ \
-n {params.name} \
-g
3074798085
2> {log} "
-g
2740338543 -B
2> {log} "
# ===============================================================================================
# 8. Peak QC
# > narrow peak counts
# > fraction of reads in peaks (convert BAM to bed first then do intersect with peaks)
# ===============================================================================================
# # # ===============================================================================================
# # # 8. Peak QC
# # # > narrow peak counts
# # # > fraction of reads in peaks (convert BAM to bed first then do intersect with peaks)
# # # ===============================================================================================
# peak counts in a format that multiqc can handle
rule get_narrow_peak_counts_for_multiqc:
...
...
@@ -432,9 +537,9 @@ rule frip:
"python2.7 scripts/encode_frip.py {input.bed} {input.peak} > {output}"
# ===============================================================================================
# 9. Create consensus peaksets for replicates
# ===============================================================================================
#
# ===============================================================================================
#
# 9. Create consensus peaksets for replicates
#
# ===============================================================================================
# Based on ENCODE `overlap_peaks.py` - recommended for histone marks.
# Need to pool peaks first for each replicate
...
...
@@ -472,72 +577,77 @@ rule overlap_peaks_H3K27ac:
shell:
"python2.7 scripts/overlap_peaks.py {input.peak1} {input.peak2} {input.pooled} {output}"
# ===============================================================================================
# 10. QC on replicate peaks
# ===============================================================================================
# FRiP for overlapped peaks
rule frip:
input:
bed = "results/bowtie2/{case}.bedpe",
peak = "results/macs2/{}_macs2_peaks.narrowPeak"
output:
"results/qc/{case}.frip.txt"
shell:
"python2.7 scripts/encode_frip.py {input.bed} {input.peak} > {output}"
# ===============================================================================================
# 11. Annotate peaks relative to gene features (HOMER)
# ===============================================================================================
# ===============================================================================================
# 12. Combine all QC into multiqc output
# ===============================================================================================
rule multiqc:
input:
# fastqc
expand("results/fastqc/{sample}_R1_fastqc.html", sample=all_samples),
expand("results/fastqc/{sample}_R2_fastqc.html", sample=all_samples),
expand("results/fastqc/{sample}_R1_fastqc.zip", sample=all_samples),
expand("results/fastqc/{sample}_R2_fastqc.zip", sample=all_samples),
# bowtie2
expand("logs/{sample}.align", sample=all_samples),
expand("logs/{sample}.flagstat.qc", sample=all_samples),
# preseq
expand("results/preseq/{sample}.ccurve.txt", sample=all_samples),
# picard
expand("results/picard/{sample}_est_lib_complex_metrics.txt", sample=all_samples),
# deepTools
"results/deeptools/multibamsum.npz",
"results/deeptools/multibamsum.tab",
"results/deeptools/pearsoncor_multibamsum.png",
"results/deeptools/pearsoncor_multibamsum_matrix.txt",
expand("results/deeptools/{sample}.SeqDepthNorm.bw", sample=all_samples),
"results/deeptools/multiBAM_fingerprint.png",
"results/deeptools/multiBAM_fingerprint_metrics.txt",
"results/deeptools/multiBAM_fingerprint_rawcounts.txt",
"results/deeptools/plot_coverage.png",
"results/deeptools/plot_coverage_rawcounts.tab",
"results/deeptools/bamPEFragmentSize_hist.png",
"results/deeptools/bamPEFragmentSize_rawcounts.tab",
# phantomPeaks
expand("results/phantomPeaks/{sample}.spp.pdf", sample = IPS),
expand("results/phantomPeaks/{sample}.spp.Rdata", sample = IPS),
expand("results/phantomPeaks/{sample}.spp.out", sample = IPS),
# macs2
expand("results/macs2/{case}_vs_{control}_macs2_peaks.narrowPeak", zip, case=IPS, control=INPUTS),
expand("results/macs2/{case}_vs_{control}_macs2_peaks.xls", zip, case=IPS, control=INPUTS),
expand("results/macs2/{case}_vs_{control}_macs2_summits.bed", zip, case=IPS, control=INPUTS),
expand("results/macs2/{case}-vs-{control}-narrowpeak-count_mqc.json", zip, case=IPS, control=INPUTS),
expand("results/frip/{case}_vs_{control}.frip.txt", case=IPS, control=INPUTS)
output:
directory("results/multiqc/multiqc_report_data/"),
"results/multiqc/multiqc_report.html",
log:
"logs/multiqc.log"
wrapper:
"0.31.1/bio/multiqc"
#rule consensus_peaks:
#rule enhancer_promoter_peaks:
# enhancer = peaks with only H3K27ac
# Promoter = peaks with both H3K27ac & H3K4me3, or just H3K4me3
# Make a bargraph with the amounts for each
# # ===============================================================================================
# # 10. QC on replicate peaks
# # ===============================================================================================
#
# # FRiP for overlapped peaks
# rule frip:
# input:
# bed = "results/bowtie2/{case}.bedpe",
# peak = "results/macs2/{}_macs2_peaks.narrowPeak"
# output:
# "results/qc/{case}.frip.txt"
# shell:
# "python2.7 scripts/encode_frip.py {input.bed} {input.peak} > {output}"
#
#
# # ===============================================================================================
# # 11. Combine all QC into multiqc output
# # ===============================================================================================
#
# rule multiqc:
# input:
# # fastqc
# expand("results/fastqc/{sample}_R1_fastqc.html", sample=all_samples),
# expand("results/fastqc/{sample}_R2_fastqc.html", sample=all_samples),
# expand("results/fastqc/{sample}_R1_fastqc.zip", sample=all_samples),
# expand("results/fastqc/{sample}_R2_fastqc.zip", sample=all_samples),
# # bowtie2
# expand("logs/{sample}.align", sample=all_samples),
# expand("logs/{sample}.flagstat.qc", sample=all_samples),
# # preseq
# expand("results/preseq/{sample}.ccurve.txt", sample=all_samples),
# # picard
# expand("results/picard/{sample}_est_lib_complex_metrics.txt", sample=all_samples),
# # deepTools
# "results/deeptools/multibamsum.npz",
# "results/deeptools/multibamsum.tab",
# "results/deeptools/pearsoncor_multibamsum.png",
# "results/deeptools/pearsoncor_multibamsum_matrix.txt",
# expand("results/deeptools/{sample}.SeqDepthNorm.bw", sample=all_samples),
# "results/deeptools/multiBAM_fingerprint.png",
# "results/deeptools/multiBAM_fingerprint_metrics.txt",
# "results/deeptools/multiBAM_fingerprint_rawcounts.txt",
# "results/deeptools/plot_coverage.png",
# "results/deeptools/plot_coverage_rawcounts.tab",
# "results/deeptools/bamPEFragmentSize_hist.png",
# "results/deeptools/bamPEFragmentSize_rawcounts.tab",
# # phantomPeaks
# expand("results/phantomPeaks/{sample}.spp.pdf", sample = IPS),
# expand("results/phantomPeaks/{sample}.spp.Rdata", sample = IPS),
# expand("results/phantomPeaks/{sample}.spp.out", sample = IPS),
# # macs2
# expand("results/macs2/{case}_vs_{control}_macs2_peaks.narrowPeak", zip, case=IPS, control=INPUTS),
# expand("results/macs2/{case}_vs_{control}_macs2_peaks.xls", zip, case=IPS, control=INPUTS),
# expand("results/macs2/{case}_vs_{control}_macs2_summits.bed", zip, case=IPS, control=INPUTS),
# expand("results/macs2/{case}-vs-{control}-narrowpeak-count_mqc.json", zip, case=IPS, control=INPUTS),
# expand("results/frip/{case}_vs_{control}.frip.txt", case=IPS, control=INPUTS)
# output:
# directory("results/multiqc/multiqc_report_data/"),
# "results/multiqc/multiqc_report.html",
# conda:
# "envs/multiqc.yaml"
# log:
# "logs/multiqc.log"
# wrapper:
# "0.31.1/bio/multiqc"
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment