Skip to content
Snippets Groups Projects
Commit 6f7e510a authored by Laura Cook's avatar Laura Cook
Browse files

created separate snakefiles for histone marks as I couldn't get the wildcards...

created separate snakefiles for histone marks as I couldn't get the wildcards to work so had to hardcode one of the rules
parent 8661c846
Branches
No related tags found
No related merge requests found
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
# 10. Present QC for raw read, alignment, peak-calling in MultiQC # 10. Present QC for raw read, alignment, peak-calling in MultiQC
configfile: "envs/config.yaml" configfile: "configs/config_H3K427ac.yaml"
# Contains sample and genome information # Contains sample and genome information
# calls the SRR.txt file that contains the samples as either IP or control # calls the SRR.txt file that contains the samples as either IP or control
...@@ -40,7 +40,7 @@ unique_marks = list(set(MARK)) ...@@ -40,7 +40,7 @@ unique_marks = list(set(MARK))
unique_stages = list(set(STAGE)) unique_stages = list(set(STAGE))
## combine all samples ## combine all samples
all_samples = IPS + unique_inputs all_samples = IPS + INPUTS
# =============================================================================================== # ===============================================================================================
# Output targets # Output targets
...@@ -80,17 +80,19 @@ rule all: ...@@ -80,17 +80,19 @@ rule all:
"logs/input_E13.5_H3K27ac.mergeBAM", "logs/input_E13.5_H3K27ac.mergeBAM",
"logs/input_E14.5_H3K27ac.mergeBAM", "logs/input_E14.5_H3K27ac.mergeBAM",
"logs/input_E15.5_H3K27ac.mergeBAM", "logs/input_E15.5_H3K27ac.mergeBAM",
"results/qc/multibamsum.npz", # "results/qc/multibamsum.npz",
"results/qc/multibamsum.tab", # "results/qc/multibamsum.tab",
"results/qc/pearsoncor_multibamsum.png", # "results/qc/pearsoncor_multibamsum.png",
"results/qc/pearsoncor_multibamsum_matrix.txt", # "results/qc/pearsoncor_multibamsum_matrix.txt",
expand("results/qc/{sample}_{stage}_{mark}.SeqDepthNorm.bw", zip, sample=all_samples, stage=STAGE, mark=MARK), # expand("results/qc/{sample}_{stage}_{mark}.SeqDepthNorm.bw", zip, sample=all_samples, stage=STAGE, mark=MARK),
"results/qc/multiBAM_fingerprint.png", # "results/qc/multiBAM_fingerprint.png",
"results/qc/multiBAM_fingerprint_metrics.txt", # "results/qc/multiBAM_fingerprint_metrics.txt",
"results/qc/multiBAM_fingerprint_rawcounts.txt", # "results/qc/multiBAM_fingerprint_rawcounts.txt",
"results/qc/plot_coverage.png", # "results/qc/plot_coverage.png",
"results/qc/plot_coverage_rawcounts.tab", # "results/qc/plot_coverage_rawcounts.tab",
expand("results/qc/{sample}_{stage}_{mark}.pbc.qc", zip, sample=all_samples, stage=STAGE, mark=MARK), #expand("results/bwa/{sample}_{stage}_{mark}_q30.dupmark.tmp.bam", zip, sample=all_samples, stage=STAGE, mark=MARK),
#expand("logs/{sample}_{stage}_{mark}.pbc.sort", zip, sample=all_samples, stage=STAGE, mark=MARK),
#expand("results/qc/{sample}_{stage}_{mark}.pbc.qc", zip, sample=all_samples, stage=STAGE, mark=MARK),
expand("results/qc/{sample}_{stage}_{mark}_filt_15Mreads.SE.cc.qc",zip, sample=all_samples, stage=STAGE, mark=MARK), expand("results/qc/{sample}_{stage}_{mark}_filt_15Mreads.SE.cc.qc",zip, sample=all_samples, stage=STAGE, mark=MARK),
expand("results/qc/{sample}_{stage}_{mark}_filt_15Mreads.SE.cc.plot.pdf", zip, sample=all_samples, stage=STAGE, mark=MARK), expand("results/qc/{sample}_{stage}_{mark}_filt_15Mreads.SE.cc.plot.pdf", zip, sample=all_samples, stage=STAGE, mark=MARK),
expand("results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK), expand("results/macs2/{case}_vs_{control}_{stage}_{mark}_macs2_peaks.narrowPeak", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
...@@ -100,14 +102,13 @@ rule all: ...@@ -100,14 +102,13 @@ rule all:
expand("results/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bed", zip, case=IPS, stage=STAGE, mark=MARK), expand("results/bwa/{case}_{stage}_{mark}_q30.sorted.dedup.bed", zip, case=IPS, stage=STAGE, mark=MARK),
expand("logs/{case}_{stage}_{mark}.bamToBed", zip, case=IPS, stage=STAGE, mark=MARK), expand("logs/{case}_{stage}_{mark}.bamToBed", zip, case=IPS, stage=STAGE, mark=MARK),
expand("results/qc/{case}_vs_{control}_{stage}_{mark}.frip.txt", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK), expand("results/qc/{case}_vs_{control}_{stage}_{mark}.frip.txt", zip, case=IPS, control=INPUTS, stage=STAGE, mark=MARK),
directory("results/macs2/pooled"), directory("results/macs2/pooled/"),
# "results/macs2/H3K27ac_E10.5_overlap.narrowPeak", "results/macs2/H3K27ac_E10.5_overlap.narrowPeak",
# "results/macs2/H3K27ac_E11.5_overlap.narrowPeak", "results/macs2/H3K27ac_E11.5_overlap.narrowPeak",
# "results/macs2/H3K27ac_E12.5_overlap.narrowPeak", "results/macs2/H3K27ac_E12.5_overlap.narrowPeak",
# "results/macs2/H3K27ac_E13.5_overlap.narrowPeak", "results/macs2/H3K27ac_E13.5_overlap.narrowPeak",
# "results/macs2/H3K27ac_E14.5_overlap.narrowPeak", "results/macs2/H3K27ac_E14.5_overlap.narrowPeak",
# "results/macs2/H3K27ac_E15.5_overlap.narrowPeak" "results/macs2/H3K27ac_E15.5_overlap.narrowPeak"
# expand("results/macs2/{stage}_H3K27ac_overlap.narrowPeak", zip, stage= STAGE, mark = MARK)
# directory("results/multiqc/multiqc_report_data/"), # directory("results/multiqc/multiqc_report_data/"),
# "results/multiqc/multiqc_report.html" # "results/multiqc/multiqc_report.html"
...@@ -267,22 +268,41 @@ rule mappingStats: ...@@ -267,22 +268,41 @@ rule mappingStats:
shell("samtools flagstat {input.c} > {output.c}") shell("samtools flagstat {input.c} > {output.c}")
shell("samtools flagstat {input.d} > {output.d}") shell("samtools flagstat {input.d} > {output.d}")
rule estimate_lib_complexity:
input: # rule sort_name:
"results/bwa/{sample}_{stage}_{mark}_q30.dupmark.bam" # input:
output: # "results/bwa/{sample}_{stage}_{mark}_q30.dupmark.bam"
qc = "results/qc/{sample}_{stage}_{mark}.pbc.qc", # output:
log: # tmp = "results/bwa/{sample}_{stage}_{mark}_q30.dupmark.tmp.bam"
"logs/{sample}_{stage}_{mark}.pbc" # log:
shell: # "logs/{sample}_{stage}_{mark}.pbc.sort"
""" # run:
bedtools bamtobed -bedpe -i {input} \ # shell("samtools sort -n {input} -o {output.tmp} 2> {log}")
| awk 'BEGIN{{OFS="\\t"}}{{print $1,$2,$4,$6,$9,$10}}' \ #
| sort | uniq -c \ # rule estimate_lib_complexity:
| awk 'BEGIN{{mt=0;m0=0;m1=0;m2=0}} ($1==1){{m1=m1+1}} ($1==2){{m2=m2+1}} \ # input:
{{m0=m0+1}} {{mt=mt+$1}} END{{printf "%d\\t%d\\t%d\\t%d\\t%f\\t%f\\t%f\\n" ,mt,m0,m1,m2,m0/mt,m1/m0,m1/m2}}' \ # "results/bwa/{sample}_{stage}_{mark}_q30.dupmark.tmp.bam"
> {output.qc} # output:
""" # qc = "results/qc/{sample}_{stage}_{mark}.pbc.qc",
# log:
# "logs/{sample}_{stage}_{mark}.pbc"
# shell:
# """
# bedtools bamtobed -i {input} \
# | awk 'BEGIN{{OFS="\\t"}}{{print $1,$2,$4,$6}}' \
# | sort | uniq -c \
# | awk 'BEGIN{{mt=0;m0=0;m1=0;m2=0}} ($1==1){{m1=m1+1}} ($1==2){{m2=m2+1}} \
# {{m0=m0+1}} {{mt=mt+$1}} END{{printf "%d\\t%d\\t%d\\t%d\\t%f\\t%f\\t%f\\n" ,mt,m0,m1,m2,m0/mt,m1/m0,m1/m2}}' \
# > {output.qc}
# """
#
## convert to bedPE
## print read 1 scaffold, read 1 start coordinate, read 2 scaffold, read 2 end coordinate, strand read 1, strand read 2
## remove mitochondrial genome
## sort by position and obtain uniq reads
## Format of file
## TotalReadPairs [tab] DistinctReadPairs [tab] OneReadPair [tab] TwoReadPairs [tab] NRF=Distinct/Total [tab] PBC1=OnePair/Distinct [tab] PBC2=OnePair/TwoPair
# =============================================================================================== # ===============================================================================================
# 5. deepTools # 5. deepTools
...@@ -470,23 +490,23 @@ rule call_peaks_macs2_pooled_replicates: ...@@ -470,23 +490,23 @@ rule call_peaks_macs2_pooled_replicates:
E12C = "results/bwa/input_E12.5_H3K27ac_q30.sorted.dedup.bam", E12C = "results/bwa/input_E12.5_H3K27ac_q30.sorted.dedup.bam",
E13C = "results/bwa/input_E13.5_H3K27ac_q30.sorted.dedup.bam", E13C = "results/bwa/input_E13.5_H3K27ac_q30.sorted.dedup.bam",
E14C = "results/bwa/input_E14.5_H3K27ac_q30.sorted.dedup.bam", E14C = "results/bwa/input_E14.5_H3K27ac_q30.sorted.dedup.bam",
E15C = "results/bwa/input_E15.5_H3K27ac_q30.sorted.dedup.bam", E15C = "results/bwa/input_E15.5_H3K27ac_q30.sorted.dedup.bam"
output: output:
directory("results/macs2/pooled") directory("results/macs2/pooled/")
log: log:
E10 = "results/qc/E10.5_H3K27ac.pooled.macs2", E10 = "results/qc/E10.5_H3K27ac.pooled.macs2",
E11 = "results/qc/E11.5_H3K27ac.pooled.macs2", E11 = "results/qc/E11.5_H3K27ac.pooled.macs2",
E12 = "results/qc/E12.5_H3K27ac.pooled.macs2", E12 = "results/qc/E12.5_H3K27ac.pooled.macs2",
E13 = "results/qc/E13.5_H3K27ac.pooled.macs2", E13 = "results/qc/E13.5_H3K27ac.pooled.macs2",
E14 = "results/qc/E14.5_H3K27ac.pooled.macs2", E14 = "results/qc/E14.5_H3K27ac.pooled.macs2",
E15 = "results/qc/E15.5_H3K27ac.pooled.macs2", E15 = "results/qc/E15.5_H3K27ac.pooled.macs2"
params: params:
E10 = "E10.5_H3K27ac.pooled.macs2", E10 = "E10.5_H3K27ac.pooled.macs2",
E11 = "E11.5_H3K27ac.pooled.macs2", E11 = "E11.5_H3K27ac.pooled.macs2",
E12 = "E12.5_H3K27ac.pooled.macs2", E12 = "E12.5_H3K27ac.pooled.macs2",
E13 = "E13.5_H3K27ac.pooled.macs2", E13 = "E13.5_H3K27ac.pooled.macs2",
E14 = "E14.5_H3K27ac.pooled.macs2", E14 = "E14.5_H3K27ac.pooled.macs2",
E15 = "E15.5_H3K27ac.pooled.macs2", E15 = "E15.5_H3K27ac.pooled.macs2"
run: run:
shell("macs2 callpeak -f BAM -t {input.E10} \ shell("macs2 callpeak -f BAM -t {input.E10} \
-c {input.E10C} --keep-dup all \ -c {input.E10C} --keep-dup all \
...@@ -556,12 +576,12 @@ rule frip: ...@@ -556,12 +576,12 @@ rule frip:
rule overlap_peaks: rule overlap_peaks:
input: input:
E10= expand(["results/macs2/{case}_vs_{control}_E10.5_H3K27ac_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS, mark=MARK), E10= ["results/macs2/ENCFF213EBC_vs_ENCFF157KEH_E10.5_H3K27ac_macs2_peaks.narrowPeak", "results/macs2/ENCFF548BRR_vs_ENCFF825AVI_E10.5_H3K27ac_macs2_peaks.narrowPeak"],
E11= expand(["results/macs2/{case}_vs_{control}_E11.5_H3K27ac_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS, mark=MARK), E11= ["results/macs2/ENCFF512SFE_vs_ENCFF184CUE_E11.5_H3K27ac_macs2_peaks.narrowPeak", "results/macs2/ENCFF515PKL_vs_ENCFF376FGM_E11.5_H3K27ac_macs2_peaks.narrowPeak"],
E12= expand(["results/macs2/{case}_vs_{control}_E12.5_H3K27ac_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS, mark=MARK), E12= ["results/macs2/ENCFF011NFM_vs_ENCFF058AUT_E12.5_H3K27ac_macs2_peaks.narrowPeak", "results/macs2/ENCFF394TZN_vs_ENCFF203JQV_E12.5_H3K27ac_macs2_peaks.narrowPeak"],
E13= expand(["results/macs2/{case}_vs_{control}_E13.5_H3K27ac_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS, mark=MARK), E13= ["results/macs2/ENCFF194ORC_vs_ENCFF117QRC_E13.5_H3K27ac_macs2_peaks.narrowPeak", "results/macs2/ENCFF290ZNF_vs_ENCFF248PGK_E13.5_H3K27ac_macs2_peaks.narrowPeak"],
E14= expand(["results/macs2/{case}_vs_{control}_E14.5_H3K27ac_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS, mark=MARK), E14= ["results/macs2/ENCFF902HAR_vs_ENCFF002HZV_E14.5_H3K27ac_macs2_peaks.narrowPeak", "results/macs2/ENCFF327VAO_vs_ENCFF784ORI_E14.5_H3K27ac_macs2_peaks.narrowPeak"],
E15= expand(["results/macs2/{case}_vs_{control}_E15.5_H3K27ac_macs2_peaks.narrowPeak"], zip, case=IPS, control=INPUTS, mark=MARK), E15= ["results/macs2/ENCFF707WKL_vs_ENCFF182XFG_E15.5_H3K27ac_macs2_peaks.narrowPeak", "results/macs2/ENCFF584JFB_vs_ENCFF727QTS_E15.5_H3K27ac_macs2_peaks.narrowPeak"],
E10_pooled="results/macs2/E10.5_H3K27ac.pooled.macs2_peaks.narrowPeak", E10_pooled="results/macs2/E10.5_H3K27ac.pooled.macs2_peaks.narrowPeak",
E11_pooled="results/macs2/E11.5_H3K27ac.pooled.macs2_peaks.narrowPeak", E11_pooled="results/macs2/E11.5_H3K27ac.pooled.macs2_peaks.narrowPeak",
E12_pooled="results/macs2/E12.5_H3K27ac.pooled.macs2_peaks.narrowPeak", E12_pooled="results/macs2/E12.5_H3K27ac.pooled.macs2_peaks.narrowPeak",
......
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment