diff --git a/dunnart-chipseq/README.md b/dunnart-chipseq/README.md
index 86cbabebc49903076eff598b55f129e6a97d509f..802330c2e85338989663d694dd3fdfa3222a8319 100755
--- a/dunnart-chipseq/README.md
+++ b/dunnart-chipseq/README.md
@@ -193,15 +193,7 @@ Sample configuration file: configs/config.yaml\
 Sample text file: configs/SSR.text\
 multiQC configuration file: configs/.multiqc_config.yaml\
 
-
-#  1. FASTQC
-
-### rule fastqc:
-See (insert link to multiQC file) for fastqc output.
-
-#  2. ALIGNMENT
-
-### rule align:
+# ALIGNMENT
 
 <details><summary>_Notes on Bowtie2 Mapping Scores_</summary>
 <p>
@@ -295,7 +287,7 @@ bowtie2-build Scras_dunnart_assem1.0_pb-ont-illsr_flyeassem_red-rd-scfitr2_pil2x
 bowtie2-build Sminthopsis_crassicaudata_HiC.fasta Sminthopsis_crassicaudata_HiC
 ```
 
-#  3. FILTERING
+#  FILTERING
 
 ### rule filter:
 
@@ -316,7 +308,7 @@ Uses samtools and picards MarkDuplicates to remove low quality reads. This is ba
 
 Use `samtools index` to index BAM file for use with deeptools.
 
-#  4. GENERAL ALIGNMENT QC
+# GENERAL ALIGNMENT QC
 
 ### rule mappingStats:
 
@@ -330,11 +322,6 @@ Use `samtools flagstat` to get stats at each step in the filtering so I know exa
 
 The preseq package is aimed at predicting and estimating the complexity of a genomic sequencing library, equivalent to predicting and estimating the number of redundant reads from a given sequencing depth and how many will be expected from additional sequencing using an initial sequencing experiment. The estimates can then be used to examine the utility of further sequencing, optimize the sequencing depth, or to screen multiple libraries to avoid low complexity samples.
 
-
-### rule get_picard_complexity_metrics:
-
-### rule sort_name:
-
 ### rule estimate_lib_complexity:
 
 This follows exactly the code used the in thee ENCODE chip-seq pipeline for computing the non-redundant fraction (NRF), PCR bottlenecking cofficient 1 & 2 (PBC1 and PBC2). The non-redundant fraction is the proportion of reads that have a distinct start coordinate in read pair 1 and a distinct end coordinate in read pair 2 (as opposed to duplicate reads that have exactly the same coordinate) out of all uniquely mapped read pairs.
@@ -358,7 +345,7 @@ Library Complexity ChIP-seq Standards:
 | 0.8 â‰¤ PBC1 < 0.9 | 3 â‰¤ PBC2 < 10 | Mild | 0.8 â‰¤ NRF < 0.9 | Compliant | None |
 | â‰¥ 0.9 | â‰¥ 10 | None | > 0.9 | Ideal | None |
 
-#  5. deepTools
+#  deepTools
 
 ### rule deeptools_summary:
 
@@ -390,15 +377,11 @@ It determines how well the signal in the ChIP-seq sample can be differentiated f
 
 An ideal input with perfect uniform distribution of reads along the genome (i.e. without enrichments in open chromatin etc.) and infinite sequencing coverage should generate a straight diagonal line. A very specific and strong ChIP enrichment will be indicated by a prominent and steep rise of the cumulative sum towards the highest rank. This means that a big chunk of reads from the ChIP sample is located in few bins which corresponds to high, narrow enrichments typically seen for transcription factors.
 
-
-### rule deeptools_plotCoverage:
-
 ### rule deeptools_bamPEFragmentSize:
 
-This tool calculates the fragment sizes for read pairs given a BAM file from paired-end sequencing.Several regions are sampled depending on the size of the genome and number of processors to estimate thesummary statistics on the fragment lengths.
+This tool calculates the fragment sizes for read pairs given a BAM file from paired-end sequencing. Several regions are sampled depending on the size of the genome and number of processors to estimate thesummary statistics on the fragment lengths.
 
-
-#  6. Cross-correlation
+#  Cross-correlation
 
 Information from: https://docs.google.com/document/d/1lG_Rd7fnYgRpSIqrIfuVlAz2dW1VaSQThzk836Db99c/edit
 
@@ -468,7 +451,7 @@ Notes from ENCODE pipeline:
 __CC_SCORE FILE format__
 Filename <tab> numReads <tab> estFragLen <tab> corr_estFragLen <tab> PhantomPeak <tab> corr_phantomPeak <tab> argmin_corr <tab> min_corr <tab> phantomPeakCoef <tab> relPhantomPeakCoef <tab> QualityTag
 
-#  7. Call peaks (MACS2)
+# Call peaks (MACS2)
 
 __Input file options__
 
@@ -518,7 +501,7 @@ Run script in the directory with the files you want to subsample.
 - `summits.bed`: peak summits locations for every peak. To find the motifs at the binding sites, this file is recommended
 
 
-#  8. Peak QC
+#  Peak QC
 
 
 ### rule get_narrow_peak_counts_for_multiqc:
@@ -536,7 +519,7 @@ Convert BAM to tagAlign file for calculating FRiP QC metric (Fraction of reads i
 Calculate fraction of reads in peaks using a custom script based on ENCODE pipeline code - `scripts/encode_frip.py`
 
 
-#  9. Create consensus peaksets for replicates
+# Create consensus peaksets for replicates
 
 Edited version of ENCODE `overlap_peaks.py` - recommended for histone marks.
 
@@ -565,7 +548,7 @@ Overlap replicate peaks for H3K4me3.
 
 Overlap replicate peaks for H3K27ac.
 
-# 10. Consensus peaks and grouping into putative enhancers and promoters
+# Consensus peaks and grouping into putative enhancers and promoters
 ## Merge peaks with max distance between peaks of 50bp 
 
 ```
@@ -600,57 +583,5 @@ cat H3K27ac_only.narrowPeak | sort -k 4,4 > dunnart_enhancer_peaks.narrowPeak
 sort -k1,1 -k2,2n dunnart_enhancers.narrowPeak | bedtools merge -i stdin -d 50 > dunnart_enhancer_mergedPeaks.narrowPeak
 ```
 
-
-# 12. Find overlap with TWARs
-
-## Get dunnart bed coordinates for TWARs
-
-Use BLASTn to blast Tasmanian devil TWAR sequences against dunnart genome to find coordinates for TWARs.
-
-```
-blastn -task blastn -num_threads 4 -db dunnart_pseudochr_vs_mSarHar1.11_v1.fasta -query sarHar1_twars.fa -out dunnart_TWARs.bed -outfmt "6 sseqid sstart send qseqid length"
-```
-
-## Intersect with H3K4me3 and H3K27ac
-
-```
-intersectBed -wb -a H3K4me3_overlap.narrowPeak -b dunnart_TWARs.bed > twar_H3K4me3_overlap.bed
-```
-
-```
-intersectBed -a H3K27ac_overlap.narrowPeak -b dunnart_TWARs.bed > twar_H3K27ac_overlap.bed
-```
-
-```
-intersectBed -a H3K4me3_only.narrowPeak -b dunnart_TWARs.bed > twar_H3K4me3_only_overlap.bed
-```
-
-```
-intersectBed -a H3K27ac_only.narrowPeak -b dunnart_TWARs.bed > twar_H3K27ac_only_overlap.bed
-```
-
-```
-intersectBed -a H3K4me3_and_H3K27ac.narrowPeak -b dunnart_TWARs.bed > twar_H3K27ac_and_H3K4me3_overlap.bed
-```
-
-
-# 12. Annotate Peaks
-
-Figures
-
-Dunnart full peaks
-
-
-Figure 1:
-â€¢	Show figure comparing the number of active regulatory regions (H3K27ac, H3K4me3&H3K27ac, and H3K4me3) for each group 
-â€¢	Figure of tree showing evolutionary distance between dunnart and mouse
-â€¢	Figure showing what each embryonic mouse stage and dunnart P0 stage actually look like
-
-Figure 2:
-â€¢	Number of enhancers and promoters
-â€¢	Promoter and enhancer activity conserved between the different mouse embryonic stages and dunnart
-â€¢	Distance to nearest TSS
-
-
-Dunnart/mouse 10M reads peaks
-
+# Peak Features, Annotation, GO, motif enrichment
+see scripts folder.
diff --git a/dunnart-chipseq/Snakefile b/dunnart-chipseq/Snakefile
index 6fda8b979ef68b1b5e9f0c7e17d74d87e1cccc0b..8f9773e6af4cf3a270515dcc9a20f11c2fcecca3 100755
--- a/dunnart-chipseq/Snakefile
+++ b/dunnart-chipseq/Snakefile
@@ -1,6 +1,7 @@
 ## Author: Laura E Cook, University of Melbourne
 ## Purpose: ChIP-seq pipeline for histone marks
 ## This workflow only works for paired-end sequencing reads
+
     # 1. FastQC on raw reads
     # 2. Alignment
     # 3. Filtering
@@ -10,7 +11,7 @@
     # 7. Call narrow peaks (MACS2)
     # 8. Peak QC
     # 9. Create consensus peaksets
-    # 10. Present QC for raw read, alignment, peak-calling in MultiQC
+    # 10. Present QC for raw read, alignment, peak-calling in MultiQC (this part is a work in progress)
 
 
 configfile: "envs/config.yaml"
diff --git a/dunnart-chipseq/Snakefile_10M b/dunnart-chipseq/Snakefile_10M
index 093151426167286b737cc71ae60a097b9aaefb20..757bc9b6a3ac42296a9073aa7f8b77b37b7a7a04 100644
--- a/dunnart-chipseq/Snakefile_10M
+++ b/dunnart-chipseq/Snakefile_10M
@@ -1,6 +1,7 @@
 ## Author: Laura E Cook, University of Melbourne
 ## Purpose: ChIP-seq pipeline for histone marks
 ## This workflow only works for paired-end sequencing reads
+## This is run for dunnart ChIP-seq library subsampled to 10 million reads
     # 1. FastQC on raw reads
     # 2. Alignment
     # 3. Filtering
@@ -10,7 +11,7 @@
     # 7. Call narrow peaks (MACS2)
     # 8. Peak QC
     # 9. Create consensus peaksets
-    # 10. Present QC for raw read, alignment, peak-calling in MultiQC
+    # 10. Present QC for raw read, alignment, peak-calling in MultiQC (this is still a work in progress)
 
 #! /usr/bin/env python
 
diff --git a/dunnart-chipseq/scripts/GC_content.R b/dunnart-chipseq/scripts/GC_content.R
deleted file mode 100644
index 6db2d9728f4e171faf3b231853098abc9da31e75..0000000000000000000000000000000000000000
--- a/dunnart-chipseq/scripts/GC_content.R
+++ /dev/null
@@ -1,66 +0,0 @@
-# GC content and CpG % plots 
-
-library(data.table) 
-library(tidyverse)
-library(ggridges)
-library(ggpubr)
-library(reshape2)
-library(RColorBrewer)
-library(ggplot2)
-library(VennDiagram)
-library(viridis)
-library(hrbrthemes)
-library(gghalves)
-library(dplyr)
-
-
-promoter <- fread("dunnart_promoters_homerAnnot.txt")
-enhancer <- fread("dunnart_enhancers_homerAnnot.txt")
-peaks = list(promoter, enhancer)
-peaks = Map(mutate,peaks,cre=c("promoters", "enhancers"))
-peaks = lapply(peaks, function(x) x %>% dplyr::select("Chr", "Start", "End", "Peak Score", "Distance to TSS", "CpG%", "GC%", "cre") %>% as.data.table())
-peaks = rbindlist(peaks)
-colnames(peaks)[5:7] <- c("distanceToTSS","CpG", "GC")
-peaks[is.na(peaks)] <- 0
-peaks = peaks[,log10_abs_dist:=log10(abs(distanceToTSS)+1)]
-promoter = subset(peaks, cre == "promoters")
-enhancer = subset(peaks, cre == "enhancers")
-promoter.subset=subset(promoter, log10_abs_dist >= 3.5)
-promoter.subset2 = subset(promoter, log10_abs_dist <3.5)
-promoter.subset$cre = "subset>3.5"
-promoter.subset2$cre = "subset<3.5"
-data = rbind(enhancer, promoter, promoter.subset2, promoter.subset)
-
-p <- ggplot(data, aes(factor(cre), y = GC)) + 
-  #geom_violin(aes(fill=factor(cre), color=factor(cre)),
-  #  position = "dodge"
-  #  )+
-  geom_boxplot(aes(color=factor(cre)),
-    #outlier.shape = NA,
-    width = .2, 
-    notch = TRUE,
-    fill=c("#FCF8EC","#E4F3F1", "#FCF8EC","#E4F3F1"),
-    position = position_dodge(width=.1)
-  ) + theme_bw() + xlab("") + ylab("CpG %") 
-pdf("GC.pdf", width=9, height = 8)
-p +  scale_color_manual(values = c("#E9C46A","#2A9D8F","#E9C46A","#2A9D8F")) +
-scale_fill_manual(values = c("#F1DAA2","#7AC2B9","#E9C46A","#2A9D8F"))
-dev.off()
-
-pairwise.wilcox.test(data$GC, data$cre, p.adjust.method="fdr")
-
-p <- ggplot(peaks, aes(factor(cre), y = GC.width)) + 
-  geom_violin(aes(fill=factor(cre), color=factor(cre)),
-    position = "dodge"
-    )+
-  geom_boxplot(aes(color=factor(cre)),
-    outlier.shape = NA,
-    width = .15, 
-    notch = TRUE,
-    fill=c("#FCF8EC","#E4F3F1"),
-    position = position_dodge(width=.1)
-  ) + theme_bw() + xlab("") + ylab("GC content %") 
-pdf("GC.width_plot.pdf", width=9, height = 8)
-p +  scale_color_manual(values = c("#E9C46A","#2A9D8F")) +
-scale_fill_manual(values = c("#F1DAA2","#7AC2B9")) + stat_compare_means(method = "wilcox.test" )
-dev.off()
\ No newline at end of file
diff --git a/dunnart-chipseq/scripts/GO_plots.R b/dunnart-chipseq/scripts/GO_plots.R
deleted file mode 100755
index 8792515b5c0b7bbb1fb44220cd7d09ed302ee9a0..0000000000000000000000000000000000000000
--- a/dunnart-chipseq/scripts/GO_plots.R
+++ /dev/null
@@ -1,80 +0,0 @@
-## Script that takes as input file with GO enriched terms and returns you a barplot
-library(dplyr); library(data.table)
-library(magrittr); 
-library(ggthemes);library(ggplot2);library(ggpubr)
-
-## change here the working directory(ies)
-
-go_files_dir='/data/projects/punim0586/lecook/chipseq-pipeline/dunnart/results/macs2/annotations/KEGG/'
-plot_dir='/data/projects/punim0586/lecook/chipseq-pipeline/dunnart/results/macs2/plots/'
-
-## read files into the directory
-## it creates a list of dataframes (one per file)
-## then discards the geneID field 
-
-read_files=function(dir){
-  x=as.character(list.files(dir,recursive =F,full.names = T)) %>% 
-    lapply(function(y)
-      fread(y,sep = '\t',header = T)[
-        ,geneID:=NULL
-      ] %>% setorderv('p.adjust',1)) ## sorted by most significant ones 
-  ## this returns you the filename without file extension. 
-  ## I will use it as plot title below (you can remove it if u want)
-  file_names=as.character(list.files(dir,recursive =F,full.names = F))
-  file_names=gsub("\\..*","",file_names) 
-  names(x)=file_names
-  ## I will make a new field with the filename that I will use as categorical variable to assign different colors
-  ## i.e. each plot will have a different color
-  x=Map(mutate,x,filename=names(x))
-  x=lapply(x,function(y)y=as.data.table(y))
-  return(x)
-}
-go_terms=read_files(go_files_dir)
-
-## I will make another column with the color for the given file. 
-## all row entries will be identical but I just need this for plotting
-## I am going to do in this way so that u can change the colors as u like for each file
-
-colors=c('#6699CC','#6699CC','#6699CC','#6699CC') ## remember to add a color per input file
-
-go_colors=Map(cbind, go_terms, color=colors)
-go_colors=lapply(go_colors, function(x)x=as.data.table(x))
-
-## this keeps only the significant GO terms
-significant_go_terms=lapply(go_colors,function(x)x=x[p.adjust<=0.05])
-
-plot_results=function(file){
-  
-  top_n_GOs=copy(file)
-  top_n_GOs=top_n_GOs[1:30]
-  
-  ggplot(top_n_GOs, 
-         aes(x=reorder(Description,-log10(p.adjust)), y=-log10(p.adjust),fill=filename)) + ## this reorders the x-axis and converts the p-adj to log10 values
-    geom_bar(stat = 'identity',position = 'dodge')+
-    xlab(" ") +
-    ylab("\n -Log10 (P) \n ") +
-    scale_y_continuous(breaks = round(seq(0, max(-log10(top_n_GOs$p.adjust)), by = 2), 1)) +
-    scale_fill_manual(name= " ",values=top_n_GOs$color)+
-    theme( ## these are just parameters to change the theme of the plot. you can add more or remove them
-      legend.position='none',
-      legend.background=element_rect(),
-      axis.text.x=element_text(angle=0, hjust=1.10),
-      axis.text.y=element_text(angle=0, vjust=0.8),
-      axis.title=element_text(),
-      axis.line = element_line(color = "black",size = 0, linetype = "solid"),
-      panel.background =element_rect(fill = 'white', size = 0.5,colour = 'black'),
-      panel.grid.minor = element_blank(),
-      panel.grid.major = element_blank(),
-      title=element_text()) +
-    coord_flip()
-  
-}
-
-go_plot=lapply(significant_go_terms,function(x)x=plot_results(x))
-
-
-# Save each plot to a different pdf in the plot_dir/
-lapply(names(go_plot), 
-       function(x)ggsave(filename=paste(plot_dir,x,'_plot',".pdf",sep=""), 
-                         plot=go_plot[[x]]))
-
diff --git a/dunnart-chipseq/scripts/genes.peaks_perScaffold.R b/dunnart-chipseq/scripts/genes.peaks_perScaffold.R
index 79d64b3c1d630a1b33bba7c127ca857460957d1d..beeb6d9594f4a2c6e5528bd1551a7449fc5687a7 100644
--- a/dunnart-chipseq/scripts/genes.peaks_perScaffold.R
+++ b/dunnart-chipseq/scripts/genes.peaks_perScaffold.R
@@ -1,4 +1,5 @@
 ## Number of peaks per gene and distance to the nearest gene
+
 library(dplyr)
 library(tidyverse)
 library(data.table)
diff --git a/dunnart-chipseq/scripts/subsample.sh b/dunnart-chipseq/scripts/subsample.sh
index 322a400d1982deff4cc3b77e2ac48b1aa90ff9ac..3523c74b2177c0f553d122f824992da6f6128f33 100755
--- a/dunnart-chipseq/scripts/subsample.sh
+++ b/dunnart-chipseq/scripts/subsample.sh
@@ -1,4 +1,4 @@
-
+## Command to subsample bam files
 
 TRA=($(for file in B*_PPq30.sorted.dedup.bam; do echo $file |cut -d "_" -f 1-2;done))
 
diff --git a/dunnart-chipseq/scripts/twars_in_dunnart_peaks_hyperTest.R b/dunnart-chipseq/scripts/twars_in_dunnart_peaks_hyperTest.R
index 9629beb8d7034db14ed3cf6e571da87dd9939db4..e9ffa471ed118633222512151a4d899d9bff0b10 100644
--- a/dunnart-chipseq/scripts/twars_in_dunnart_peaks_hyperTest.R
+++ b/dunnart-chipseq/scripts/twars_in_dunnart_peaks_hyperTest.R
@@ -1,3 +1,5 @@
+
+
 library(ChIPseeker)
 library(data.table)
 library(dplyr)