diff --git a/mouse/scripts/count_peaks.py b/mouse/scripts/count_peaks.py new file mode 100644 index 0000000000000000000000000000000000000000..6d9f01c24716ee90681a039c09e17d63e0136f27 --- /dev/null +++ b/mouse/scripts/count_peaks.py @@ -0,0 +1,43 @@ +#! /usr/bin/env python + +# Author: Adrian Foucal https://gitlab.univ-nantes.fr/foucal-a/full-chipseq/-/blob/master/scripts/count_peaks.py + +import json +import argparse +import subprocess + +parser = argparse.ArgumentParser() +parser.add_argument("--peak_type", help="Required. Peak type for section title. eg broadPeak, narrowpeak...") +parser.add_argument("--sample_name",nargs='+', help="Required. name of your samples") +parser.add_argument("--peaks", nargs = '+', help = "Called peaks, no header") + +args = parser.parse_args() + + +def count_lines(fn): + return(int(subprocess.check_output("wc -l " + fn, shell=True).split()[0])) + +# Go through all sample_name, identify corresponding peak files and get the count in a dict +sample_count = {} +for sample_name in args.sample_name: + sample_count[sample_name] = {"Peak number": [count_lines(fn) for index, fn in enumerate(args.peaks) if sample_name in fn][0]} + + +# Get the MultiQC output + +multiqc_output = { + "id": "MACS2_" + args.peak_type + "_count" , + "section_anchor": "MACS2 peak", + "section_name": "MACS2 " + args.peak_type + " counts", + "description": "", + "plot_type": "bargraph", + "pconfig": { + "id": "MACS2 " + args.peak_type, + "ylab": "# Peaks", + "cpswitch": False, # Show the 'Counts / Percentages' switch? + "cpswitch_c_active": True + }, + "data": sample_count +} + +print(json.dumps(multiqc_output, indent = 4)) diff --git a/mouse/scripts/encode_frip.py b/mouse/scripts/encode_frip.py new file mode 100644 index 0000000000000000000000000000000000000000..a6dc88b0d13df4cea96073514337d50484df6060 --- /dev/null +++ b/mouse/scripts/encode_frip.py @@ -0,0 +1,40 @@ +#! /usr/bin/env python + +## Author: Laura E Cook, University of Melbourne +## Purpose: +# 1. Intersect aligned reads with called peaks +# 2. Count number of lines in intersected file +# 3. Count number of lines in aligned reads (BED) file +# 4. Calculate fraction of peaks that are in the aligned reads +# 5. ENCODE guidelines state that FRiP > 0.3 is optimal, FRiP > 0.2 is acceptable + +from pybedtools import BedTool + +import sys +import os + +# bed file (converted from aligned BAM file to bed) +bed = BedTool(sys.argv[1]) + +# called narrowPeak file from MACS2 +peak = BedTool(sys.argv[2]) + +# overlap bed and peak files +overlap = bed.intersect(peak, nonamecheck=True, wa=True, u=True) + +num_lines_overlap = 0 + +# determine how many lines in overlap file +for line in overlap: + num_lines_overlap += 1 +print 'Number of reads that intersect with peaks = ', num_lines_overlap + +num_lines_bed = 0 + +# determine how many lines in original bed file +for line in bed: + num_lines_bed +=1 +print 'Number of total reads = ', num_lines_bed + +# print the fraction of peaks in aligned reads +print 'Fraction of Reads in Peak = ', str(float(num_lines_overlap)/float(num_lines_bed)) diff --git a/mouse/scripts/overlap_peaks.py b/mouse/scripts/overlap_peaks.py new file mode 100644 index 0000000000000000000000000000000000000000..fe36e8a592611c9e3fb0566f43eaf7a84fdf8575 --- /dev/null +++ b/mouse/scripts/overlap_peaks.py @@ -0,0 +1,48 @@ +#! /usr/bin/env python + +## Author: Laura E Cook, University of Melbourne +## Purpose: +# 1. Import replicate 1, replicate 2, and output file name from snakemake rule +# 2. Concatonate peak 1 adn peak 2 to get a pooled_peaks file +# 3. Intersect pooled peaks with peak 1 +# 4. Write the original A and B entries plus the number of base pairs of overlap between the two features. Only A features with overlap are reported. +# 5. Calculate +# 5. + +import sys +import os +import subprocess + +# replicate 1 +peak1 = sys.argv[1] + +# replicate 2 +peak2 = sys.argv[2] + +# pooled peaks +pooled = sys.argv[3] + +output = sys.argv[4] + +awk_param = '{s1=$3-$2; s2=$13-$12; if (($21/s1 >= 0.5) || ($21/s2 >= 0.5)) {print $0}}' +cut_param = '1-10' + +cmd1 = 'intersectBed -wo ' +cmd1 += '-a {} -b {} | ' +cmd1 += 'awk \'BEGIN{{FS="\\t";OFS="\\t"}} {}\' | ' +cmd1 += 'cut -f {} | sort | uniq | ' +cmd1 += 'intersectBed -wo ' +cmd1 += '-a stdin -b {} | ' +cmd1 += 'awk \'BEGIN{{FS="\\t";OFS="\\t"}} {}\' | ' +cmd1 += 'cut -f {} | sort | uniq > {}' +cmd2 = cmd1.format( + pooled, # peak_pooled + peak1, # peak1 + awk_param, + cut_param, + peak2, # peak2 + awk_param, + cut_param, + output) + +os.system(cmd2)