Skip to content
Snippets Groups Projects
Commit 0bae9dff authored by Laura Cook's avatar Laura Cook
Browse files

first commit

parent d85d9ee5
Branches
No related tags found
No related merge requests found
#! /usr/bin/env python
# Author: Adrian Foucal https://gitlab.univ-nantes.fr/foucal-a/full-chipseq/-/blob/master/scripts/count_peaks.py
import json
import argparse
import subprocess
parser = argparse.ArgumentParser()
parser.add_argument("--peak_type", help="Required. Peak type for section title. eg broadPeak, narrowpeak...")
parser.add_argument("--sample_name",nargs='+', help="Required. name of your samples")
parser.add_argument("--peaks", nargs = '+', help = "Called peaks, no header")
args = parser.parse_args()
def count_lines(fn):
return(int(subprocess.check_output("wc -l " + fn, shell=True).split()[0]))
# Go through all sample_name, identify corresponding peak files and get the count in a dict
sample_count = {}
for sample_name in args.sample_name:
sample_count[sample_name] = {"Peak number": [count_lines(fn) for index, fn in enumerate(args.peaks) if sample_name in fn][0]}
# Get the MultiQC output
multiqc_output = {
"id": "MACS2_" + args.peak_type + "_count" ,
"section_anchor": "MACS2 peak",
"section_name": "MACS2 " + args.peak_type + " counts",
"description": "",
"plot_type": "bargraph",
"pconfig": {
"id": "MACS2 " + args.peak_type,
"ylab": "# Peaks",
"cpswitch": False, # Show the 'Counts / Percentages' switch?
"cpswitch_c_active": True
},
"data": sample_count
}
print(json.dumps(multiqc_output, indent = 4))
#! /usr/bin/env python
## Author: Laura E Cook, University of Melbourne
## Purpose:
# 1. Intersect aligned reads with called peaks
# 2. Count number of lines in intersected file
# 3. Count number of lines in aligned reads (BED) file
# 4. Calculate fraction of peaks that are in the aligned reads
# 5. ENCODE guidelines state that FRiP > 0.3 is optimal, FRiP > 0.2 is acceptable
from pybedtools import BedTool
import sys
import os
# bed file (converted from aligned BAM file to bed)
bed = BedTool(sys.argv[1])
# called narrowPeak file from MACS2
peak = BedTool(sys.argv[2])
# overlap bed and peak files
overlap = bed.intersect(peak, nonamecheck=True, wa=True, u=True)
num_lines_overlap = 0
# determine how many lines in overlap file
for line in overlap:
num_lines_overlap += 1
print 'Number of reads that intersect with peaks = ', num_lines_overlap
num_lines_bed = 0
# determine how many lines in original bed file
for line in bed:
num_lines_bed +=1
print 'Number of total reads = ', num_lines_bed
# print the fraction of peaks in aligned reads
print 'Fraction of Reads in Peak = ', str(float(num_lines_overlap)/float(num_lines_bed))
#! /usr/bin/env python
## Author: Laura E Cook, University of Melbourne
## Purpose:
# 1. Import replicate 1, replicate 2, and output file name from snakemake rule
# 2. Concatonate peak 1 adn peak 2 to get a pooled_peaks file
# 3. Intersect pooled peaks with peak 1
# 4. Write the original A and B entries plus the number of base pairs of overlap between the two features. Only A features with overlap are reported.
# 5. Calculate
# 5.
import sys
import os
import subprocess
# replicate 1
peak1 = sys.argv[1]
# replicate 2
peak2 = sys.argv[2]
# pooled peaks
pooled = sys.argv[3]
output = sys.argv[4]
awk_param = '{s1=$3-$2; s2=$13-$12; if (($21/s1 >= 0.5) || ($21/s2 >= 0.5)) {print $0}}'
cut_param = '1-10'
cmd1 = 'intersectBed -wo '
cmd1 += '-a {} -b {} | '
cmd1 += 'awk \'BEGIN{{FS="\\t";OFS="\\t"}} {}\' | '
cmd1 += 'cut -f {} | sort | uniq | '
cmd1 += 'intersectBed -wo '
cmd1 += '-a stdin -b {} | '
cmd1 += 'awk \'BEGIN{{FS="\\t";OFS="\\t"}} {}\' | '
cmd1 += 'cut -f {} | sort | uniq > {}'
cmd2 = cmd1.format(
pooled, # peak_pooled
peak1, # peak1
awk_param,
cut_param,
peak2, # peak2
awk_param,
cut_param,
output)
os.system(cmd2)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment