#!/usr/bin/env python3
"""Re-emit every record of a multi-FASTA file on standard output.

Each record is written as ``>`` followed by the record ID, a newline, and the
whole sequence on one line terminated by ``*``.  All records go to a single
stream (standard output); the script does not create one file per record.

Usage:
    python3 parse_FASTA.py input.fasta > output.fasta
"""

# string, random and os are not used below; kept as in the original file.
import string
import random
import sys
import os


def write_records(records, out=None):
    """Write *records* to *out*, one header line and one sequence line each.

    Args:
        records: iterable of objects exposing ``id`` and ``seq`` attributes,
            such as the records yielded by ``Bio.SeqIO.parse``.
        out: writable text stream; ``sys.stdout`` is used when omitted.

    Returns:
        The number of records written.
    """
    if out is None:
        out = sys.stdout

    written = 0
    for record in records:
        # str() is applied explicitly because the sequence is normally a
        # Seq-like object rather than a plain string.
        out.write(">{}\n{}*\n".format(str(record.id), str(record.seq)))
        written += 1
    return written


def main(argv=None):
    """Parse the FASTA file named in ``argv[1]`` and print its records.

    Args:
        argv: full argument vector including the program name; defaults to
            ``sys.argv``.  Arguments after the input path are ignored.

    Returns:
        Process exit status: 0 on success, 2 when no input path was given.
    """
    if argv is None:
        argv = sys.argv

    if len(argv) < 2:
        print(
            "usage: python3 parse_FASTA.py [input.fasta] > [output.fasta]",
            file=sys.stderr,
        )
        return 2

    # Imported lazily so the usage error above is reported even when
    # Biopython is not installed.
    from Bio import SeqIO

    # The context manager closes the handle; no explicit close() is needed.
    with open(argv[1], "r") as fasta_handle:
        write_records(SeqIO.parse(fasta_handle, "fasta"))
    return 0


if __name__ == "__main__":
    sys.exit(main())