From 708da2b7c7c33bb2a58f22aff6b1419f42fef0a3 Mon Sep 17 00:00:00 2001
From: Laura Cook <l.cook2@student.unimelb.edu.au>
Date: Wed, 23 Sep 2020 19:58:59 +1000
Subject: [PATCH] first commit - script parses a multiFASTA and outputs a
 separate fasta file for each sequence header

---
 .../scripts/parse_FASTA.py                    | 24 +++++++++++++++++++
 1 file changed, 24 insertions(+)
 create mode 100644 cross_species_comparison/scripts/parse_FASTA.py

diff --git a/cross_species_comparison/scripts/parse_FASTA.py b/cross_species_comparison/scripts/parse_FASTA.py
new file mode 100644
index 0000000..e6bde95
--- /dev/null
+++ b/cross_species_comparison/scripts/parse_FASTA.py
@@ -0,0 +1,24 @@
+#!/usr/bin/env python3
+"""Read the multi-FASTA given as argv[1] and print every record to stdout."""
+import string
+import random
+import sys
+import os
+from Bio import SeqIO
+
+## usage: python3 parse_FASTA.py [input.fasta] > [output.fasta]
+
+fasta_path = sys.argv[1]
+
+# Open the input multi-FASTA; the with-block closes the handle on exit
+with open(fasta_path, 'r') as all_TWARs:
+
+    # for each record (TWAR header) in the file parse it as a FASTA
+    for record in SeqIO.parse(all_TWARs, "fasta"):
+        # identifier of the current record (named to avoid shadowing id())
+        record_id = record.id
+        # sequence of the current record
+        seq = record.seq
+
+        # print the ">" header line, then the sequence followed by "*"
+        print(">" + str(record_id) + "\n" + str(seq) + "*")
-- 
GitLab