update ORF to python 3

2025-02-18 21:08:41 -05:00
parent 43de339fc5
commit 222e3ee447
1 changed files with 122 additions and 76 deletions
--- a/python/ORF.py
+++ b/python/ORF.py
@@ -1,102 +1,148 @@
 #!/usr/bin/python

-#Solution to the ORF rosalind problem - 'Open Reading Frames'
-#Author: Peter Vlasveld
+# Solution to the ORF rosalind problem - 'Open Reading Frames'
+# Author: Peter Vlasveld

-#function to generate the orfs
+# function to generate the orfs
 def orfGen(stri):
-	#dictionary for translation
-	translate = { 
-		"AAA":'K', "AAG":'K',
-		"GAA":'E', "GAG":'E',
-		"AAC":'N', "AAU":'N',
-		"GAC":'D', "GAU":'D',
-		"ACA":'T', "ACC":'T', "ACG":'T', "ACU":'T',
-		"GCA":"A", "GCC":"A", "GCG":"A", "GCU":"A",
-		"GGA":"G", "GGC":"G", "GGG":"G", "GGU":"G",
-		"GUA":"V", "GUC":"V", "GUG":"V", "GUU":"V",
-		"AUG":"M",
-		"UAA":"*", "UAG":"*", "UGA":"*",
-		"AUC":"I", "AUU":"I", "AUA":"I",
-		"UAC":"Y", "UAU":"Y",
-		"CAA":"Q", "CAG":"Q",
-		"AGC":"S", "AGU":"S", "UCA":"S", "UCC":"S", "UCG":"S", "UCU":"S",
-		"CAC":"H", "CAU":"H",
-		"UGC":"C", "UGU":"C",
-		"CCA":"P", "CCC":"P", "CCG":"P", "CCU":"P",
-		"UGG":"W",
-		"AGA":"R", "AGG":"R", "CGA":"R", "CGC":"R", "CGG":"R", "CGU":"R",
-		"UUA":"L", "UUG":"L", "CUA":"L", "CUC":"L", "CUG":"L", "CUU":"L",
-		"UUC":"F", "UUU":"F"
-	}
-	
-	#list to contain protein sequences
-	proteins = []
+    # dictionary for translation
+    translate = {
+        "AAA": "K",
+        "AAG": "K",
+        "GAA": "E",
+        "GAG": "E",
+        "AAC": "N",
+        "AAU": "N",
+        "GAC": "D",
+        "GAU": "D",
+        "ACA": "T",
+        "ACC": "T",
+        "ACG": "T",
+        "ACU": "T",
+        "GCA": "A",
+        "GCC": "A",
+        "GCG": "A",
+        "GCU": "A",
+        "GGA": "G",
+        "GGC": "G",
+        "GGG": "G",
+        "GGU": "G",
+        "GUA": "V",
+        "GUC": "V",
+        "GUG": "V",
+        "GUU": "V",
+        "AUG": "M",
+        "UAA": "*",
+        "UAG": "*",
+        "UGA": "*",
+        "AUC": "I",
+        "AUU": "I",
+        "AUA": "I",
+        "UAC": "Y",
+        "UAU": "Y",
+        "CAA": "Q",
+        "CAG": "Q",
+        "AGC": "S",
+        "AGU": "S",
+        "UCA": "S",
+        "UCC": "S",
+        "UCG": "S",
+        "UCU": "S",
+        "CAC": "H",
+        "CAU": "H",
+        "UGC": "C",
+        "UGU": "C",
+        "CCA": "P",
+        "CCC": "P",
+        "CCG": "P",
+        "CCU": "P",
+        "UGG": "W",
+        "AGA": "R",
+        "AGG": "R",
+        "CGA": "R",
+        "CGC": "R",
+        "CGG": "R",
+        "CGU": "R",
+        "UUA": "L",
+        "UUG": "L",
+        "CUA": "L",
+        "CUC": "L",
+        "CUG": "L",
+        "CUU": "L",
+        "UUC": "F",
+        "UUU": "F",
+    }

-	#loop that runs through the sequences from each amino acid in the sequence
-	for i in xrange(0, len(stri)-2):
-		tempStr = ""
-		tempBool = False
-		j = i
-		#find an orf and break when it is finished
-		while j < len(stri)-2:
-			if translate[stri[j:j+3]] == "*":
-				tempBool = False
-			if translate[stri[j:j+3]] == "M":
-				tempBool = True
-			if tempBool:
-				tempStr += translate[stri[j:j+3]]
-			else:
-				break
-			j += 3
-		#add the orf to proteins only if it ends with a stop codon
-		if tempStr != "" and tempBool == False:
-			proteins.extend([tempStr])
-	#return the protein list			
-	return proteins
+    # list to contain protein sequences
+    proteins = []

-#function to return the DNA compliment
+    # try every nucleotide position in the sequence as a possible ORF start
+    for i in range(0, len(stri) - 2):
+        tempStr = ""
+        tempBool = False
+        j = i
+        # find an orf and break when it is finished
+        while j < len(stri) - 2:
+            if translate[stri[j: j + 3]] == "*":
+                tempBool = False
+            if translate[stri[j: j + 3]] == "M":
+                tempBool = True
+            if tempBool:
+                tempStr += translate[stri[j: j + 3]]
+            else:
+                break
+            j += 3
+        # add the orf to proteins only if it ends with a stop codon
+        if tempStr != "" and tempBool is False:
+            proteins.extend([tempStr])
+    # return the protein list
+    return proteins
+
+
+# function to return the reverse complement of a DNA string
 def DNACompliment(stri):
-	rev = stri[::-1]
-	revList = list(rev)
-	for i in xrange(0, len(revList)):
-		if revList[i] == "A":
-			revList[i] = "T"
-		elif revList[i] == "T":
-			revList[i] = "A"
-		elif revList[i] == "G":
-			revList[i] = "C"
-		elif revList[i] == "C":
-			revList[i] = "G"
-	revCompStr = "".join(revList)
-	return revCompStr
+    rev = stri[::-1]
+    revList = list(rev)
+    for i in range(0, len(revList)):
+        if revList[i] == "A":
+            revList[i] = "T"
+        elif revList[i] == "T":
+            revList[i] = "A"
+        elif revList[i] == "G":
+            revList[i] = "C"
+        elif revList[i] == "C":
+            revList[i] = "G"
+    revCompStr = "".join(revList)
+    return revCompStr

-#get file content
+
+# get file content
 f = open("rosalind_orf.txt")
 content = f.readlines()
 f.close()

 DNAStr = ""
-#construct full DNA sequence
-for i in xrange(1, len(content)):
-	DNAStr += content[i]
+# construct full DNA sequence
+for i in range(1, len(content)):
+    DNAStr += content[i]

-#remove whitespace
+# remove whitespace
 noWhiteDNA = "".join(DNAStr.split())

-#convert DNA to RNA
+# convert DNA to RNA
 RNA = noWhiteDNA.replace("T", "U")

-#get DNA compliment and convert that to RNA as well
+# get the reverse complement of the DNA and convert that to RNA as well
 revCompRNA = DNACompliment(noWhiteDNA).replace("T", "U")

-#get orfs for both sequences
+# get orfs for both sequences
 protList = orfGen(RNA)
 protList.extend(orfGen(revCompRNA))

-#get rid of duplicates
+# get rid of duplicates
 finalList = list(set(protList))

-#print the list
+# print the list
 for i in finalList:
-	print(i)
+    print(i)
+