update ORF to python 3
This commit is contained in:
parent
43de339fc5
commit
222e3ee447
140
python/ORF.py
140
python/ORF.py
@ -1,65 +1,109 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
#Solution to the ORF rosalind problem - 'Open Reading Frames'
|
||||
#Author: Peter Vlasveld
|
||||
# Solution to the ORF rosalind problem - 'Open Reading Frames'
|
||||
# Author: Peter Vlasveld
|
||||
|
||||
#function to generate the orfs
|
||||
# function to generate the orfs
|
||||
def orfGen(stri):
|
||||
#dictionary for translation
|
||||
# dictionary for translation
|
||||
translate = {
|
||||
"AAA":'K', "AAG":'K',
|
||||
"GAA":'E', "GAG":'E',
|
||||
"AAC":'N', "AAU":'N',
|
||||
"GAC":'D', "GAU":'D',
|
||||
"ACA":'T', "ACC":'T', "ACG":'T', "ACU":'T',
|
||||
"GCA":"A", "GCC":"A", "GCG":"A", "GCU":"A",
|
||||
"GGA":"G", "GGC":"G", "GGG":"G", "GGU":"G",
|
||||
"GUA":"V", "GUC":"V", "GUG":"V", "GUU":"V",
|
||||
"AUG":"M",
|
||||
"UAA":"*", "UAG":"*", "UGA":"*",
|
||||
"AUC":"I", "AUU":"I", "AUA":"I",
|
||||
"UAC":"Y", "UAU":"Y",
|
||||
"CAA":"Q", "CAG":"Q",
|
||||
"AGC":"S", "AGU":"S", "UCA":"S", "UCC":"S", "UCG":"S", "UCU":"S",
|
||||
"CAC":"H", "CAU":"H",
|
||||
"UGC":"C", "UGU":"C",
|
||||
"CCA":"P", "CCC":"P", "CCG":"P", "CCU":"P",
|
||||
"UGG":"W",
|
||||
"AGA":"R", "AGG":"R", "CGA":"R", "CGC":"R", "CGG":"R", "CGU":"R",
|
||||
"UUA":"L", "UUG":"L", "CUA":"L", "CUC":"L", "CUG":"L", "CUU":"L",
|
||||
"UUC":"F", "UUU":"F"
|
||||
"AAA": "K",
|
||||
"AAG": "K",
|
||||
"GAA": "E",
|
||||
"GAG": "E",
|
||||
"AAC": "N",
|
||||
"AAU": "N",
|
||||
"GAC": "D",
|
||||
"GAU": "D",
|
||||
"ACA": "T",
|
||||
"ACC": "T",
|
||||
"ACG": "T",
|
||||
"ACU": "T",
|
||||
"GCA": "A",
|
||||
"GCC": "A",
|
||||
"GCG": "A",
|
||||
"GCU": "A",
|
||||
"GGA": "G",
|
||||
"GGC": "G",
|
||||
"GGG": "G",
|
||||
"GGU": "G",
|
||||
"GUA": "V",
|
||||
"GUC": "V",
|
||||
"GUG": "V",
|
||||
"GUU": "V",
|
||||
"AUG": "M",
|
||||
"UAA": "*",
|
||||
"UAG": "*",
|
||||
"UGA": "*",
|
||||
"AUC": "I",
|
||||
"AUU": "I",
|
||||
"AUA": "I",
|
||||
"UAC": "Y",
|
||||
"UAU": "Y",
|
||||
"CAA": "Q",
|
||||
"CAG": "Q",
|
||||
"AGC": "S",
|
||||
"AGU": "S",
|
||||
"UCA": "S",
|
||||
"UCC": "S",
|
||||
"UCG": "S",
|
||||
"UCU": "S",
|
||||
"CAC": "H",
|
||||
"CAU": "H",
|
||||
"UGC": "C",
|
||||
"UGU": "C",
|
||||
"CCA": "P",
|
||||
"CCC": "P",
|
||||
"CCG": "P",
|
||||
"CCU": "P",
|
||||
"UGG": "W",
|
||||
"AGA": "R",
|
||||
"AGG": "R",
|
||||
"CGA": "R",
|
||||
"CGC": "R",
|
||||
"CGG": "R",
|
||||
"CGU": "R",
|
||||
"UUA": "L",
|
||||
"UUG": "L",
|
||||
"CUA": "L",
|
||||
"CUC": "L",
|
||||
"CUG": "L",
|
||||
"CUU": "L",
|
||||
"UUC": "F",
|
||||
"UUU": "F",
|
||||
}
|
||||
|
||||
#list to contain protein sequences
|
||||
# list to contain protein sequences
|
||||
proteins = []
|
||||
|
||||
#loop that runs through the sequences from each amino acid in the sequence
|
||||
for i in xrange(0, len(stri)-2):
|
||||
# loop that runs through the sequences from each amino acid in the sequence
|
||||
for i in range(0, len(stri) - 2):
|
||||
tempStr = ""
|
||||
tempBool = False
|
||||
j = i
|
||||
#find an orf and break when it is finished
|
||||
while j < len(stri)-2:
|
||||
if translate[stri[j:j+3]] == "*":
|
||||
# find an orf and break when it is finished
|
||||
while j < len(stri) - 2:
|
||||
if translate[stri[j: j + 3]] == "*":
|
||||
tempBool = False
|
||||
if translate[stri[j:j+3]] == "M":
|
||||
if translate[stri[j: j + 3]] == "M":
|
||||
tempBool = True
|
||||
if tempBool:
|
||||
tempStr += translate[stri[j:j+3]]
|
||||
tempStr += translate[stri[j: j + 3]]
|
||||
else:
|
||||
break
|
||||
j += 3
|
||||
#add the orf to proteins only if it ends with a stop codon
|
||||
if tempStr != "" and tempBool == False:
|
||||
# add the orf to proteins only if it ends with a stop codon
|
||||
if tempStr != "" and tempBool is False:
|
||||
proteins.extend([tempStr])
|
||||
#return the protein list
|
||||
# return the protein list
|
||||
return proteins
|
||||
|
||||
#function to return the DNA compliment
|
||||
|
||||
# function to return the DNA compliment
|
||||
def DNACompliment(stri):
|
||||
rev = stri[::-1]
|
||||
revList = list(rev)
|
||||
for i in xrange(0, len(revList)):
|
||||
for i in range(0, len(revList)):
|
||||
if revList[i] == "A":
|
||||
revList[i] = "T"
|
||||
elif revList[i] == "T":
|
||||
@ -71,32 +115,34 @@ def DNACompliment(stri):
|
||||
revCompStr = "".join(revList)
|
||||
return revCompStr
|
||||
|
||||
#get file content
|
||||
|
||||
# get file content
|
||||
f = open("rosalind_orf.txt")
|
||||
content = f.readlines()
|
||||
f.close()
|
||||
|
||||
DNAStr = ""
|
||||
#construct full DNA sequence
|
||||
for i in xrange(1, len(content)):
|
||||
# construct full DNA sequence
|
||||
for i in range(1, len(content)):
|
||||
DNAStr += content[i]
|
||||
|
||||
#remove whitespace
|
||||
# remove whitespace
|
||||
noWhiteDNA = "".join(DNAStr.split())
|
||||
|
||||
#convert DNA to RNA
|
||||
# convert DNA to RNA
|
||||
RNA = noWhiteDNA.replace("T", "U")
|
||||
|
||||
#get DNA compliment and convert that to RNA as well
|
||||
# get DNA compliment and convert that to RNA as well
|
||||
revCompRNA = DNACompliment(noWhiteDNA).replace("T", "U")
|
||||
|
||||
#get orfs for both sequences
|
||||
# get orfs for both sequences
|
||||
protList = orfGen(RNA)
|
||||
protList.extend(orfGen(revCompRNA))
|
||||
|
||||
#get rid of duplicates
|
||||
# get rid of duplicates
|
||||
finalList = list(set(protList))
|
||||
|
||||
#print the list
|
||||
# print the list
|
||||
for i in finalList:
|
||||
print(i)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user