update ORF to python 3

This commit is contained in:
Fizzizist 2025-02-18 21:08:41 -05:00
parent 43de339fc5
commit 222e3ee447

View File

@ -7,34 +7,77 @@
def orfGen(stri): def orfGen(stri):
# dictionary for translation # dictionary for translation
translate = { translate = {
"AAA":'K', "AAG":'K', "AAA": "K",
"GAA":'E', "GAG":'E', "AAG": "K",
"AAC":'N', "AAU":'N', "GAA": "E",
"GAC":'D', "GAU":'D', "GAG": "E",
"ACA":'T', "ACC":'T', "ACG":'T', "ACU":'T', "AAC": "N",
"GCA":"A", "GCC":"A", "GCG":"A", "GCU":"A", "AAU": "N",
"GGA":"G", "GGC":"G", "GGG":"G", "GGU":"G", "GAC": "D",
"GUA":"V", "GUC":"V", "GUG":"V", "GUU":"V", "GAU": "D",
"ACA": "T",
"ACC": "T",
"ACG": "T",
"ACU": "T",
"GCA": "A",
"GCC": "A",
"GCG": "A",
"GCU": "A",
"GGA": "G",
"GGC": "G",
"GGG": "G",
"GGU": "G",
"GUA": "V",
"GUC": "V",
"GUG": "V",
"GUU": "V",
"AUG": "M", "AUG": "M",
"UAA":"*", "UAG":"*", "UGA":"*", "UAA": "*",
"AUC":"I", "AUU":"I", "AUA":"I", "UAG": "*",
"UAC":"Y", "UAU":"Y", "UGA": "*",
"CAA":"Q", "CAG":"Q", "AUC": "I",
"AGC":"S", "AGU":"S", "UCA":"S", "UCC":"S", "UCG":"S", "UCU":"S", "AUU": "I",
"CAC":"H", "CAU":"H", "AUA": "I",
"UGC":"C", "UGU":"C", "UAC": "Y",
"CCA":"P", "CCC":"P", "CCG":"P", "CCU":"P", "UAU": "Y",
"CAA": "Q",
"CAG": "Q",
"AGC": "S",
"AGU": "S",
"UCA": "S",
"UCC": "S",
"UCG": "S",
"UCU": "S",
"CAC": "H",
"CAU": "H",
"UGC": "C",
"UGU": "C",
"CCA": "P",
"CCC": "P",
"CCG": "P",
"CCU": "P",
"UGG": "W", "UGG": "W",
"AGA":"R", "AGG":"R", "CGA":"R", "CGC":"R", "CGG":"R", "CGU":"R", "AGA": "R",
"UUA":"L", "UUG":"L", "CUA":"L", "CUC":"L", "CUG":"L", "CUU":"L", "AGG": "R",
"UUC":"F", "UUU":"F" "CGA": "R",
"CGC": "R",
"CGG": "R",
"CGU": "R",
"UUA": "L",
"UUG": "L",
"CUA": "L",
"CUC": "L",
"CUG": "L",
"CUU": "L",
"UUC": "F",
"UUU": "F",
} }
# list to contain protein sequences # list to contain protein sequences
proteins = [] proteins = []
# loop that tries every nucleotide position in the sequence as a reading-frame start # loop that tries every nucleotide position in the sequence as a reading-frame start
for i in xrange(0, len(stri)-2): for i in range(0, len(stri) - 2):
tempStr = "" tempStr = ""
tempBool = False tempBool = False
j = i j = i
@ -50,16 +93,17 @@ def orfGen(stri):
break break
j += 3 j += 3
# add the orf to proteins only if it ends with a stop codon # add the orf to proteins only if it ends with a stop codon
if tempStr != "" and tempBool == False: if tempStr != "" and tempBool is False:
proteins.extend([tempStr]) proteins.extend([tempStr])
# return the protein list # return the protein list
return proteins return proteins
# function to return the DNA reverse complement # function to return the DNA reverse complement
def DNACompliment(stri): def DNACompliment(stri):
rev = stri[::-1] rev = stri[::-1]
revList = list(rev) revList = list(rev)
for i in xrange(0, len(revList)): for i in range(0, len(revList)):
if revList[i] == "A": if revList[i] == "A":
revList[i] = "T" revList[i] = "T"
elif revList[i] == "T": elif revList[i] == "T":
@ -71,6 +115,7 @@ def DNACompliment(stri):
revCompStr = "".join(revList) revCompStr = "".join(revList)
return revCompStr return revCompStr
# get file content # get file content
f = open("rosalind_orf.txt") f = open("rosalind_orf.txt")
content = f.readlines() content = f.readlines()
@ -78,7 +123,7 @@ f.close()
DNAStr = "" DNAStr = ""
# construct full DNA sequence # construct full DNA sequence
for i in xrange(1, len(content)): for i in range(1, len(content)):
DNAStr += content[i] DNAStr += content[i]
# remove whitespace # remove whitespace
@ -100,3 +145,4 @@ finalList = list(set(protList))
# print the list # print the list
for i in finalList: for i in finalList:
print(i) print(i)