added solution to ORF problem
This commit is contained in:
		
							parent
							
								
									96c59030ff
								
							
						
					
					
						commit
						25bfaf8e87
					
				
							
								
								
									
										102
									
								
								ORF.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										102
									
								
								ORF.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,102 @@ | |||||||
|  | #!/usr/bin/python | ||||||
|  | 
 | ||||||
|  | #Solution to the ORF rosalind problem - 'Open Reading Frames' | ||||||
|  | #Author: Peter Vlasveld | ||||||
|  | 
 | ||||||
# Standard genetic code: RNA codon -> one-letter amino acid ("*" marks a stop codon).
_CODON_TABLE = {
    "AAA": "K", "AAG": "K",
    "GAA": "E", "GAG": "E",
    "AAC": "N", "AAU": "N",
    "GAC": "D", "GAU": "D",
    "ACA": "T", "ACC": "T", "ACG": "T", "ACU": "T",
    "GCA": "A", "GCC": "A", "GCG": "A", "GCU": "A",
    "GGA": "G", "GGC": "G", "GGG": "G", "GGU": "G",
    "GUA": "V", "GUC": "V", "GUG": "V", "GUU": "V",
    "AUG": "M",
    "UAA": "*", "UAG": "*", "UGA": "*",
    "AUC": "I", "AUU": "I", "AUA": "I",
    "UAC": "Y", "UAU": "Y",
    "CAA": "Q", "CAG": "Q",
    "AGC": "S", "AGU": "S", "UCA": "S", "UCC": "S", "UCG": "S", "UCU": "S",
    "CAC": "H", "CAU": "H",
    "UGC": "C", "UGU": "C",
    "CCA": "P", "CCC": "P", "CCG": "P", "CCU": "P",
    "UGG": "W",
    "AGA": "R", "AGG": "R", "CGA": "R", "CGC": "R", "CGG": "R", "CGU": "R",
    "UUA": "L", "UUG": "L", "CUA": "L", "CUC": "L", "CUG": "L", "CUU": "L",
    "UUC": "F", "UUU": "F",
}


def orfGen(stri):
    """Return the protein translated from every open reading frame in an RNA string.

    An open reading frame starts at an ``AUG`` codon and runs, three bases at
    a time, up to the first stop codon. Every position in ``stri`` is tried
    as a start, so all three reading frames are covered and overlapping
    frames are reported separately.

    Args:
        stri: RNA sequence made of the characters ``A``, ``C``, ``G``, ``U``.

    Returns:
        A list of protein strings (stop codon excluded), ordered by the
        position of their start codon. Duplicates are kept. A frame that
        reaches the end of the sequence without a stop codon is dropped.

    Raises:
        KeyError: if a codon that is examined contains a character outside
            ``ACGU``.
    """
    proteins = []
    # A full codon needs three bases, so valid codon starts are 0 .. len-3.
    # For inputs shorter than three bases this is negative and range() is empty.
    codon_starts_end = len(stri) - 2

    for start in range(codon_starts_end):
        # Only a start codon opens a frame; the lookup also rejects bad codons.
        if _CODON_TABLE[stri[start:start + 3]] != "M":
            continue

        residues = []
        for pos in range(start, codon_starts_end, 3):
            amino = _CODON_TABLE[stri[pos:pos + 3]]
            if amino == "*":
                # Frame is closed by a stop codon: keep it.
                proteins.append("".join(residues))
                break
            residues.append(amino)
        # Falling off the end of the loop means no stop codon was found,
        # so the partial translation is discarded.

    return proteins
|  | 
 | ||||||
# Watson-Crick base pairing used to build the complementary DNA strand.
_DNA_COMPLEMENT = {"A": "T", "T": "A", "G": "C", "C": "G"}


def DNACompliment(stri):
    """Return the reverse complement of a DNA string.

    The sequence is reversed and each of ``A``, ``T``, ``G``, ``C`` is
    swapped for its pairing base. Any other character (lowercase bases,
    ambiguity codes, whitespace) is copied through unchanged.

    Args:
        stri: DNA sequence.

    Returns:
        The reverse-complemented sequence as a new string.
    """
    return "".join(_DNA_COMPLEMENT.get(base, base) for base in reversed(stri))
|  | 
 | ||||||
# Read the FASTA input; the context manager closes the file even on error.
with open("rosalind_orf.txt") as f:
    content = f.readlines()

# Construct the full DNA sequence: the first line is the FASTA header,
# every following line is part of the sequence.
DNAStr = "".join(content[1:])

# Remove whitespace (line breaks and any stray spaces).
noWhiteDNA = "".join(DNAStr.split())

# Convert DNA to RNA.
RNA = noWhiteDNA.replace("T", "U")

# Get the DNA reverse complement and convert that to RNA as well.
revCompRNA = DNACompliment(noWhiteDNA).replace("T", "U")

# Get ORFs for both strands.
protList = orfGen(RNA)
protList.extend(orfGen(revCompRNA))

# Get rid of duplicates (the resulting order is arbitrary).
finalList = list(set(protList))

# Print each distinct protein on its own line.
for i in finalList:
    print(i)
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Peter Vlasveld
						Peter Vlasveld