# File-viewer metadata from the original listing: 45 lines, 929 B, Python
#!/usr/bin/python
#MPRT - Finding a Protein Motif Solution
#Problem can be found at http://rosalind.info/problems/mprt/
#Author: Peter Vlasveld
"""Report where the N-glycosylation motif occurs in a list of UniProt proteins.

Reads one protein ID per line from ``rosalind_mprt.txt``, downloads each
protein's FASTA record, and for every protein that contains the motif prints
the ID followed by the 1-based start positions, mirroring the same text into
``output.txt``.
"""

import re

try:
    import urllib2  # Python 2
except ImportError:
    # Python 3: bind the same name so the call site reads identically.
    import urllib.request as urllib2

#declare motif
motif = "N[^P][ST][^P]"


def find_motif_positions(protein, pattern=motif):
    """Return the 1-based start positions of every match of pattern.

    Matches may overlap. The pattern is wrapped in a zero-width lookahead so
    the scanner advances one residue at a time and tests every start
    position, including the final window at the end of the sequence.

    Args:
        protein: Protein sequence as a string of one-letter residue codes.
        pattern: Regular expression describing the motif.

    Returns:
        A list of ints in ascending order; empty when nothing matches.
    """
    lookahead = re.compile("(?=(?:" + pattern + "))")
    return [found.start() + 1 for found in lookahead.finditer(protein)]


def extract_sequence(fasta_lines):
    """Join the sequence lines of a FASTA record, dropping '>' header lines."""
    return "".join(
        line for line in fasta_lines if not line.startswith('>')
    )


def format_positions(positions):
    """Render positions as text, each number followed by a single space."""
    # The trailing space is kept on purpose so the output stays byte-identical
    # to what this script has always produced.
    return "".join(str(position) + " " for position in positions)


def fetch_fasta_lines(accession):
    """Download the FASTA record for accession and return it as text lines."""
    url = "http://www.uniprot.org/uniprot/" + accession + ".fasta"
    response = urllib2.urlopen(url)
    try:
        payload = response.read()
    finally:
        # Release the connection even if the read fails part-way.
        response.close()
    if not isinstance(payload, str):
        # Python 3 hands back bytes; FASTA is plain text.
        payload = payload.decode("utf-8")
    return payload.splitlines()


def main():
    """Process every ID in the input file and emit the matching positions."""
    #read in file
    with open("rosalind_mprt.txt", "r") as id_file:
        content = id_file.read().splitlines()

    #open output file
    with open("output.txt", "w+") as out_file:
        #loop through each accession ID
        for raw_id in content:
            accession = raw_id.strip()
            if not accession:
                # A blank line would otherwise request ".fasta" with no ID.
                continue

            protein = extract_sequence(fetch_fasta_lines(accession))
            positions = find_motif_positions(protein)

            #output only proteins that actually contain the motif
            if positions:
                out_str = format_positions(positions)
                print(accession)
                out_file.write(accession + "\n")
                print(out_str)
                out_file.write(out_str + "\n")


if __name__ == "__main__":
    main()
