rosalind-solutions/MPRT.py
2018-12-16 11:15:51 -05:00

45 lines
929 B
Python

#!/usr/bin/python
#MPRT - Finding a Protein Motif Solution
#Problem can be found at http://rosalind.info/problems/mprt/
#Author: Peter Vlasveld
import urllib2
import re
# Motif to search for: N, then any residue except P, then S or T, then any
# residue except P.
motif = "N[^P][ST][^P]"
# Compiled once so the per-position scan does not repeat the pattern lookup.
MOTIF_RE = re.compile(motif)


def fetch_protein(accession):
    """Download the FASTA record for *accession* and return its sequence.

    FASTA header lines (those starting with '>') are discarded and the
    remaining lines are concatenated into one string.
    """
    url = "http://www.uniprot.org/uniprot/" + accession + ".fasta"
    response = urllib2.urlopen(url)
    try:
        fasta = response.read().splitlines()
    finally:
        # Release the connection even if reading the body fails.
        response.close()
    return "".join(line for line in fasta if not line.startswith('>'))


def find_motif_positions(protStr):
    """Return the 1-based start position of every motif occurrence.

    The pattern is anchored at each index in turn, so overlapping
    occurrences are all reported and the final window of the sequence is
    included (a motif that ends the protein is found).
    """
    return [
        start + 1
        for start in range(len(protStr))
        # match(string, pos) only succeeds if the motif begins exactly at pos;
        # near the end of the string there are too few residues and it fails.
        if MOTIF_RE.match(protStr, start)
    ]


def main():
    """Read accession IDs, scan each protein, and report motif positions.

    IDs are read one per line from ``rosalind_mprt.txt``. For every protein
    containing the motif, the ID and its positions are printed and written
    to ``output.txt``.
    """
    with open("rosalind_mprt.txt", "r") as f0:
        content = f0.read().splitlines()

    with open("output.txt", "w+") as f1:
        for line in content:
            accession = line.strip()
            # A blank line would otherwise be requested as ".../uniprot/.fasta".
            if not accession:
                continue

            positions = find_motif_positions(fetch_protein(accession))
            if not positions:
                continue

            # Every position is followed by one space, including the last,
            # which keeps the emitted text byte-compatible with earlier runs.
            outStr = "".join(str(pos) + " " for pos in positions)
            print(accession)
            f1.write(accession + "\n")
            print(outStr)
            f1.write(outStr + "\n")


if __name__ == "__main__":
    main()