#!/usr/bin/python
#MPRT - Finding a Protein Motif Solution
#Problem can be found at http://rosalind.info/problems/mprt/
#Author: Peter Vlasveld

import re
from contextlib import closing

try:
    from urllib2 import urlopen  # Python 2
except ImportError:
    from urllib.request import urlopen  # Python 3

#declare motif
motif = "N[^P][ST][^P]"


def find_motif_positions(protStr, pattern=motif):
    """Return the 1-based start positions of every match of pattern.

    Matches may overlap. The pattern is wrapped in a zero-width lookahead
    so that the regex engine tries every index of protStr, including the
    last possible window (the previous hand-written loop stopped one
    position early and missed a motif at the very end of the protein).

    protStr -- the protein sequence as a single string.
    pattern -- regular expression source for the motif; defaults to the
               module-level motif.
    """
    finder = re.compile("(?=(?:" + pattern + "))")
    return [m.start() + 1 for m in finder.finditer(protStr)]


def extract_sequence(fastaLines):
    """Join all lines that are not FASTA headers ('>' lines) into one string."""
    return "".join(line for line in fastaLines if not line.startswith('>'))


def fetch_fasta_lines(accession):
    """Download the FASTA record for accession and return it as text lines.

    Any error raised by urlopen (for example an unknown accession ID)
    propagates to the caller.
    """
    url = "http://www.uniprot.org/uniprot/" + accession + ".fasta"
    # closing() guarantees the connection is released on both Python 2 and 3.
    with closing(urlopen(url)) as response:
        # Decode once at the I/O boundary so the rest of the script
        # works on text regardless of the Python version.
        return response.read().decode("utf-8").splitlines()


def main():
    """Read accession IDs, look up each protein and report motif positions.

    IDs are read from rosalind_mprt.txt (one per line). For every protein
    containing the motif, the ID and the space-separated 1-based positions
    are printed and also written to output.txt.
    """
    #read in file
    with open("rosalind_mprt.txt", "r") as f0:
        content = f0.read().splitlines()

    #open output file
    with open("output.txt", "w+") as f1:
        #loop through each accession ID
        for i in content:
            i = i.strip()
            # A blank line would otherwise be turned into a bogus URL.
            if not i:
                continue

            #get fasta from url and format protein string
            protStr = extract_sequence(fetch_fasta_lines(i))

            positions = find_motif_positions(protStr)
            if not positions:
                continue

            #construct output string (each position is followed by a
            #space, matching the format this script has always produced)
            outStr = "".join(str(p) + " " for p in positions)

            #output
            print(i)
            f1.write(i + "\n")
            print(outStr)
            f1.write(outStr + "\n")


if __name__ == "__main__":
    main()