2018-12-16 16:15:51 +00:00
|
|
|
#!/usr/bin/python
|
2025-02-19 02:00:05 +00:00
|
|
|
# MPRT - Finding a Protein Motif Solution
|
|
|
|
# Problem can be found at http://rosalind.info/problems/mprt/
|
|
|
|
# Author: Peter Vlasveld
|
2018-12-16 16:15:51 +00:00
|
|
|
|
|
|
|
import re
|
2025-02-19 02:00:05 +00:00
|
|
|
from time import sleep
|
2018-12-16 16:15:51 +00:00
|
|
|
|
2025-02-19 02:00:05 +00:00
|
|
|
import requests
|
|
|
|
|
|
|
|
# declare motif
# Four-residue pattern: N, any residue except P, S or T, any residue except P.
motif = "N[^P][ST][^P]"

# The motif is wrapped in a zero-width lookahead so that finditer() reports
# every start position, including occurrences that overlap one another and an
# occurrence that ends exactly at the end of the sequence.
_MOTIF_RE = re.compile("(?=" + motif + ")")

INPUT_PATH = "rosalind_mprt.txt"
OUTPUT_PATH = "output.txt"


def parse_fasta_sequence(fasta_text):
    """Return the sequence contained in FASTA text as a single string.

    Lines starting with ">" (FASTA headers) are dropped; all remaining lines
    are concatenated in order.
    """
    return "".join(
        line for line in fasta_text.splitlines() if not line.startswith(">")
    )


def find_motif_positions(protein):
    """Return the 1-based start positions of every motif match in *protein*.

    Overlapping matches are all reported. An empty list means no match.
    """
    return [match.start() + 1 for match in _MOTIF_RE.finditer(protein)]


def fetch_fasta(accession_id):
    """Download the FASTA record for *accession_id* from UniProt.

    Only the part of the ID before the first underscore is used in the URL.
    Returns the response body on HTTP 200. On any other status the failure is
    printed and None is returned, so the caller never scans an error page as
    if it were a protein sequence.
    """
    url = "http://www.uniprot.org/uniprot/" + accession_id.split("_")[0] + ".fasta"
    response = requests.get(url)
    if response.status_code != 200:
        print(f"uniprot request failed for {accession_id}")
        print(f"detail: {response.text}")
        return None
    return response.text


def main():
    """Read accession IDs, look up each protein, and report motif positions.

    For every ID whose protein contains the motif, the ID and the
    space-separated 1-based positions are printed and written to the output
    file on consecutive lines.
    """
    # read in file
    with open(INPUT_PATH, "r") as id_file:
        accession_ids = id_file.read().splitlines()

    # open output file; the with-block closes it even if a request raises
    with open(OUTPUT_PATH, "w+") as out_file:
        # loop through each accession ID
        for accession_id in accession_ids:
            # A blank line would otherwise trigger a request for ".fasta".
            if not accession_id.strip():
                continue

            # get fasta from url
            fasta_text = fetch_fasta(accession_id)
            if fasta_text is not None:
                # format protein string and locate the motif
                protein = parse_fasta_sequence(fasta_text)
                positions = find_motif_positions(protein)

                # output
                if positions:
                    position_line = " ".join(str(pos) for pos in positions)
                    print(accession_id)
                    out_file.write(accession_id + "\n")
                    print(position_line)
                    out_file.write(position_line + "\n")

            # Pause after every request, successful or not (presumably to
            # avoid flooding the UniProt server).
            sleep(1)


if __name__ == "__main__":
    main()