rosalind-solutions/python/MPRT.py
2025-02-18 21:00:05 -05:00

54 lines
1.2 KiB
Python

#!/usr/bin/python
# MPRT - Finding a Protein Motif Solution
# Problem can be found at http://rosalind.info/problems/mprt/
# Author: Peter Vlasveld
import re
from time import sleep
import requests
# declare motif
motif = "N[^P][ST][^P]"
# The motif is wrapped in a zero-width lookahead so that overlapping
# occurrences are all reported and the final window of the sequence is
# not skipped.
_MOTIF_RE = re.compile("(?=" + motif + ")")
# Seconds to wait for the server before giving up on a request.
_REQUEST_TIMEOUT = 30


def find_motif_positions(protein):
    """Return the 1-based start positions of every motif match in *protein*.

    Overlapping matches are included. An empty list is returned when the
    motif does not occur (including for strings shorter than the motif).
    """
    return [m.start() + 1 for m in _MOTIF_RE.finditer(protein)]


def parse_fasta_sequence(fasta_text):
    """Return the sequence from FASTA text, dropping '>' header lines."""
    return "".join(
        line for line in fasta_text.splitlines() if not line.startswith(">")
    )


def fetch_fasta(accession_id):
    """Download the FASTA record for *accession_id*.

    Only the part of the ID before the first underscore is used to build
    the URL. Returns the response text, or None (after printing the
    failure details) when the server does not answer with status 200.
    """
    url = "http://www.uniprot.org/uniprot/" + accession_id.split("_")[0] + ".fasta"
    response = requests.get(url, timeout=_REQUEST_TIMEOUT)
    if response.status_code != 200:
        print(f"uniprot request failed for {accession_id}")
        print(f"detail: {response.text}")
        return None
    return response.text


def main():
    """Read IDs from rosalind_mprt.txt and write motif positions to output.txt.

    For each ID whose protein contains the motif, the ID and the
    space-separated 1-based positions are printed and written to the
    output file on consecutive lines.
    """
    # read in file, ignoring blank lines so they do not trigger requests
    with open("rosalind_mprt.txt", "r") as f0:
        content = [line.strip() for line in f0 if line.strip()]
    # open output file; the context manager closes it even on errors
    with open("output.txt", "w") as f1:
        # loop through each accession ID
        for i in content:
            fasta = fetch_fasta(i)
            # a failed request is skipped instead of being parsed as FASTA
            if fasta is not None:
                positions = find_motif_positions(parse_fasta_sequence(fasta))
                if positions:
                    outStr = " ".join(map(str, positions))
                    print(i)
                    f1.write(i + "\n")
                    print(outStr)
                    f1.write(outStr + "\n")
            # pause between requests to avoid hammering the server
            sleep(1)


if __name__ == "__main__":
    main()