From 43de339fc56d1d9ca07fcc67ec4ef32c4ebabcd1 Mon Sep 17 00:00:00 2001
From: Fizzizist
Date: Tue, 18 Feb 2025 21:00:05 -0500
Subject: [PATCH] fixed MPRT

---
Review notes (text between the "---" line and the diffstat is ignored by
git am):

- This patch was re-flowed from a copy whose line breaks had been collapsed
  into spaces. NOTE(review): the tab indentation of the removed lines and
  the position of the blank context lines are reconstructed, not copied;
  confirm them against blob 015e8d1 before applying.
- The added lines differ from the original commit (see below), so the
  post-image blob id 91c1878 in the index line is stale. The hunk header
  and the diffstat were recomputed for the revised content.
- Revisions to the added code:
  * the window loop bound is len(protStr) - 3, so a motif in the last four
    residues is no longer skipped;
  * a failed lookup (non-200 status or a network error) is reported and the
    ID is skipped instead of scanning the error body as a sequence;
  * requests.get() is given a timeout so one stalled connection cannot hang
    the whole run;
  * the one-second pause now follows every request, including failed ones.

 .gitignore     |  2 ++
 python/MPRT.py | 81 +++++++++++++++++++++++++++++++-------------------
 2 files changed, 52 insertions(+), 31 deletions(-)
 create mode 100644 .gitignore

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..da96889
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+.venv
+*.txt
diff --git a/python/MPRT.py b/python/MPRT.py
index 015e8d1..91c1878 100644
--- a/python/MPRT.py
+++ b/python/MPRT.py
@@ -1,44 +1,63 @@
 #!/usr/bin/python
-#MPRT - Finding a Protein Motif Solution
-#Problem can be found at http://rosalind.info/problems/mprt/
-#Author: Peter Vlasveld
+# MPRT - Finding a Protein Motif Solution
+# Problem can be found at http://rosalind.info/problems/mprt/
+# Author: Peter Vlasveld
 
-import urllib2
 import re
+from time import sleep
 
-#declare motif
+import requests
+
+# declare motif
 motif = "N[^P][ST][^P]"
 
-#read in file
+# read in file
 f0 = open("rosalind_mprt.txt", "r")
 content = f0.read().splitlines()
 f0.close()
 
-#open output file
+# open output file
 f1 = open("output.txt", "w+")
-#loop through each accession ID
+# loop through each accession ID
 for i in content:
-	#get fasta from url
-	url = "http://www.uniprot.org/uniprot/" + i + ".fasta"
-	response = urllib2.urlopen(url)
-	fasta = response.read().splitlines()
-	
-	#format protein string
-	protStr = ""
-	for j in fasta:
-		if not j.startswith('>'):
-			protStr += j
-	#construct output strings
-	outStr = ""
-	for j in range(0, len(protStr)-4):
-		if re.match(motif,protStr[j:j+4]):
-			outStr += str(j+1) + " "
-	
-	#output
-	if not outStr == "":
-		print i
-		f1.write(i + "\n")
-		print outStr
-		f1.write(outStr + "\n")
-#close output file
+    # get fasta from url
+    url = "http://www.uniprot.org/uniprot/" + i.split("_")[0] + ".fasta"
+    # requests has no default timeout; bound the wait so one stalled
+    try:
+        response = requests.get(url, timeout=30)
+        failure = None if response.status_code == 200 else response.text
+    except requests.RequestException as err:
+        failure = str(err)
+
+    # pause after every request, successful or not, before the next one
+    sleep(1)
+
+    if failure is not None:
+        print(f"uniprot request failed for {i}")
+        print(f"detail: {failure}")
+        # an error body is not a FASTA record, so do not scan it
+        continue
+
+    fasta = response.text.splitlines()
+
+    # format protein string
+    protStr = ""
+    for j in fasta:
+        if not j.startswith(">"):
+            protStr += j
+    # construct output strings
+    outStr = ""
+    # the last 4-residue window starts at len(protStr) - 4, so the
+    # exclusive range bound has to be len(protStr) - 3
+    for j in range(0, len(protStr) - 3):
+        if re.match(motif, protStr[j: j + 4]):
+            outStr += str(j + 1) + " "
+
+    # output
+    if not outStr == "":
+        print(i)
+        f1.write(i + "\n")
+        print(outStr)
+        f1.write(outStr + "\n")
+# close output file
 f1.close()