From 43de339fc56d1d9ca07fcc67ec4ef32c4ebabcd1 Mon Sep 17 00:00:00 2001
From: Fizzizist <git@fizzizist.33mail.com>
Date: Tue, 18 Feb 2025 21:00:05 -0500
Subject: [PATCH] Fix MPRT: port to Python 3 and fetch FASTA with requests

---
 .gitignore     |  2 ++
 python/MPRT.py | 71 ++++++++++++++++++++++++++++----------------------
 2 files changed, 42 insertions(+), 31 deletions(-)
 create mode 100644 .gitignore

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..da96889
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+.venv
+*.txt
diff --git a/python/MPRT.py b/python/MPRT.py
index 015e8d1..91c1878 100644
--- a/python/MPRT.py
+++ b/python/MPRT.py
@@ -1,44 +1,53 @@
 #!/usr/bin/python
-#MPRT - Finding a Protein Motif Solution
-#Problem can be found at http://rosalind.info/problems/mprt/
-#Author: Peter Vlasveld
+# MPRT - Finding a Protein Motif Solution
+# Problem can be found at http://rosalind.info/problems/mprt/
+# Author: Peter Vlasveld
 
-import urllib2
 import re
+from time import sleep
 
-#declare motif
+import requests
+
+# declare motif as a regex: N, any residue but P, S or T, any residue but P
 motif = "N[^P][ST][^P]"
 
-#read in file
+# read in file: each line is used as one UniProt access ID below
 f0 = open("rosalind_mprt.txt", "r")
 content = f0.read().splitlines()
 f0.close()
 
-#open output file
+# open output file ("w+" truncates any results from a previous run)
 f1 = open("output.txt", "w+")
-#loop through each accession ID
+# loop through each accession ID
 for i in content:
-	#get fasta from url
-	url = "http://www.uniprot.org/uniprot/" + i + ".fasta"
-	response = urllib2.urlopen(url)
-	fasta = response.read().splitlines()
-	
-	#format protein string
-	protStr = ""
-	for j in fasta:
-		if not j.startswith('>'):
-			protStr += j
-	#construct output strings
-	outStr = ""
-	for j in range(0, len(protStr)-4):
-		if re.match(motif,protStr[j:j+4]):
-			outStr += str(j+1) + " "
-	
-	#output
-	if not outStr == "":
-		print i
-		f1.write(i + "\n")
-		print outStr
-		f1.write(outStr + "\n")
-#close output file
+    # get fasta from url; only the ID text before the first "_" is sent
+    url = "http://www.uniprot.org/uniprot/" + i.split("_")[0] + ".fasta"
+    response = requests.get(url)
+    sleep(1)  # throttle right after the request so skipped IDs wait too
+
+    if response.status_code != 200:
+        print(f"uniprot request failed for {i}")
+        print(f"detail: {response.text}")
+        continue  # an error body is not FASTA, so do not scan it
+
+    # format protein string: join the sequence lines, drop ">" headers
+    protStr = "".join(
+        j for j in response.text.splitlines() if not j.startswith(">")
+    )
+
+    # construct output strings: 1-based start of each matching window of
+    # four residues; the bound is len - 3 so the last window is included
+    outStr = ""
+    for j in range(0, len(protStr) - 3):
+        if re.match(motif, protStr[j: j + 4]):
+            outStr += str(j + 1) + " "
+
+    # output: IDs without any match are left out of the results
+    if not outStr == "":
+        print(i)
+        f1.write(i + "\n")
+        print(outStr)
+        f1.write(outStr + "\n")
+
+# close output file
 f1.close()