#!/usr/bin/python
# MPRT - Finding a Protein Motif Solution
# Problem can be found at http://rosalind.info/problems/mprt/
# Author: Peter Vlasveld
"""Report where the MPRT motif occurs in a list of UniProt proteins.

Reads one UniProt access ID per line from ``rosalind_mprt.txt``, downloads
each protein's FASTA record, and writes every ID that contains the motif to
``output.txt`` (and stdout), followed by the 1-based start positions of each
occurrence on the next line.
"""

import re
from time import sleep
from typing import List, Optional

# Motif from the MPRT problem statement: N, anything but P, S or T,
# anything but P.
MOTIF = "N[^P][ST][^P]"

INPUT_PATH = "rosalind_mprt.txt"
OUTPUT_PATH = "output.txt"
UNIPROT_URL = "http://www.uniprot.org/uniprot/{}.fasta"

# Seconds to wait for UniProt before giving up on a single request.
REQUEST_TIMEOUT = 30
# Seconds to pause between consecutive requests.
REQUEST_DELAY = 1


def find_motif_positions(protein: str, pattern: str = MOTIF) -> List[int]:
    """Return the 1-based start positions of every match of *pattern*.

    Matches may overlap: the pattern is wrapped in a zero-width lookahead so
    the scan advances one residue at a time instead of jumping past each
    match. Every start position is tested, including the one whose match
    ends on the final residue.
    """
    # Non-capturing group keeps a top-level "|" in *pattern* inside the
    # lookahead.
    scanner = re.compile("(?=(?:" + pattern + "))")
    return [match.start() + 1 for match in scanner.finditer(protein)]


def parse_fasta_sequence(fasta_text: str) -> str:
    """Return the sequence in *fasta_text* with ``>`` header lines removed."""
    return "".join(
        line for line in fasta_text.splitlines() if not line.startswith(">")
    )


def fetch_fasta(protein_id: str) -> Optional[str]:
    """Download the FASTA record for *protein_id* from UniProt.

    Only the part of the ID before the first underscore is used in the URL.
    Returns the response body, or ``None`` (after printing the failure) when
    the request raises or does not come back with HTTP 200.
    """
    # Imported here so the pure helpers above stay importable when the
    # third-party HTTP client is not installed.
    import requests

    url = UNIPROT_URL.format(protein_id.split("_")[0])
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as err:
        print(f"uniprot request failed for {protein_id}")
        print(f"detail: {err}")
        return None

    if response.status_code != 200:
        print(f"uniprot request failed for {protein_id}")
        print(f"detail: {response.text}")
        return None
    return response.text


def main(input_path: str = INPUT_PATH, output_path: str = OUTPUT_PATH) -> None:
    """Scan every protein listed in *input_path* and write hits to *output_path*.

    For each protein containing the motif, the ID and the space-separated
    positions are printed and written on two consecutive lines. Proteins
    whose download fails are reported on stdout and skipped.
    """
    with open(input_path, "r") as source:
        # Skip blank lines so they are not turned into requests.
        protein_ids = [line.strip() for line in source if line.strip()]

    with open(output_path, "w") as report:
        for index, protein_id in enumerate(protein_ids):
            if index:
                # Pause between requests only; no wait after the last one.
                sleep(REQUEST_DELAY)

            fasta_text = fetch_fasta(protein_id)
            if fasta_text is None:
                # Never parse an error body as if it were a sequence.
                continue

            positions = find_motif_positions(parse_fasta_sequence(fasta_text))
            if not positions:
                continue

            locations = " ".join(map(str, positions))
            print(protein_id)
            print(locations)
            report.write(f"{protein_id}\n{locations}\n")


if __name__ == "__main__":
    main()