fixed MPRT
This commit is contained in:
parent
8d4cc2b83c
commit
43de339fc5
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
.venv
|
||||
*.txt
|
@ -1,44 +1,53 @@
|
||||
#!/usr/bin/env python3
# MPRT - Finding a Protein Motif Solution
# Problem can be found at http://rosalind.info/problems/mprt/
# Author: Peter Vlasveld
"""Report where a protein motif occurs in a list of UniProt entries.

Reads one UniProt ID per line from ``rosalind_mprt.txt``, downloads each
entry's FASTA record, and for every protein containing the motif writes
the ID followed by the 1-based start positions of all (possibly
overlapping) occurrences to stdout and to ``output.txt``.
"""

import re
from time import sleep

# declare motif: N, then any residue except P, then S or T, then any
# residue except P
motif = "N[^P][ST][^P]"

# Zero-width lookahead so that overlapping occurrences are all reported;
# compiled once instead of on every window of every protein.
_MOTIF_RE = re.compile(f"(?={motif})")

# Seconds to wait for UniProt before giving up on one entry.
_REQUEST_TIMEOUT = 30


def parse_fasta(text: str) -> str:
    """Return the sequence of a FASTA record as one string.

    Header lines (those starting with ``>``) are dropped and the
    remaining lines are concatenated in order.
    """
    return "".join(
        line for line in text.splitlines() if not line.startswith(">")
    )


def find_motif_positions(sequence: str) -> list:
    """Return the 1-based start positions of every motif occurrence.

    Every 4-residue window is considered, including the last one, and
    overlapping occurrences are each reported. A sequence shorter than
    the motif yields an empty list.
    """
    return [match.start() + 1 for match in _MOTIF_RE.finditer(sequence)]


def fetch_sequence(entry_id: str):
    """Download the protein sequence for *entry_id* from UniProt.

    Only the part of the ID before the first underscore is used in the
    URL (presumably the input may carry a ``_NAME`` suffix after the
    accession -- confirm against the dataset).

    Returns the sequence string, or ``None`` when the request fails or
    answers with a non-200 status; the failure is printed in both cases
    so the caller can simply skip the entry.
    """
    # Imported here so the pure helpers above stay importable on
    # machines that do not have the third-party ``requests`` package.
    import requests

    url = "http://www.uniprot.org/uniprot/" + entry_id.split("_")[0] + ".fasta"
    try:
        response = requests.get(url, timeout=_REQUEST_TIMEOUT)
    except requests.RequestException as err:
        print(f"uniprot request failed for {entry_id}")
        print(f"detail: {err}")
        return None

    if response.status_code != 200:
        print(f"uniprot request failed for {entry_id}")
        print(f"detail: {response.text}")
        # Do not parse an error page as if it were a FASTA record.
        return None

    return parse_fasta(response.text)


def main() -> None:
    """Process every ID in the input file and write the results."""
    # read in file, ignoring blank lines that would give an invalid URL
    with open("rosalind_mprt.txt", "r") as id_file:
        entry_ids = [line.strip() for line in id_file if line.strip()]

    # open output file; the context manager closes it even on errors
    with open("output.txt", "w") as out_file:
        # loop through each accession ID
        for entry_id in entry_ids:
            sequence = fetch_sequence(entry_id)
            positions = find_motif_positions(sequence) if sequence else []

            # output only proteins that contain the motif
            if positions:
                # Trailing space kept so the output stays byte-compatible
                # with what earlier versions of this script produced.
                position_line = "".join(f"{pos} " for pos in positions)
                print(entry_id)
                out_file.write(entry_id + "\n")
                print(position_line)
                out_file.write(position_line + "\n")

            # pause between requests to stay polite to the server
            sleep(1)


if __name__ == "__main__":
    main()
|
||||
|
Loading…
Reference in New Issue
Block a user