moved perl and python scripts into their own directories
This commit is contained in:
51
python/LEXF.py
Normal file
51
python/LEXF.py
Normal file
@@ -0,0 +1,51 @@
|
||||
#!/usr/bin/python
|
||||
"""
|
||||
LEXF Rosalind.info Solution - Enumerating k-mers Lexicographically
|
||||
Author: Peter Vlasveld
|
||||
Date: 12/01/2019
|
||||
"""
|
||||
|
||||
# Get dataset from file: line 1 holds the alphabet symbols, line 2 the word
# length. The with-block closes the file even if reading fails.
with open("rosalind_lexf.txt") as f1:
    content = f1.readlines()

# Duplicate the alphabet once per word position, so that picking symbols in
# increasing index order from this flat list can produce every word
# (repeated symbols included).
firstLex = content[0].split()
lex = firstLex * int(content[1])
|
||||
|
||||
"""
|
||||
Recursive function to get all possible combinations within the lex set
|
||||
"""
|
||||
def wordCombo(arr, data, start, end, index, r):
    """Collect every r-element combination of arr[start..end] as a string.

    Elements are picked in increasing index order. Each finished combination
    is concatenated and appended to the module-level list ``comboList``,
    which must exist before the first call. Nothing is returned.

    arr   - sequence of string symbols to choose from
    data  - scratch list with r slots holding the combination being built
    start - first index of arr that may fill slot ``index``
    end   - last usable index of arr (inclusive)
    index - slot of data filled by this call
    r     - number of symbols per combination
    """
    global comboList
    if index == r:
        # every slot is filled: record the finished word
        comboList.append("".join(data))
        return
    i = start
    # stop once too few elements remain to fill the remaining slots
    while i <= end and end - i + 1 >= r - index:
        data[index] = arr[i]
        wordCombo(arr, data, i + 1, end, index + 1, r)
        i += 1
|
||||
|
||||
# scratch list (one slot per word position) that wordCombo fills in place
data = [0] * int(content[1])

# global list that wordCombo appends every generated word to
comboList = []

# run the function, remove duplicates, and sort alphabetically
wordCombo(lex, data, 0, len(lex) - 1, 0, int(content[1]))
finalList = list(set(comboList))
sortedList = sorted(finalList)

# write the words to a file, one per line; the with-block closes the file
# even if a write fails
with open("output.txt", "w") as f2:
    for word in sortedList:
        f2.write(word + "\n")
|
||||
70
python/LGIS.py
Normal file
70
python/LGIS.py
Normal file
@@ -0,0 +1,70 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
# Solution to the LGIS Rosalind Problem - Longest Increasing Subsequence
|
||||
# Author: Peter Vlasveld
|
||||
# Date: 18/01/2019
|
||||
import copy
|
||||
|
||||
# get input; the with-block closes the file even if reading fails
with open("rosalind_lgis.txt") as f1:
    content = f1.readlines()

# the sequence is on the second line, as whitespace-separated numbers
arr = content[1].split()
|
||||
|
||||
def _longestMonotone(arr, sign):
    """Return a longest strictly monotone subsequence of arr.

    arr  - sequence of integers or integer strings
    sign - +1 for an increasing subsequence, -1 for a decreasing one

    Returns a list holding the chosen elements of arr, unchanged and in
    their original order. Empty input gives an empty list.
    """
    from bisect import bisect_left

    # Negating the keys turns "strictly decreasing" into "strictly
    # increasing", so a single search serves both directions.
    keys = [sign * int(v) for v in arr]
    tails = []                 # tails[k]: smallest last key of any run of length k + 1
    tailPos = []               # tailPos[k]: index in arr of the element behind tails[k]
    prev = [-1] * len(keys)    # prev[i]: index of the element before arr[i] in its run
    for i, key in enumerate(keys):
        # first run whose last key is >= key; bisect_left keeps runs strict
        k = bisect_left(tails, key)
        if k > 0:
            prev[i] = tailPos[k - 1]
        if k == len(tails):
            # key extends the longest run found so far
            tails.append(key)
            tailPos.append(i)
        else:
            # key is a smaller (or equal) ending for runs of length k + 1
            tails[k] = key
            tailPos[k] = i

    # walk the predecessor links back from the end of the longest run
    result = []
    i = tailPos[-1] if tailPos else -1
    while i != -1:
        result.append(arr[i])
        i = prev[i]
    result.reverse()
    return result


#function to find longest increasing subsequence of arr
def inc(arr):
    """Return a longest strictly increasing subsequence of arr.

    arr - sequence of integers or integer strings
    The elements are returned as they appear in arr; [] for empty input.
    """
    return _longestMonotone(arr, 1)


#find the longest decreasing subsequence
def dec(arr):
    """Return a longest strictly decreasing subsequence of arr.

    arr - sequence of integers or integer strings
    The elements are returned as they appear in arr; [] for empty input.
    """
    return _longestMonotone(arr, -1)
|
||||
|
||||
# run the functions; the results get their own names so that inc and dec
# stay callable afterwards
incSeq = inc(arr)
decSeq = dec(arr)

# print the output to a file: increasing subsequence on the first line,
# decreasing on the second
with open("output.txt", "w") as f2:
    f2.write(" ".join(incSeq) + "\n")
    f2.write(" ".join(decSeq))
|
||||
44
python/MPRT.py
Normal file
44
python/MPRT.py
Normal file
@@ -0,0 +1,44 @@
|
||||
#!/usr/bin/python
|
||||
#MPRT - Finding a Protein Motif Solution
|
||||
#Problem can be found at http://rosalind.info/problems/mprt/
|
||||
#Author: Peter Vlasveld
|
||||
|
||||
import urllib2
|
||||
import re
|
||||
|
||||
#declare motif
motif = "N[^P][ST][^P]"
# The motif is wrapped in a lookahead so that finditer reports every start
# position, including overlapping hits and a hit in the last four residues.
motifRe = re.compile("(?=" + motif + ")")

#read in file: one accession ID per line
with open("rosalind_mprt.txt", "r") as f0:
    content = f0.read().splitlines()

#open output file; the with-block closes it even if a download fails
with open("output.txt", "w+") as f1:
    #loop through each accession ID
    for i in content:
        #get fasta from url
        url = "http://www.uniprot.org/uniprot/" + i + ".fasta"
        response = urllib2.urlopen(url)
        try:
            fasta = response.read().splitlines()
        finally:
            response.close()

        #format protein string: everything except the '>' header line(s)
        protStr = "".join(line for line in fasta if not line.startswith('>'))

        #construct output string: 1-based start positions, each followed by a space
        outStr = "".join(str(m.start() + 1) + " " for m in motifRe.finditer(protStr))

        #output only the proteins that contain the motif
        if outStr != "":
            print(i)
            f1.write(i + "\n")
            print(outStr)
            f1.write(outStr + "\n")
|
||||
102
python/ORF.py
Normal file
102
python/ORF.py
Normal file
@@ -0,0 +1,102 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
#Solution to the ORF rosalind problem - 'Open Reading Frames'
|
||||
#Author: Peter Vlasveld
|
||||
|
||||
#function to generate the orfs
def orfGen(stri):
    """Translate every open reading frame that starts in an RNA string.

    stri - RNA sequence written with the letters A, C, G and U

    Every AUG in stri starts a candidate protein. It is translated codon by
    codon and kept only if a stop codon is reached before the sequence ends.

    Returns a list of protein strings (stop symbol excluded) ordered by
    start position; duplicates are not removed.
    Raises KeyError if a codon read from stri is missing from the table.
    """
    #dictionary for translation ("*" marks a stop codon)
    translate = {
        "UUU": "F", "UUC": "F", "UUA": "L", "UUG": "L",
        "CUU": "L", "CUC": "L", "CUA": "L", "CUG": "L",
        "AUU": "I", "AUC": "I", "AUA": "I", "AUG": "M",
        "GUU": "V", "GUC": "V", "GUA": "V", "GUG": "V",
        "UCU": "S", "UCC": "S", "UCA": "S", "UCG": "S",
        "CCU": "P", "CCC": "P", "CCA": "P", "CCG": "P",
        "ACU": "T", "ACC": "T", "ACA": "T", "ACG": "T",
        "GCU": "A", "GCC": "A", "GCA": "A", "GCG": "A",
        "UAU": "Y", "UAC": "Y", "UAA": "*", "UAG": "*",
        "CAU": "H", "CAC": "H", "CAA": "Q", "CAG": "Q",
        "AAU": "N", "AAC": "N", "AAA": "K", "AAG": "K",
        "GAU": "D", "GAC": "D", "GAA": "E", "GAG": "E",
        "UGU": "C", "UGC": "C", "UGA": "*", "UGG": "W",
        "CGU": "R", "CGC": "R", "CGA": "R", "CGG": "R",
        "AGU": "S", "AGC": "S", "AGA": "R", "AGG": "R",
        "GGU": "G", "GGC": "G", "GGA": "G", "GGG": "G",
    }

    #list to contain protein sequences
    proteins = []

    #one past the last index at which a complete codon can start
    codonLimit = len(stri) - 2

    for i in range(0, codonLimit):
        #only a start codon opens a reading frame
        if translate[stri[i:i + 3]] != "M":
            continue
        residues = []
        for j in range(i, codonLimit, 3):
            aminoAcid = translate[stri[j:j + 3]]
            if aminoAcid == "*":
                #stop codon reached: the frame is complete, keep it
                proteins.append("".join(residues))
                break
            residues.append(aminoAcid)
        #a frame that runs off the end without a stop codon is discarded

    #return the protein list
    return proteins
|
||||
|
||||
#function to return the DNA compliment
def DNACompliment(stri):
    """Return the reverse complement of a DNA string.

    stri - DNA sequence; A/T and G/C are swapped after the string is reversed

    Characters other than the upper-case letters A, T, G and C are left
    unchanged. Returns the result as a new string.
    """
    pairs = {"A": "T", "T": "A", "G": "C", "C": "G"}
    return "".join(pairs.get(base, base) for base in reversed(stri))
|
||||
|
||||
#get file content; the with-block closes the file even if reading fails
with open("rosalind_orf.txt") as f:
    content = f.readlines()

#construct full DNA sequence from every line after the first (header) line
DNAStr = "".join(content[1:])

#remove whitespace
noWhiteDNA = "".join(DNAStr.split())

#convert DNA to RNA
RNA = noWhiteDNA.replace("T", "U")

#get DNA compliment and convert that to RNA as well
revCompRNA = DNACompliment(noWhiteDNA).replace("T", "U")

#get orfs for both sequences
protList = orfGen(RNA)
protList.extend(orfGen(revCompRNA))

#get rid of duplicates
finalList = list(set(protList))

#print the list
for i in finalList:
    print(i)
|
||||
46
python/PROB.py
Normal file
46
python/PROB.py
Normal file
@@ -0,0 +1,46 @@
|
||||
#!/usr/bin/python3
|
||||
"""
|
||||
Solutions to the PROB Rosalind Problem - Introduction to Random Strings
|
||||
Author: Peter Vlasveld
|
||||
Date: 20190222
|
||||
"""
|
||||
import math
|
||||
|
||||
#Read in file content into list; the with-block closes the file even if
#reading fails
with open("rosalind_prob.txt") as f1:
    content = f1.readlines()
|
||||
|
||||
"""
|
||||
Function to calculate the probability of the exact sequence given the GC-content
|
||||
@param gc - the GC-content given
|
||||
@param seq - the sequence being studied
|
||||
|
||||
@return - the raw probability value
|
||||
"""
|
||||
def calcProb(gc, seq):
    """Calculate the probability of the exact sequence given the GC-content.

    @param gc - the GC-content given
    @param seq - the sequence being studied

    @return - the raw probability value

    Characters other than A, T, G and C (a trailing newline, for example)
    do not change the result.
    """
    # get probability for just one nucleotide
    gchalf = gc / 2.0
    athalf = (1.0 - gc) / 2.0

    # multiply all of the probability values for each letter in the sequence
    prob = 1.0
    for base in seq:
        if base in ("A", "T"):
            prob *= athalf
        elif base in ("G", "C"):
            prob *= gchalf

    # return the probability value
    return prob
|
||||
|
||||
# one GC-content value per whitespace-separated field on the second line
gcCons = content[1].split()

# log10 of each raw probability, formatted to 3 decimal places
finalProbs = [
    "%.3f" % math.log10(calcProb(float(gcValue), content[0]))
    for gcValue in gcCons
]

# print the probabilities with spaces in between
print(" ".join(finalProbs))
|
||||
50
python/REVP.py
Normal file
50
python/REVP.py
Normal file
@@ -0,0 +1,50 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
#Solution to REVP Rosalind problem - Locating Restriction Sites
|
||||
#Author: Peter Vlasveld
|
||||
|
||||
#read in file and get contents; the with-block closes the file even if
#reading fails
with open("rosalind_revp.txt") as f1:
    content = f1.readlines()

#get DNA string from content: every line after the first (header) line,
#stripped of surrounding whitespace and joined
DNAStr = "".join(line.strip() for line in content[1:])
|
||||
|
||||
#function to return the DNA compliment
def DNACompliment(stri):
    """Return the reverse complement of a DNA string.

    stri - DNA sequence; A/T and G/C are swapped after the string is reversed

    Characters other than the upper-case letters A, T, G and C are left
    unchanged. Returns the result as a new string.
    """
    pairs = {"A": "T", "T": "A", "G": "C", "C": "G"}
    return "".join(pairs.get(base, base) for base in reversed(stri))
|
||||
|
||||
#loop through DNA string
indexes = []
lengths = []
for j in range(0, len(DNAStr) - 3):
    #evaluate every length from 4 to 12 that still fits inside the string;
    #all matching lengths are recorded, because one start position can hold
    #several reverse palindromes (ATATAT contains both ATAT and ATATAT)
    for k in range(4, min(12, len(DNAStr) - j) + 1):
        site = DNAStr[j:j + k]
        #a reverse palindrome equals its own reverse compliment
        if DNACompliment(site) == site:
            indexes.append(j + 1)
            lengths.append(k)
            print(site)

#output results to a file as "<1-based position> <length>" lines
with open("output.txt", "w") as f2:
    for position, length in zip(indexes, lengths):
        f2.write(str(position) + " " + str(length) + "\n")
|
||||
Reference in New Issue
Block a user