moved perl and python scripts into their own directories

2025-01-04 15:58:59 -05:00
parent 821caa4618
commit af2ce045ec
13 changed files with 0 additions and 0 deletions
--- a/python/LEXF.py
+++ b/python/LEXF.py
@@ -0,0 +1,51 @@
+#!/usr/bin/python
+"""
+LEXF Rosalind.info Solution - Enumerating k-mers Lexicographically
+Author: Peter Vlasveld
+Date: 12/01/2019
+"""
+
+#Get dataset from file
+f1 = open("rosalind_lexf.txt")
+content = f1.readlines()
+f1.close()
+
+#duplicate the lex set r times
+firstLex = content[0].split( )
+lex = []
+for i in xrange(0, int(content[1])):
+    lex.extend(firstLex)
+
+"""
+Recursive function to get all possible combinations within the lex set
+"""
+def wordCombo (arr, data, start, end, index, r):
+    if index == r:
+        tempStr = ""
+        for i in data:
+            tempStr += i
+        global comboList
+        comboList.append(tempStr)
+        return
+    i = start
+    while i <= end and end - i + 1 >= r - index:
+        data[index] = arr[i]
+        wordCombo(arr, data, i + 1, end, index + 1, r)
+        i += 1
+
+#empty lsit to be passed into function
+data = [0]*int(content[1])
+
+#global list to be filled with combinations
+comboList = []
+
+#run the function, remove duplicates, and sort it alhphabetically
+wordCombo(lex,data,0,len(lex)-1,0,int(content[1]))
+finalList = list(set(comboList))
+sortedList = sorted(finalList)
+
+#print the list to a file
+f2 = open("output.txt", "w")
+for i in sortedList:
+    f2.writelines(i + "\n")
+f2.close()
--- a/python/LGIS.py
+++ b/python/LGIS.py
@@ -0,0 +1,70 @@
+#!/usr/bin/python
+
+# Solution to the LGIS Rosalind Problem - Longest Increasing Subsequence
+# Author: Peter Vlasveld
+# Date: 18/01/2019
+import copy
+
+#get input
+f1 = open("rosalind_lgis.txt")
+content = f1.readlines()
+f1.close()
+
+#get array out of content
+arr = content[1].split( )
+
+#function to find longest increasing subsequence of arr
+def inc (arr):
+    #set s array to a list of a list of the first value in arr
+    s = [[arr[0]]]
+    #loop through arr skipping the first value
+    for i in xrange(1,len(arr)):
+        #if this arr value is greater than the last element in the last list of s, 
+        # then duplicate the last list and append the new value
+        if int(arr[i]) > int(s[len(s)-1][len(s[len(s)-1])-1]):
+            tempLis = copy.deepcopy(s[len(s)-1])
+            tempLis.append(arr[i])
+            s.append(tempLis)
+        #else if the first list of s is greater than the arr value, make the arr value the new first list
+        elif int(s[0][0]) > int(arr[i]):
+            s[0] = copy.deepcopy([arr[i]])
+        #otherwise, find the list in s where the last value is less greater than the arr value,
+        # and replace it with the list before it, with the new value appended to the end
+        else:
+            for j in xrange(0, len(s)):
+                if int(arr[i]) < int(s[j][len(s[j])-1]):
+                    s[j] = copy.deepcopy(s[j-1])
+                    s[j].append(arr[i])
+                    break
+    #return the last element of s which at this point is the longest increasing subsequence
+    return s[len(s)-1]
+
+#find the longest decreasing subsequence
+#the logic is the exact same as the above increasing function, 
+# just with all of the greater-than and less-than operators turned around
+def dec (arr):
+    s = [[arr[0]]]
+    for i in xrange(1,len(arr)):
+        if int(arr[i]) < int(s[len(s)-1][len(s[len(s)-1])-1]):
+            tempLis = copy.deepcopy(s[len(s)-1])
+            tempLis.append(arr[i])
+            s.append(tempLis)
+        elif int(s[0][0]) < int(arr[i]):
+            s[0] = copy.deepcopy([arr[i]])
+        else:
+            for j in xrange(0, len(s)):
+                if int(arr[i]) > int(s[j][len(s[j])-1]):
+                    s[j] = copy.deepcopy(s[j-1])
+                    s[j].append(arr[i])
+                    break
+    return s[len(s)-1]
+
+#run the functions
+inc = inc(arr)
+dec = dec(arr)
+
+#print the output to a file
+f2 = open("output.txt", "w")
+f2.write(" ".join(inc) + "\n")
+f2.write(" ".join(dec))
+f2.close() 
--- a/python/MPRT.py
+++ b/python/MPRT.py
@@ -0,0 +1,44 @@
+#!/usr/bin/python
+#MPRT - Finding a Protein Motif Solution
+#Problem can be found at http://rosalind.info/problems/mprt/
+#Author: Peter Vlasveld
+
+import urllib2
+import re
+
+#declare motif
+motif = "N[^P][ST][^P]"
+
+#read in file
+f0 = open("rosalind_mprt.txt", "r")
+content = f0.read().splitlines()
+f0.close()
+
+#open output file
+f1 = open("output.txt", "w+")
+#loop through each accession ID
+for i in content:
+	#get fasta from url
+	url = "http://www.uniprot.org/uniprot/" + i + ".fasta"
+	response = urllib2.urlopen(url)
+	fasta = response.read().splitlines()
+	
+	#format protein string
+	protStr = ""
+	for j in fasta:
+		if not j.startswith('>'):
+			protStr += j
+	#construct output strings
+	outStr = ""
+	for j in range(0, len(protStr)-4):
+		if re.match(motif,protStr[j:j+4]):
+			outStr += str(j+1) + " "
+	
+	#output
+	if not outStr == "":
+		print i
+		f1.write(i + "\n")
+		print outStr
+		f1.write(outStr + "\n")
+#close output file
+f1.close()
--- a/python/ORF.py
+++ b/python/ORF.py
@@ -0,0 +1,102 @@
+#!/usr/bin/python
+
+#Solution to the ORF rosalind problem - 'Open Reading Frames'
+#Author: Peter Vlasveld
+
+#function to generate the orfs
+def orfGen(stri):
+	#dictionary for translation
+	translate = { 
+		"AAA":'K', "AAG":'K',
+		"GAA":'E', "GAG":'E',
+		"AAC":'N', "AAU":'N',
+		"GAC":'D', "GAU":'D',
+		"ACA":'T', "ACC":'T', "ACG":'T', "ACU":'T',
+		"GCA":"A", "GCC":"A", "GCG":"A", "GCU":"A",
+		"GGA":"G", "GGC":"G", "GGG":"G", "GGU":"G",
+		"GUA":"V", "GUC":"V", "GUG":"V", "GUU":"V",
+		"AUG":"M",
+		"UAA":"*", "UAG":"*", "UGA":"*",
+		"AUC":"I", "AUU":"I", "AUA":"I",
+		"UAC":"Y", "UAU":"Y",
+		"CAA":"Q", "CAG":"Q",
+		"AGC":"S", "AGU":"S", "UCA":"S", "UCC":"S", "UCG":"S", "UCU":"S",
+		"CAC":"H", "CAU":"H",
+		"UGC":"C", "UGU":"C",
+		"CCA":"P", "CCC":"P", "CCG":"P", "CCU":"P",
+		"UGG":"W",
+		"AGA":"R", "AGG":"R", "CGA":"R", "CGC":"R", "CGG":"R", "CGU":"R",
+		"UUA":"L", "UUG":"L", "CUA":"L", "CUC":"L", "CUG":"L", "CUU":"L",
+		"UUC":"F", "UUU":"F"
+	}
+	
+	#list to contain protein sequences
+	proteins = []
+
+	#loop that runs through the sequences from each amino acid in the sequence
+	for i in xrange(0, len(stri)-2):
+		tempStr = ""
+		tempBool = False
+		j = i
+		#find an orf and break when it is finished
+		while j < len(stri)-2:
+			if translate[stri[j:j+3]] == "*":
+				tempBool = False
+			if translate[stri[j:j+3]] == "M":
+				tempBool = True
+			if tempBool:
+				tempStr += translate[stri[j:j+3]]
+			else:
+				break
+			j += 3
+		#add the orf to proteins only if it ends with a stop codon
+		if tempStr != "" and tempBool == False:
+			proteins.extend([tempStr])
+	#return the protein list			
+	return proteins
+
+#function to return the DNA compliment
+def DNACompliment(stri):
+	rev = stri[::-1]
+	revList = list(rev)
+	for i in xrange(0, len(revList)):
+		if revList[i] == "A":
+			revList[i] = "T"
+		elif revList[i] == "T":
+			revList[i] = "A"
+		elif revList[i] == "G":
+			revList[i] = "C"
+		elif revList[i] == "C":
+			revList[i] = "G"
+	revCompStr = "".join(revList)
+	return revCompStr
+
+#get file content
+f = open("rosalind_orf.txt")
+content = f.readlines()
+f.close()
+
+DNAStr = ""
+#construct full DNA sequence
+for i in xrange(1, len(content)):
+	DNAStr += content[i]
+
+#remove whitespace
+noWhiteDNA = "".join(DNAStr.split())
+
+#convert DNA to RNA
+RNA = noWhiteDNA.replace("T", "U")
+
+#get DNA compliment and convert that to RNA as well
+revCompRNA = DNACompliment(noWhiteDNA).replace("T", "U")
+
+#get orfs for both sequences
+protList = orfGen(RNA)
+protList.extend(orfGen(revCompRNA))
+
+#get rid of duplicates
+finalList = list(set(protList))
+
+#print the list
+for i in finalList:
+	print(i)
--- a/python/PROB.py
+++ b/python/PROB.py
@@ -0,0 +1,46 @@
+#!/usr/bin/python3
+"""
+Solutions to the PROB Rosalind Problem - Introduction to Random Strings
+Author: Peter Vlasveld
+Date: 20190222
+"""
+import math
+
+#Read in file content into list
+f1 = open("rosalind_prob.txt")
+content = f1.readlines()
+f1.close()
+
+"""
+Function to calculate the probability of the exact sequence given the GC-content
+@param gc - the GC-content given 
+@param seq - the sequence being studied
+
+@return - the raw probability value
+"""
+def calcProb(gc, seq):
+    # get probability for just one nucleotide
+    gchalf = gc/2.0
+    athalf = (1.0-gc)/2.0
+
+    # multiply all of the probability values for each letter in the sequence
+    prob = 1.0
+    for i in range(0, len(seq)):
+        if seq[i] == 'A' or seq[i] == 'T':
+            prob *= athalf
+        elif seq[i] == 'G' or seq[i] == 'C':
+            prob *= gchalf
+    
+    # return the probability value
+    return prob
+
+# take the log of the probabilities returned by calcProb
+# format them to 3 decimal places
+gcCons = content[1].split( )
+finalProbs = []
+for i in gcCons:
+    rawProb = calcProb(float(i), content[0])
+    finalProbs.append("%.3f" % math.log10(rawProb))
+
+# print the probabilities with spaces in between
+print(" ".join(finalProbs))
--- a/python/REVP.py
+++ b/python/REVP.py
@@ -0,0 +1,50 @@
+#!/usr/bin/python
+
+#Solution to REVP Rosalind problem - Locating Restriction Sites
+#Author: Peter Vlasveld
+
+#read in file and get contents
+f1 = open("rosalind_revp.txt")
+content = f1.readlines()
+f1.close()
+
+#get DNA string from content
+DNAStr = ""
+for i in xrange(1,len(content)):
+    DNAStr += content[i].strip()
+
+#function to return the DNA compliment
+def DNACompliment(stri):
+	rev = stri[::-1]
+	revList = list(rev)
+	for i in xrange(0, len(revList)):
+		if revList[i] == "A":
+			revList[i] = "T"
+		elif revList[i] == "T":
+			revList[i] = "A"
+		elif revList[i] == "G":
+			revList[i] = "C"
+		elif revList[i] == "C":
+			revList[i] = "G"
+	revCompStr = "".join(revList)
+	return revCompStr
+
+#loop through DNA string
+indexes = []
+lengths = []
+for j in xrange(0, len(DNAStr)-3):
+	#evaluating 4 to 12 character sets, taking the reverse compliment and comparing,
+	#if a match is found record the index and length and then break
+	for k in xrange(4, 13):
+		comp = DNACompliment(DNAStr[j:j+k])
+		if comp == DNAStr[j:j+k]:
+			indexes.extend([j+1])
+			lengths.extend([k])
+			print(DNAStr[j:j+k])
+			break
+
+#output results to a file
+f2 = open("output.txt","w")			
+for i in xrange(0,len(indexes)):
+	f2.write(str(indexes[i]) + " " + str(lengths[i]) + "\n")
+f2.close()