rosalind-solutions/python/PROB.py

53 lines
1.2 KiB
Python
Raw Normal View History

2019-02-22 15:35:48 +00:00
#!/usr/bin/python3
"""
Solutions to the PROB Rosalind Problem - Introduction to Random Strings
Author: Peter Vlasveld
Date: 20190222
"""
import math

# Read the input file into a list of lines. The context manager guarantees
# the file handle is closed even if reading raises.
with open("rosalind_prob.txt") as f1:
    content = f1.readlines()
def calcProb(gc, seq):
    """
    Function to calculate the probability of the exact sequence given the
    GC-content.

    Each "G" or "C" contributes a factor of gc / 2 and each "A" or "T"
    contributes a factor of (1 - gc) / 2. Any other character (for example
    a trailing newline) contributes nothing and is skipped.

    @param gc - the GC-content given
    @param seq - the sequence being studied
    @return - the raw probability value (1.0 for an empty sequence)
    """
    # get probability for just one nucleotide
    gchalf = gc / 2.0
    athalf = (1.0 - gc) / 2.0

    # multiply all of the probability values for each letter in the sequence
    prob = 1.0
    for base in seq:
        if base in ("A", "T"):
            prob *= athalf
        elif base in ("G", "C"):
            prob *= gchalf

    # return the probability value
    return prob
# Take the log10 of the probabilities returned by calcProb and format them
# to 3 decimal places. The first input line is the sequence (stripped of its
# trailing newline); the second holds the space-separated GC-content values.
sequence = content[0].strip()
gcCons = content[1].split()
finalProbs = [
    "%.3f" % math.log10(calcProb(float(gc), sequence)) for gc in gcCons
]

# print the probabilities with spaces in between
print(" ".join(finalProbs))