From 80bc80633c9019f0277b58f91f6b69351e1896f6 Mon Sep 17 00:00:00 2001
From: Peter Vlasveld
Date: Sat, 3 Nov 2018 10:22:22 -0400
Subject: [PATCH] first commit

---
 BINS.pl | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 FIB.pl  | 51 ++++++++++++++++++++++++++++++++++++++++
 GC.pl   | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 GRPH.pl | 68 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 IPRB.pl | 54 ++++++++++++++++++++++++++++++++++++++++++
 REVC.pl | 45 +++++++++++++++++++++++++++++++++++
 6 files changed, 360 insertions(+)
 create mode 100644 BINS.pl
 create mode 100644 FIB.pl
 create mode 100644 GC.pl
 create mode 100644 GRPH.pl
 create mode 100644 IPRB.pl
 create mode 100644 REVC.pl

diff --git a/BINS.pl b/BINS.pl
new file mode 100644
index 0000000..d1e21ba
--- /dev/null
+++ b/BINS.pl
@@ -0,0 +1,73 @@
+#!/usr/bin/perl
+use strict;
+use warnings;
+
+=pod
+
+=head1 NAME
+
+BINS.pl - binary search solution to the Rosalind BINS problem
+
+=head1 DESCRIPTION
+
+Reads rosalind_bins.txt, taking the list to search from the third line and
+the keys to look up from the fourth line. The 1-based position of each key
+in the list (or -1 when the key is absent) is written to outlog.txt and
+echoed to the console.
+
+=head1 AUTHOR
+
+Peter Vlasveld
+
+=cut
+
+#take in data
+open(my $in, '<', "rosalind_bins.txt") or die "Couldn't open the input file: $!";
+my @data = <$in>;
+close $in;
+
+#the list and the keys live on the third and fourth lines
+die "Input file must contain at least four lines\n" if @data < 4;
+
+#split on any whitespace so the trailing newline never ends up in a value
+my @list = split ' ', $data[2];
+my @keys = split ' ', $data[3];
+
+#run binary searches and store in @outArr
+my @outArr;
+for my $key (@keys) {
+    push @outArr, binSearch($key, \@list, 0, $#list);
+}
+
+#join search results into one string
+my $outStr = join(' ', @outArr);
+
+#open file and print output to it
+open(my $out, '>', "outlog.txt") or die "Couldn't open the output file: $!";
+print {$out} $outStr;
+close $out or die "Couldn't write the output file: $!";
+
+#also print the output to the console
+print $outStr, "\n";
+
+#binary search over the sorted array behind $list_ref, limited to the
+#index range $left..$right; returns the 1-based position of $key or -1.
+#Working through the reference avoids copying the array at every step.
+sub binSearch {
+    my ($key, $list_ref, $left, $right) = @_;
+
+    while ($left <= $right) {
+        my $middle = int(($left + $right) / 2);
+        my $value = $list_ref->[$middle];
+
+        if ($value == $key) { return $middle + 1; }
+
+        if ($value > $key) {
+            $right = $middle - 1;
+        } else {
+            $left = $middle + 1;
+        }
+    }
+
+    return -1;
+}
diff --git a/FIB.pl b/FIB.pl
new file mode 100644
index 0000000..38a7b29
--- /dev/null
+++ b/FIB.pl
@@ -0,0 +1,51 @@
+#!/usr/bin/perl
+use strict;
+use warnings;
+
+=pod
+
+=head1 NAME
+
+FIB.pl - solution to the Rosalind FIB problem, a modified fibonacci algorithm
+
+=head1 DESCRIPTION
+
+Reads two whitespace-separated values from the first line of
+rosalind_fib.txt: the term number n and the multiplier k. Prints the n-th
+term of F(n) = F(n-1) + k*F(n-2), with F(0) = 0 and F(1) = 1, to the console.
+
+=head1 AUTHOR
+
+Peter Vlasveld
+
+=cut
+
+#take in data from file
+open(my $in, '<', "rosalind_fib.txt") or die "Couldn't open input file: $!";
+my $data = <$in>;
+close $in;
+
+#split input values into array
+die "Input file is empty\n" unless defined $data;
+my @input = split ' ', $data;
+die "Input file must contain two values\n" if @input < 2;
+
+#print output of fibonacci algorithm to console
+print fibo($input[0], $input[1]), "\n";
+
+#modified fibonacci algorithm to allow for k rabbit pairs per month rather
+#than just 1: F(n) = F(n-1) + k*F(n-2) with F(0) = 0 and F(1) = 1.
+#Computed iteratively because the plain double recursion takes
+#exponential time in $num.
+sub fibo {
+    my ($num, $pairs) = @_;
+
+    if ($num <= 0) { return 0; }
+
+    my ($previous, $current) = (0, 1);
+    for (2 .. $num) {
+        ($previous, $current) = ($current, $current + $previous * $pairs);
+    }
+
+    return $current;
+}
diff --git a/GC.pl b/GC.pl
new file mode 100644
index 0000000..3464750
--- /dev/null
+++ b/GC.pl
@@ -0,0 +1,69 @@
+#!/usr/bin/perl
+use strict;
+use warnings;
+
+=pod
+
+=head1 NAME
+
+GC.pl - solution to the Rosalind GC problem
+
+=head1 DESCRIPTION
+
+Reads FASTA records from rosalind_gc.txt and prints the header line of the
+record with the highest percentage of G and C characters, followed by that
+percentage.
+
+=head1 AUTHOR
+
+Peter Vlasveld
+
+=cut
+
+#take in data from file
+open(my $in, '<', "rosalind_gc.txt") or die "Couldn't open input file: $!";
+my @data = <$in>;
+close $in;
+
+#declare variables
+my @ID;
+my @sequences;
+my $finalID;
+my $finalPerc = 0;
+
+#collect each header in @ID and its joined sequence lines in @sequences
+for my $line (@data) {
+    if (substr($line, 0, 1) eq '>') {
+        #a header starts a new record with an empty sequence
+        (my $header = $line) =~ s/\s+\z//;
+        push @ID, $header;
+        push @sequences, '';
+    } elsif (@sequences) {
+        #sequence data belongs to the most recent header
+        (my $bases = $line) =~ s/\s//g;
+        $sequences[-1] .= $bases;
+    }
+}
+
+#loop through @sequences
+for my $i (0 .. $#sequences) {
+    my $total = length $sequences[$i];
+
+    #a header without sequence data would otherwise divide by zero
+    next if $total == 0;
+
+    #determine percentage of GC in the sequence
+    my $GC = ($sequences[$i] =~ tr/GC//);
+    my $percentage = ($GC / $total) * 100;
+
+    #keep the first measurable record and any later one that beats it
+    if (!defined $finalID || $percentage > $finalPerc) {
+        $finalPerc = $percentage;
+        $finalID = $ID[$i];
+    }
+}
+
+die "No sequence data found in input file\n" unless defined $finalID;
+
+#output id and percent
+printf("%s\n%f\n", $finalID, $finalPerc);
diff --git a/GRPH.pl b/GRPH.pl
new file mode 100644
index 0000000..525981a
--- /dev/null
+++ b/GRPH.pl
@@ -0,0 +1,68 @@
+#!/usr/bin/perl
+use common::sense;
+
+=pod
+
+=head1 NAME
+
+GRPH.pl - solution to the Rosalind GRPH problem
+
+=head1 DESCRIPTION
+
+Reads FASTA records from rosalind_grph.txt and writes to output.txt one line
+per ordered pair of records whose DNA strings differ and where the last three
+characters of the first string equal the first three of the second. Each
+line holds the two header names without the leading '>'.
+
+=head1 AUTHOR
+
+Peter Vlasveld
+
+=cut
+
+#take in data
+open(my $in, '<', "rosalind_grph.txt") or die "Couldn't open input file: $!";
+my @data = <$in>;
+close $in;
+
+#length of the suffix/prefix two strings must share
+my $overlap = 3;
+
+#declare variables
+my (@headers, @DNA, @result);
+
+#convert FASTA data into parallel arrays of headers and DNA strings
+for my $line (@data) {
+    #strip the line ending rather than always cutting the last character,
+    #so a final line without a newline keeps its last base
+    (my $text = $line) =~ s/\s+\z//;
+
+    if (substr($text, 0, 1) eq '>') {
+        push @headers, $text;
+        push @DNA, "";
+    } elsif (@DNA) {
+        $DNA[-1] .= $text;
+    }
+}
+
+#write which headers overlap to @result
+for my $i (0 .. $#DNA) {
+    #a string shorter than the overlap cannot take part in an edge
+    next if length($DNA[$i]) < $overlap;
+
+    for my $j (0 .. $#DNA) {
+        next if length($DNA[$j]) < $overlap;
+
+        #equal strings, which includes a record paired with itself, are skipped
+        next if $DNA[$i] eq $DNA[$j];
+
+        if (substr($DNA[$i], -$overlap) eq substr($DNA[$j], 0, $overlap)) {
+            push @result, substr($headers[$i], 1) . " " . substr($headers[$j], 1);
+        }
+    }
+}
+
+#print result to file
+open(my $out, '>', "output.txt") or die "Couldn't open output file: $!";
+print {$out} "$_\n" for @result;
+close $out or die "Couldn't write output file: $!";
diff --git a/IPRB.pl b/IPRB.pl
new file mode 100644
index 0000000..f96c52b
--- /dev/null
+++ b/IPRB.pl
@@ -0,0 +1,54 @@
+#!/usr/bin/perl
+use strict;
+use warnings;
+
+=pod
+
+=head1 NAME
+
+IPRB.pl - solution to the Rosalind IPRB problem
+
+=head1 DESCRIPTION
+
+Reads three whitespace-separated counts from the first line of
+rosalind_iprb.txt and prints, to five decimal places, one minus the summed
+probability of the weighted pairings computed below.
+
+=head1 AUTHOR
+
+Peter Vlasveld
+
+=cut
+
+#take in data from file
+open(my $in, '<', "rosalind_iprb.txt") or die "Couldn't open input file: $!";
+my $data = <$in>;
+close $in;
+
+#split dataset into array
+die "Input file is empty\n" unless defined $data;
+my @dataset = split ' ', $data;
+die "Input file must contain three values\n" if @dataset < 3;
+
+#calculate total probability ratios
+my $total = $dataset[0] + $dataset[1] + $dataset[2];
+
+#the second pick is made from $total-1 individuals, so fewer than two would
+#divide by zero
+die "Population must contain at least two individuals\n" if $total < 2;
+
+my $initmRatio = $dataset[1]/$total;
+my $initnRatio = $dataset[2]/$total;
+
+#calculate values of progeny from probability ratios and population data;
+#each term is P(first pick) * P(second pick without replacement) * weight
+my $sameHeteroDom = ($initmRatio * (($dataset[1]-1)/($total-1)))*0.25;
+my $oneAndOne1 = ($initmRatio * ($dataset[2]/($total-1)))*0.5;
+my $oneAndOne2 = ($initnRatio * ($dataset[1]/($total-1)))*0.5;
+my $sameHeteroRec = $initnRatio * (($dataset[2]-1)/($total-1));
+
+#calculate resulting progeny
+my $result = 1-($sameHeteroDom + $oneAndOne1 + $oneAndOne2 + $sameHeteroRec);
+
+#output result to console
+printf("%.5f\n", $result);
diff --git a/REVC.pl b/REVC.pl
new file mode 100644
index 0000000..ee78d46
--- /dev/null
+++ b/REVC.pl
@@ -0,0 +1,45 @@
+#!/usr/bin/perl
+use strict;
+use warnings;
+
+=pod
+
+=head1 NAME
+
+REVC.pl - solution to the Rosalind REVC problem
+
+=head1 DESCRIPTION
+
+Reads the first line of rosalind_revc.txt, complements every A, T, G and C
+(all other characters are dropped), reverses the result and writes it to
+outlog.txt and to the console.
+
+=head1 AUTHOR
+
+Peter Vlasveld
+
+=cut
+
+#take in dataset
+open(my $in, '<', "rosalind_revc.txt") or die "Couldn't open input file: $!";
+my $dataset = <$in>;
+close $in;
+die "Input file is empty\n" unless defined $dataset;
+
+#keep only the four upper-case bases; this also drops the line ending
+(my $dna = $dataset) =~ tr/ACGT//cd;
+
+#build the complement with a single transliteration; tr/// is core Perl,
+#unlike the Switch source filter that is no longer shipped with perl
+(my $complement = $dna) =~ tr/ACGT/TGCA/;
+
+#reverse the complement
+my $result = scalar reverse $complement;
+
+#output to a file
+open(my $out, '>', "outlog.txt") or die "couldn't open output file: $!";
+print {$out} $result;
+close $out or die "couldn't write output file: $!";
+
+#print solution to console
+print "$result\n";