diff --git a/go/README.md b/go/README.md new file mode 100644 index 0000000..598e871 --- /dev/null +++ b/go/README.md @@ -0,0 +1,5 @@ +# go-rosalind + +go-rosalind is a command-line utility for running solutions to [Rosalind.info](rosalind.info) problems that were written in go. + +This is mostly just used by me to consolidate some repeated code and practice writing stuff in Go. diff --git a/go/cmd/cons.go b/go/cmd/cons.go new file mode 100644 index 0000000..46c3896 --- /dev/null +++ b/go/cmd/cons.go @@ -0,0 +1,19 @@ +package cmd + +import ( + "github.com/spf13/cobra" + "gitlab.com/fizzizist/go-rosalind/pkg/solutions" +) + +var consCmd = &cobra.Command{ + Use: "cons", + Short: "Finds a Motif in input DNA", + Args: cobra.MinimumNArgs(1), + Run: func(cmd *cobra.Command, args []string) { + solutions.Cons(args[0]) + }, +} + +func init() { + rootCmd.AddCommand(consCmd) +} diff --git a/go/cmd/root.go b/go/cmd/root.go new file mode 100644 index 0000000..b8179c5 --- /dev/null +++ b/go/cmd/root.go @@ -0,0 +1,12 @@ +package cmd + +import "github.com/spf13/cobra" + +var rootCmd = &cobra.Command{ + Use: "go-rosalind", + Short: "An app for running Rosalind solutions", +} + +func Execute() error { + return rootCmd.Execute() +} diff --git a/go/cmd/subs.go b/go/cmd/subs.go new file mode 100644 index 0000000..fcee4cc --- /dev/null +++ b/go/cmd/subs.go @@ -0,0 +1,19 @@ +package cmd + +import ( + "github.com/spf13/cobra" + "gitlab.com/fizzizist/go-rosalind/pkg/solutions" +) + +var dnaMotifsCmd = &cobra.Command{ + Use: "subs", + Short: "Finds a Motif in input DNA", + Args: cobra.MinimumNArgs(1), + Run: func(cmd *cobra.Command, args []string) { + solutions.FindMotifs(args[0]) + }, +} + +func init() { + rootCmd.AddCommand(dnaMotifsCmd) +} diff --git a/go/main.go b/go/main.go new file mode 100644 index 0000000..d4b2bc3 --- /dev/null +++ b/go/main.go @@ -0,0 +1,15 @@ +package main + +import ( + "fmt" + "os" + + "gitlab.com/fizzizist/go-rosalind/cmd" +) + +func main() { + if err := 
// FileToStringArray reads the file at filename and returns its contents as a
// slice of lines with the line terminators removed.
//
// It returns a nil slice and a non-nil error if the file cannot be opened or
// read. Lines up to 64 MiB long are accepted.
func FileToStringArray(filename string) ([]string, error) {
	// bufio.Scanner rejects lines longer than 64 KiB by default, which a
	// sequence stored on a single line can exceed, so the limit is raised.
	const maxLineBytes = 64 << 20

	file, err := os.Open(filename)
	if err != nil {
		return nil, err
	}
	// The file is only read, so the deferred Close is the single release point
	// and its error cannot indicate lost data.
	defer file.Close()

	scanner := bufio.NewScanner(file)
	scanner.Buffer(make([]byte, 0, 64*1024), maxLineBytes)

	var fileLines []string
	for scanner.Scan() {
		fileLines = append(fileLines, scanner.Text())
	}
	if err := scanner.Err(); err != nil {
		return nil, err
	}

	return fileLines, nil
}

// WriteStringsToFile creates (or truncates) the file at filepath and writes
// each element of content followed by a newline.
//
// It returns the first error from creating, writing, flushing or closing the
// file. The file handle is released on every path.
func WriteStringsToFile(content []string, filepath string) error {
	file, err := os.Create(filepath)
	if err != nil {
		return err
	}

	w := bufio.NewWriter(file)
	for _, line := range content {
		if _, err := fmt.Fprintln(w, line); err != nil {
			// Best-effort close: the write error is the one worth reporting.
			_ = file.Close()
			return err
		}
	}
	if err := w.Flush(); err != nil {
		// Best-effort close: the flush error is the one worth reporting.
		_ = file.Close()
		return err
	}

	// Close can surface a deferred write failure, so its error is returned.
	return file.Close()
}

// ParseFasta parses a .fasta formatted file, returning the header lines and
// the sequences in two separate slices.
//
// Each label is the complete header line, including the leading '>'. Each
// content entry is the concatenation of the sequence lines that follow a
// header. Blank lines are ignored. A header with no sequence lines yields an
// empty string, so labels[i] always pairs with content[i] when the file
// starts with a header. Sequence lines that appear before the first header
// form a content entry of their own with no matching label.
func ParseFasta(filename string) ([]string, []string, error) {
	lines, err := FileToStringArray(filename)
	if err != nil {
		return nil, nil, err
	}

	var (
		labels  []string
		content []string
		seq     strings.Builder
		pending bool // true while a record still has to be appended to content
	)
	// flush appends the record collected so far, if any, and starts a new one.
	flush := func() {
		if !pending {
			return
		}
		content = append(content, seq.String())
		seq.Reset()
		pending = false
	}

	for _, line := range lines {
		if line == "" {
			// Indexing line[0] on a blank line used to panic; blank lines
			// carry no sequence data, so they are skipped.
			continue
		}
		if strings.HasPrefix(line, ">") {
			flush()
			labels = append(labels, line)
			pending = true
			continue
		}
		seq.WriteString(line)
		pending = true
	}
	flush()

	return labels, content, nil
}
profile[j][i] + } + } + consensus = append(consensus, maxCons) + } + + fmt.Println("Writing output file...") + err = io.WriteStringsToFile( + CompileOutput(strings.Join(consensus, ""), profile), + "results/cons.txt", + ) + if err != nil { + log.Fatalf("Failed writing output file: %s", err) + } + fmt.Println("Done.") +} diff --git a/go/pkg/solutions/subs.go b/go/pkg/solutions/subs.go new file mode 100644 index 0000000..000e7d8 --- /dev/null +++ b/go/pkg/solutions/subs.go @@ -0,0 +1,33 @@ +package solutions + +import ( + "fmt" + "log" + "strconv" + "strings" + + "gitlab.com/fizzizist/go-rosalind/pkg/io" +) + +func FindMotifs(filename string) { + lines, err := io.FileToStringArray(filename) + if err != nil { + log.Fatalf("Failed to read file: %s", err) + } + if len(lines) != 2 { + log.Fatalf("Input file should have exactly 2 lines, a DNA string and a motif") + } + + dna := lines[0] + motif := lines[1] + motLen := len(motif) + var positions []string + + for i := 0; i <= len(dna)-motLen; i++ { + if motif == dna[i:i+motLen] { + positions = append(positions, strconv.Itoa(i+1)) + } + } + fmt.Println(strings.Join(positions, " ")) + +} diff --git a/go/testing/cons.fasta b/go/testing/cons.fasta new file mode 100644 index 0000000..b3bc037 --- /dev/null +++ b/go/testing/cons.fasta @@ -0,0 +1,14 @@ +>Rosalind_1 +ATCCAGCT +>Rosalind_2 +GGGCAACT +>Rosalind_3 +ATGGATCT +>Rosalind_4 +AAGCAACC +>Rosalind_5 +TTGGAACT +>Rosalind_6 +ATGCCATT +>Rosalind_7 +ATGGCACT diff --git a/go/testing/subs.txt b/go/testing/subs.txt new file mode 100644 index 0000000..a38c50d --- /dev/null +++ b/go/testing/subs.txt @@ -0,0 +1,2 @@ +GATATATGCATATACTT +ATAT diff --git a/scala/project/Dependencies.scala b/scala/project/Dependencies.scala new file mode 100644 index 0000000..558929d --- /dev/null +++ b/scala/project/Dependencies.scala @@ -0,0 +1,5 @@ +import sbt._ + +object Dependencies { + lazy val scalaTest = "org.scalatest" %% "scalatest" % "3.0.5" +} diff --git a/scala/src/main/scala/dna/dna.scala 
/** Command-line entry point that evaluates the recurrence
  * F(1) = F(2) = 1, F(n) = F(n-1) + k * F(n-2) and prints F(n).
  */
object FIB {

  /** Returns F(n) for the recurrence F(n) = F(n-1) + k * F(n-2) with
    * F(1) = F(2) = 1. Any n below 3 yields 1.
    *
    * @param n index of the term to compute
    * @param k multiplier applied to the term two steps back
    */
  def rabbitPairs(n: Int, k: Int): BigInt = {
    var previous = BigInt(1)
    var current = BigInt(1)
    for (_ <- 3 to n) {
      val next = current + previous * k
      previous = current
      current = next
    }
    current
  }

  /** Expects two integer arguments, n and k, and prints F(n). */
  def main(args: Array[String]): Unit = {
    // Without this check a missing argument surfaced as an
    // ArrayIndexOutOfBoundsException.
    if (args.length < 2) {
      Console.err.println("usage: FIB <n> <k>")
      sys.exit(1)
    }
    println(rabbitPairs(args(0).toInt, args(1).toInt).toString)
  }
}
/** Command-line entry point that prints, to five decimal places, the
  * probability that two organisms drawn at random without replacement produce
  * an offspring showing the dominant phenotype.
  */
object IPRB {

  /** Probability of a dominant-phenotype offspring for a population made of
    * `homDom` homozygous dominant, `het` heterozygous and `homRec` homozygous
    * recessive organisms.
    *
    * The result is 1 minus the probability of a recessive offspring, summed
    * over the four ordered pairings that can produce one.
    *
    * @throws IllegalArgumentException if a count is negative or the population
    *                                  holds fewer than two organisms
    */
  def dominantProbability(homDom: Double, het: Double, homRec: Double): Double = {
    require(homDom >= 0 && het >= 0 && homRec >= 0, "counts must be non-negative")
    val total = homDom + het + homRec
    // With fewer than two organisms (total - 1) is zero or negative, which
    // previously produced NaN or a meaningless value.
    require(total >= 2, "population must contain at least two organisms")

    // Both parents homozygous recessive: the offspring is always recessive.
    val recRec = (homRec / total) * ((homRec - 1) / (total - 1))
    // One recessive and one heterozygous parent, in either draw order: half
    // of the offspring are recessive.
    val recHet = ((homRec / total) * (het / (total - 1))) * 0.5
    val hetRec = ((het / total) * (homRec / (total - 1))) * 0.5
    // Both parents heterozygous: a quarter of the offspring are recessive.
    val hetHet = ((het / total) * ((het - 1) / (total - 1))) * 0.25

    1 - (recRec + recHet + hetRec + hetHet)
  }

  /** Expects three numeric arguments: the homozygous dominant, heterozygous
    * and homozygous recessive counts, in that order.
    */
  def main(args: Array[String]): Unit = {
    if (args.length != 3) {
      Console.err.println("usage: IPRB <homozygous-dominant> <heterozygous> <homozygous-recessive>")
      sys.exit(1)
    }
    val popul = args.map(_.toDouble)
    val probabTotal = dominantProbability(popul(0), popul(1), popul(2))
    println(f"$probabTotal%.5f")
  }
}
package revc

import scala.io.Source
import java.io._

/** Writes the reverse complement of the DNA string stored in REVC.txt to
  * REVCOut.txt.
  *
  * This object was previously declared as `rna.RNA`, the same fully qualified
  * name used by rna/rna.scala, so the two files could not be compiled
  * together. It now follows the package/object naming used by the other
  * solutions (dna.DNA, hamm.HAMM, prot.PROT).
  */
object REVC {
  private val complement = Map('A' -> 'T', 'T' -> 'A', 'C' -> 'G', 'G' -> 'C')

  /** Returns `dna` reversed with every base replaced by its complement.
    *
    * @throws NoSuchElementException if `dna` contains a character other than
    *                                A, C, G or T
    */
  def reverseComplement(dna: String): String =
    dna.reverse.map(complement(_))

  def main(args: Array[String]): Unit = {
    val source = Source.fromFile("REVC.txt")
    // Close the input even when reading fails.
    val dnaStr = try source.getLines().mkString finally source.close()

    val pw = new PrintWriter(new File("REVCOut.txt"))
    // Close the output even when the complement lookup throws.
    try pw.write(reverseComplement(dnaStr)) finally pw.close()
  }
}