erange 4.0a dev release with integrated cistematic
[erange.git] / cistematic / programs / mafft.py
diff --git a/cistematic/programs/mafft.py b/cistematic/programs/mafft.py
new file mode 100644 (file)
index 0000000..e2d725f
--- /dev/null
@@ -0,0 +1,121 @@
+###########################################################################
+#                                                                         #
+# C O P Y R I G H T   N O T I C E                                         #
+#  Copyright (c) 2003-10 by:                                              #
+#    * California Institute of Technology                                 #
+#                                                                         #
+#    All Rights Reserved.                                                 #
+#                                                                         #
+# Permission is hereby granted, free of charge, to any person             #
+# obtaining a copy of this software and associated documentation files    #
+# (the "Software"), to deal in the Software without restriction,          #
+# including without limitation the rights to use, copy, modify, merge,    #
+# publish, distribute, sublicense, and/or sell copies of the Software,    #
+# and to permit persons to whom the Software is furnished to do so,       #
+# subject to the following conditions:                                    #
+#                                                                         #
+# The above copyright notice and this permission notice shall be          #
+# included in all copies or substantial portions of the Software.         #
+#                                                                         #
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,         #
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF      #
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND                   #
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS     #
+# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN      #
+# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN       #
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE        #
+# SOFTWARE.                                                               #
+###########################################################################
+#
+# mafft.py
+from cistematic.programs import Program
+import os, time
+from cistematic.core.motif import Motif
+
+
+class Mafft(Program):
+    """ Multiple Alignment using Fast Fourier Transform. Uses fftnsi as described in:
+        K. Katoh, K. Misawa, K. Kuma and T. Miyata (2002)
+        Nucleic Acids Research 30: 3059-3066.
+    """
+    mafftPath = "%s/mafft/" % Program.programRoot
+    motifs = []
+    argDict = {}
+
+
+    def getSettings(self):
+        return self.argDict
+
+
+    def setSettings(self, settings):
+        self.motifs = []
+        self.argDict = settings
+
+
+    def buildCommand(self):
+        cmd = self.mafftPath + "fftnsi "
+        for arg in self.argDict.keys():
+            cmd = cmd + " --" + arg + " %s" % str(self.argDict[arg])
+        cmd +=  " --quiet " + self.inputFilePath 
+        print "cmd is %s" % (cmd)
+
+        return cmd
+
+
+    def setGapOpening(self, op):
+        """ Gap opening penalty. Default is 1.58
+        """
+        self.argDict["op"] = op
+
+
+    def setOffset(self, ep):
+        """ Offset - like a gap expansion penalty. Default is 0.120
+        """
+        self.argDict["ep"] = ep
+
+
+    def setScoringMatrix(self, bl):
+        """ set Blossum scoring matrix. Choices are 30, 45, 62, and 80.
+        """
+        self.argDict["bl"] = bl
+
+
+    def setMaxiterate(self, maxi):
+        """ maximum number of iterations in progressive method.
+        """
+        self.argDict["maxiterate"] = maxi
+
+
+    def setRetree(self, tnum):
+        """ number of tree building in progressive method.
+        """
+        self.argDict["retree"] = tnum
+
+
+    def run(self):
+        startTime = time.time()
+        self.contents = os.popen(Mafft.buildCommand(self)).readlines()
+        stopTime = time.time()
+    
+        print "\nThis run took %.3f - %.3f = %.3f seconds" % (startTime, stopTime, stopTime - startTime)
+
+
+    def getAlignment(self):
+        """ take the results stored in self.contents and return a dictionary for inclusion into the genepool.
+        """
+        alignedDict= {}
+        dictKey = ""
+        sequence = ""
+        for line in self.contents:
+            if line[0] == ">":
+                if len(dictKey) > 0 and len(sequence) > 0:
+                    alignedDict[dictKey] = sequence
+                dictKey = line[2:-1]
+                sequence = ""
+            else:
+                sequence += line[:-1]
+
+        if len(dictKey) > 0 and len(sequence) > 0:
+            alignedDict[dictKey] = sequence
+
+        return alignedDict
\ No newline at end of file