X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=erange.git;a=blobdiff_plain;f=cistematic%2Fprograms%2Fmafft.py;fp=cistematic%2Fprograms%2Fmafft.py;h=e2d725ff7f434fe83b9d74b16343e8bdb5cd165e;hp=0000000000000000000000000000000000000000;hb=bc30aca13e5ec397c92e67002fbf7a103130b828;hpb=0d3e3112fd04c2e6b44a25cacef1d591658ad181 diff --git a/cistematic/programs/mafft.py b/cistematic/programs/mafft.py new file mode 100644 index 0000000..e2d725f --- /dev/null +++ b/cistematic/programs/mafft.py @@ -0,0 +1,121 @@ +########################################################################### +# # +# C O P Y R I G H T N O T I C E # +# Copyright (c) 2003-10 by: # +# * California Institute of Technology # +# # +# All Rights Reserved. # +# # +# Permission is hereby granted, free of charge, to any person # +# obtaining a copy of this software and associated documentation files # +# (the "Software"), to deal in the Software without restriction, # +# including without limitation the rights to use, copy, modify, merge, # +# publish, distribute, sublicense, and/or sell copies of the Software, # +# and to permit persons to whom the Software is furnished to do so, # +# subject to the following conditions: # +# # +# The above copyright notice and this permission notice shall be # +# included in all copies or substantial portions of the Software. # +# # +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, # +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF # +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND # +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS # +# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN # +# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN # +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # +# SOFTWARE. 
###########################################################################
#
# mafft.py
from cistematic.programs import Program
import os
import time
from cistematic.core.motif import Motif


class Mafft(Program):
    """Wrapper around the MAFFT ``fftnsi`` multiple-alignment program.

    Multiple Alignment using Fast Fourier Transform. Uses fftnsi as
    described in:
        K. Katoh, K. Misawa, K. Kuma and T. Miyata (2002)
        Nucleic Acids Research 30: 3059-3066.

    Typical use: configure options with the ``set*`` methods (or
    ``setSettings``), call ``run()`` to execute the program, then call
    ``getAlignment()`` to parse the captured output.
    """
    # Directory holding the mafft executables, below the shared program root.
    mafftPath = "%s/mafft/" % Program.programRoot
    # Class-level defaults only.  Instances must never mutate these objects
    # in place (see _setOption), otherwise state leaks between instances.
    motifs = []
    argDict = {}


    def getSettings(self):
        """ Return the dictionary of command line options currently in effect.
        """
        return self.argDict


    def setSettings(self, settings):
        """ Replace all options with the given dictionary and reset motifs.

            settings maps option names (without the leading "--") to values.
            The dictionary is stored as-is, not copied.
        """
        self.motifs = []
        self.argDict = settings


    def _setOption(self, name, value):
        """ Store one command line option on this instance only.

            The class attribute argDict is a single dictionary shared by every
            instance, so writing into it directly would leak options from one
            Mafft object into all others.  On the first write the instance
            gets its own copy.
        """
        if "argDict" not in vars(self):
            self.argDict = dict(self.argDict)

        self.argDict[name] = value


    def buildCommand(self):
        """ Build, print, and return the shell command line for fftnsi.

            Every entry of the settings dictionary becomes " --<name> <value>".
            self.inputFilePath is appended last; it is not set in this class.
            NOTE(review): the command is later run through a shell and the
            pieces are not quoted, so the input path and option values must be
            trusted.
        """
        pieces = [self.mafftPath + "fftnsi "]
        for arg, value in self.argDict.items():
            pieces.append(" --" + arg + " %s" % str(value))

        pieces.append(" --quiet " + self.inputFilePath)
        cmd = "".join(pieces)
        # Single-argument parenthesised print: same output under the Python 2
        # print statement and the Python 3 print function.
        print("cmd is %s" % (cmd))

        return cmd


    def setGapOpening(self, op):
        """ Gap opening penalty. Default is 1.58
        """
        self._setOption("op", op)


    def setOffset(self, ep):
        """ Offset - like a gap expansion penalty. Default is 0.120
        """
        self._setOption("ep", ep)


    def setScoringMatrix(self, bl):
        """ set BLOSUM scoring matrix. Choices are 30, 45, 62, and 80.
        """
        self._setOption("bl", bl)


    def setMaxiterate(self, maxi):
        """ maximum number of iterations in progressive method.
        """
        self._setOption("maxiterate", maxi)


    def setRetree(self, tnum):
        """ number of tree building in progressive method.
        """
        self._setOption("retree", tnum)


    def run(self):
        """ Run fftnsi, keep its standard output in self.contents, and print
            the elapsed wall-clock time.
        """
        startTime = time.time()
        # Mafft.buildCommand is called explicitly, as in the original code, so
        # a subclass override of buildCommand is not used here.
        pipe = os.popen(Mafft.buildCommand(self))
        try:
            self.contents = pipe.readlines()
        finally:
            # Always release the pipe, even if reading fails.
            pipe.close()

        stopTime = time.time()

        # Arguments are ordered stop, start so the printed subtraction is
        # actually true (stop - start = elapsed).
        print("\nThis run took %.3f - %.3f = %.3f seconds" % (stopTime, startTime, stopTime - startTime))


    def _storeRecord(self, alignedDict, dictKey, chunks):
        """ Add one record to alignedDict unless its name or sequence is empty.
        """
        sequence = "".join(chunks)
        if dictKey and sequence:
            alignedDict[dictKey] = sequence


    def getAlignment(self):
        """ take the results stored in self.contents and return a dictionary for inclusion into the genepool.

            Returns a dictionary mapping each record name to its aligned
            sequence, with the sequence lines of a record joined together.
            Records with an empty name or an empty sequence are skipped.
        """
        alignedDict = {}
        dictKey = ""
        chunks = []
        for rawLine in self.contents:
            # Strip only the line terminator; a final line without a newline
            # must not lose its last character.
            line = rawLine.rstrip("\r\n")
            if line.startswith(">"):
                self._storeRecord(alignedDict, dictKey, chunks)
                # Drops ">" and the character after it.  Presumably headers
                # are written as "> name" - verify against the input writer.
                dictKey = line[2:]
                chunks = []
            else:
                chunks.append(line)

        # The last record has no following header to trigger its storage.
        self._storeRecord(alignedDict, dictKey, chunks)

        return alignedDict