erange 4.0a dev release with integrated cistematic
[erange.git] / cistematic / programs / meme.py
diff --git a/cistematic/programs/meme.py b/cistematic/programs/meme.py
new file mode 100644 (file)
index 0000000..3e4bba6
--- /dev/null
@@ -0,0 +1,182 @@
+###########################################################################
+#                                                                         #
+# C O P Y R I G H T   N O T I C E                                         #
+#  Copyright (c) 2003-10 by:                                              #
+#    * California Institute of Technology                                 #
+#                                                                         #
+#    All Rights Reserved.                                                 #
+#                                                                         #
+# Permission is hereby granted, free of charge, to any person             #
+# obtaining a copy of this software and associated documentation files    #
+# (the "Software"), to deal in the Software without restriction,          #
+# including without limitation the rights to use, copy, modify, merge,    #
+# publish, distribute, sublicense, and/or sell copies of the Software,    #
+# and to permit persons to whom the Software is furnished to do so,       #
+# subject to the following conditions:                                    #
+#                                                                         #
+# The above copyright notice and this permission notice shall be          #
+# included in all copies or substantial portions of the Software.         #
+#                                                                         #
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,         #
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF      #
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND                   #
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS     #
+# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN      #
+# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN       #
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE        #
+# SOFTWARE.                                                               #
+###########################################################################
+#
+# meme.py
+import os
+import time
+import string
+from cistematic.programs import Program
+from cistematic.core.motif import Motif
+from erange.commoncode import getConfigParser, getConfigOption
+
+# Model names accepted by Meme.setModel(); the selected name is handed to
+# the meme executable through its "-mod" option in Meme.buildCommand().
+SUPPORTED_MODELS = ["oops", "zoops", "tcm"]
+
+class Meme(Program):
+    
+
+    def __init__(self):
+        parser = getConfigParser()
+        memeProgramName = getConfigOption(parser, "programs", "meme", default="meme.3.0.8")
+        self.memePath = string.join([Program.programRoot, memeProgramName], "/")
+        self.model = "zoops"
+        self.background = ""
+        self.numMotifs = 10
+        self.minWidth = 6
+        self.maxWidth = 15
+        self.bfile = ""
+        self.motifs = []
+        self.contents = []
+
+
+    def getSettings(self):
+        return (self.model, self.background, self.numMotifs, self.bfile)
+
+
+    def setSettings(self, settings):
+        self.clearMotifList()
+        try:
+            (self.model, self.background, self.numMotifs, self.bfile) = settings
+        except ValueError:
+            print "Error unpacking settings for Meme. No parameters changed."
+
+
+    def setGenExpOptions(self, optionArray):
+        for option in optionArray:
+            try:
+                (optionName, optionValue) = option.split(":")
+            except ValueError:
+                continue
+
+            if optionName == "model":
+                self.setModel(optionValue)
+
+            if optionName == "nmotifs":
+                self.setNumMotifs(optionValue)
+
+            if optionName == "maxwidth":
+                self.setMaxWidth(optionValue)
+
+
+    def buildCommand(self):
+        argList = ["%s/bin/meme" % self.memePath,
+                   self.inputFilePath,
+                   "-dna -maxsize 1000000",
+                   "-maxw %d" % self.maxWidth,
+                   "-minw %d" % self.minWidth,
+                   "-mod %s" % self.model,
+                   "-revcomp -nmotifs %d" % self.numMotifs
+        ]
+
+        if self.bfile != "":
+            argList.append("-bfile %s" % self.bfile)
+
+        cmd = string.join(argList, " ")
+
+        return cmd
+
+
+    def setModel(self, modelType):
+        if modelType in SUPPORTED_MODELS:
+            self.model = modelType
+
+
+    def setNumMotifs(self, motifNum):
+        self.numMotifs = motifNum
+
+
+    def setMinWidth(self, width):
+        self.minWidth = width
+
+
+    def setMaxWidth(self, width):
+        self.maxWidth = width
+
+
+    def setBackground(self, backgroundFileName):
+        self.bfile = backgroundFileName
+
+
+    def clearMotifList(self):
+        self.motifs = []
+
+
+    def setContents(self, motifFile):
+        self.contents = motifFile.readlines()
+
+
+    def run(self):
+        startTime = time.time()
+        memeResultFile = os.popen(Meme.buildCommand(self))
+        self.setContents(memeResultFile)
+        stopTime = time.time()
+    
+        print "\nThis run took %.3f seconds and produced %d lines" % (stopTime - startTime, len(self.contents))
+    def getMotifs(self):
+        index = 0
+        self.clearMotifList()
+        try:
+            for motif in range(0, self.numMotifs):
+                PWM = []
+                seqList = []
+                index = self.locateMotifSeqs(index)
+                info = self.contents[index]
+                index += 1
+                while string.find(self.contents[index], "//") < 0:
+                    fields = self.contents[index].split()
+                    seqList.append(fields[-2])
+                    index += 1
+
+                motifName = "%s-meme-%d" % (self.tagID, motif + 1)
+                self.motifs.append(Motif(motifName, "", PWM, seqList, 0.0, info))
+        except:
+            pass
+
+        return self.motifs
+
+
+    def locateMotif(self, startingLineNum):
+        return self.getDataLineNumberAfterHeader("pspm_doc", startingLineNum)
+
+
+    def locateMotifSeqs(self, startingLineNum):
+        return self.getDataLineNumberAfterHeader("BLOCKS_doc", startingLineNum)
+
+
+    def getDataLineNumberAfterHeader(self, headerText, startingLineNum):
+        currentLineNum = startingLineNum
+        numLines = len(self.contents)
+        while currentLineNum < numLines and string.find(self.contents[currentLineNum], headerText) < 0:
+            currentLineNum += 1
+
+        if currentLineNum < numLines:
+            return currentLineNum + 3
+
+        return -1
\ No newline at end of file