erange 4.0a dev release with integrated cistematic
[erange.git] / cistematic / experiments / genexp.py
diff --git a/cistematic/experiments/genexp.py b/cistematic/experiments/genexp.py
new file mode 100644 (file)
index 0000000..b6dae71
--- /dev/null
@@ -0,0 +1,252 @@
+###########################################################################
+#                                                                         #
+# C O P Y R I G H T   N O T I C E                                         #
+#  Copyright (c) 2003-10 by:                                              #
+#    * California Institute of Technology                                 #
+#                                                                         #
+#    All Rights Reserved.                                                 #
+#                                                                         #
+# Permission is hereby granted, free of charge, to any person             #
+# obtaining a copy of this software and associated documentation files    #
+# (the "Software"), to deal in the Software without restriction,          #
+# including without limitation the rights to use, copy, modify, merge,    #
+# publish, distribute, sublicense, and/or sell copies of the Software,    #
+# and to permit persons to whom the Software is furnished to do so,       #
+# subject to the following conditions:                                    #
+#                                                                         #
+# The above copyright notice and this permission notice shall be          #
+# included in all copies or substantial portions of the Software.         #
+#                                                                         #
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,         #
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF      #
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND                   #
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS     #
+# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN      #
+# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN       #
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE        #
+# SOFTWARE.                                                               #
+###########################################################################
+#
+import os
+import string
+from cistematic.programs import supportedPrograms
+from cistematic.experiments import experimentTypes, loadExperiment
+from os import environ
+
+if environ.get("CISTEMATIC_ROOT"):
+    cisRoot = environ.get("CISTEMATIC_ROOT") 
+else:
+    cisRoot = "/proj/genome"
+
+
+class GenExp:
+    dataPath = ""
+    dbFilePath = ""
+    expFilePath = ""
+    expName = ""
+    expType = ""
+    dataset = []
+    expProgs = []
+    expSettings = {}
+    analysisName = ""
+
+
+    def __init__(self, path, dbPath, newexp, type="", dset="", progs="", analysis="", createExp=False):
+        self.dataPath = path
+        self.dbFilePath = dbPath
+        self.expName = newexp
+        self.expFilePath = string.replace(dbPath, ".db", "-db.py")
+        self.expType = type
+        self.dataset = dset
+        self.expProgs = progs
+        self.analysisName = newexp + analysis
+
+        if createExp:
+            self.createExpDB()
+
+        if len(self.expProgs) == 0:
+            exp = loadExperiment( self.expName , self.dbFilePath)
+            self.expType = exp.experimentType
+            self.expProgs = exp.getSetting("expProgs")
+
+        if len(dset) == 0:
+            exp = loadExperiment( self.expName , self.dbFilePath)
+            self.expType = exp.experimentType
+            self.dataset = exp.getDataset()
+
+        self.createExpRunFile()
+
+
+    def run(self):
+        exp = loadExperiment( self.expName , self.dbFilePath)
+        exp.setExternalStatus("QUEUED")
+        del exp
+        os.system("cd %s ; nohup /usr/bin/python %s & " % (self.dataPath, self.expFilePath))
+
+
+    def createExpDB():
+        strexec = 'from cistematic.experiments.' + modName + ' import ' + self.expType + '\n'
+        exec strexec
+        strexec = 'exp = ' + self.expType + '("' + self.expName + '", "' + self.dbFilePath + '")\n'
+        exec strexec
+
+
+    def createExpRunFile(self):
+        outFile = open(self.expFilePath, "w")
+        outFile.write(self.expHead())
+        outFile.write(self.expInit())
+        outFile.write(self.expPrograms())
+        outFile.write(self.expRun())
+        outFile.write(self.expAnalysis())
+        outFile.write(self.expStatus())
+        outFile.close()
+
+
+    def expHead(self):
+        strHead = ["# Experiment: %s autogenerated by Cistematic GenExp 0.9.9b\n" % self.expName]
+        strHead.append("from sys import path\n")
+        strHead.append("cisPath = '%s'\n" % cisRoot)
+        strHead.append("if cisPath not in path:\n\tpath.append(cisPath)\n\n")
+        for oneProg in supportedPrograms:
+            strHead.append("from cistematic.programs.%s import %s\n" % (oneProg[0], oneProg[1]))
+
+        strHead.append("\n")
+
+        return string.join(strHead, "")
+
+
+    def expInit(self):
+        useMussa = False
+        useSeqcomp = False
+        startingGenome = ""
+        targetGenomes = []
+        consGene = []
+        geneIDList = []
+
+        if self.expType  in ["orthology", "phyloFoot", "phyloTest"]:
+            exp = loadExperiment( self.expName , self.dbFilePath)
+            consSettings = exp.getSetting("expConsConfig")
+            (window, threshold, numseq, homologs) = consSettings[0].split(":")
+            self.dataset.reverse()
+            if homologs != "1" and exp.settingsHasKey("gTargets"):
+                gTargets = exp.getSetting("gTargets")
+                if len(gTargets) > 0:
+                    targetGenomes = gTargets[0].split(":")
+
+            del exp
+            for geneID in self.dataset:
+                (genome, gid) = geneID
+                if (genome not in targetGenomes) and homologs == "1":
+                    targetGenomes.append(genome)
+
+                if startingGenome == "":
+                    startingGenome = genome
+
+                gid = gid[0]
+                # we are assuming that all of these genes will be from the same 
+                # genome - this is only true (or necessary) if these are not 
+                # explicitely maked as "homologs" by the user
+                if gid not in consGene:
+                    consGene.append(gid)
+
+                geneIDList.append((genome, gid))
+
+            preInitialize = ""
+            if homologs == "1":
+                preInitializeList = ["exp.createConservation()\n"]
+                preInitializeList.append("exp.loadConservation()\n")
+                preInitializeList.append("paralogs = %s\n" % str(geneIDList))
+                preInitializeList.append('exp.insertHomologs(paralogs, "genExp")\n\n')
+                preInitialize = string.join(preInitializeList, "")
+                del consGene[1:]
+
+            if int(numseq) > 0:
+                useSeqcomp = True
+            else:
+                useMussa = True
+        else:
+            initializeArguments = str(self.dataset)
+
+        strInit = ["# Initialize Experiment\n"]
+        strInit.append("from cistematic.experiments import loadExperiment\n\n")
+        strInit.append('exp = loadExperiment("%s", "%s")\n' % (self.expName, self.dbFilePath))
+        strInit.append('exp.setExternalStatus("INITIALIZING")\n')
+        strInit.append(preInitialize)
+        if useMussa or useSeqcomp:
+            if len(targetGenomes) > 0:
+                strInit.append('exp.initialize("%s", %s, %s)\n\n' % (startingGenome, str(consGene), str(targetGenomes)))
+            else:
+                strInit.append('exp.initialize("%s", %s)\n\n' % (startingGenome, str(consGene)))
+            strInit.append('exp.setExternalStatus("CONSERVATION")\n')
+            strInit.append("exp.computeAlignments()\n")
+        else:
+            strInit.append("exp.initialize(%s)\n\n" % initializeArguments)
+
+        if useMussa:
+            strInit.append("exp.mapMussaConservation(window=%s, threshold=%s)\n\n" % (window, threshold))
+
+        if useSeqcomp:
+            strInit.append("exp.mapSeqcompConservation(window=%s, threshold=%s, minSequences=%s)\n\n" % (window, threshold, numseq))
+
+        return string.join(strInit, "")
+
+
+    def expPrograms(self):
+        index = 1
+        genomes = []
+        for (genome, geneID) in self.dataset:
+            if genome not in genomes:
+                genomes.append(genome)
+
+        strProg = ["# Load individual programs and settings\n"]
+        for entry in self.expProgs:
+            progArray = entry[:-1].split(";")
+            (amod, aProg) = progArray[0].split(":")
+            strProg.append("prog%s = %s()\n" % (str(index), aProg))
+            if len(genomes) == 1 and self.expType == "Simple":
+                strProg.apend('prog%s.setGenome("%s")\n' % (str(index), genomes[0]))
+            if len(progArray) > 1:
+                strProg.append("prog%s.setGenExpOptions(%s)\n" % (str(index), str(progArray[1:])))
+            strProg.append("exp.appendProgram(prog%s)\n" % (str(index)))
+            index += 1
+
+        strProg.append("\n")
+
+        return string.join(strProg, "")
+
+
+    def expRun(self):
+        strRun = ["# Run experiment\n",
+                  'exp.setExternalStatus("MOTIF FINDING")\n',
+                  "exp.run()\n",
+                  "\n"
+        ]
+
+        return string.join(strRun, "")
+
+
+    def expAnalysis(self):
+        strAnalysis = ["# Analysis Section\n",
+                       'exp.setExternalStatus("ANALYZING")\n',
+                       'exp.loadAnalysis("consensus")\n',
+                       "exp.annotateConsensus()\n",
+                       "exp.mapConsensus()\n",
+                       "exp.buildMotifSize()\n",
+                       "\n",
+                       'exp.loadAnalysis("1 mismatch")\n',
+                       "exp.annotateConsensus(numMismatches=1)\n",
+                       "exp.mapConsensus(numMismatches=1)\n",
+                       "exp.buildMotifSize()\n",
+                       "\n",
+                       'exp.loadAnalysis("PWM - 90% threshold")\n',
+                       "exp.annotateConsensus()\n",
+                       "exp.mapMotifs(90.0)\n",
+                       "exp.buildMotifSize()\n\n",
+                       "\n"
+        ]
+
+        return string.join(strAnalysis, "")
+
+
+    def expStatus(self):
+        return "exp.resetExternalStatus()\n\n"
\ No newline at end of file