X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=erange.git;a=blobdiff_plain;f=cistematic%2Fexperiments%2Fgenexp.py;fp=cistematic%2Fexperiments%2Fgenexp.py;h=b6dae71a56e3f16e16a814dc1c43791acc42575c;hp=0000000000000000000000000000000000000000;hb=bc30aca13e5ec397c92e67002fbf7a103130b828;hpb=0d3e3112fd04c2e6b44a25cacef1d591658ad181 diff --git a/cistematic/experiments/genexp.py b/cistematic/experiments/genexp.py new file mode 100644 index 0000000..b6dae71 --- /dev/null +++ b/cistematic/experiments/genexp.py @@ -0,0 +1,252 @@ +########################################################################### +# # +# C O P Y R I G H T N O T I C E # +# Copyright (c) 2003-10 by: # +# * California Institute of Technology # +# # +# All Rights Reserved. # +# # +# Permission is hereby granted, free of charge, to any person # +# obtaining a copy of this software and associated documentation files # +# (the "Software"), to deal in the Software without restriction, # +# including without limitation the rights to use, copy, modify, merge, # +# publish, distribute, sublicense, and/or sell copies of the Software, # +# and to permit persons to whom the Software is furnished to do so, # +# subject to the following conditions: # +# # +# The above copyright notice and this permission notice shall be # +# included in all copies or substantial portions of the Software. # +# # +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, # +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF # +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND # +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS # +# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN # +# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN # +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # +# SOFTWARE. 
#
###########################################################################
#
import os
import string
from cistematic.programs import supportedPrograms
from cistematic.experiments import experimentTypes, loadExperiment
from os import environ

# Directory that generated run scripts append to sys.path.  Taken from the
# CISTEMATIC_ROOT environment variable when it is set and non-empty.
cisRoot = environ.get("CISTEMATIC_ROOT") or "/proj/genome"


class GenExp:
    """Write, and optionally launch, a stand-alone run script for an experiment.

    Constructing a GenExp writes a Python script to ``expFilePath``.  The
    script loads the experiment with ``loadExperiment``, initializes it,
    attaches the configured programs, runs it, performs the analysis passes
    and finally resets the experiment's external status.  ``run()`` starts
    that script in the background.
    """

    # Class-level defaults; __init__ rebinds these on every instance.
    dataPath = ""
    dbFilePath = ""
    expFilePath = ""
    expName = ""
    expType = ""
    dataset = []
    expProgs = []
    expSettings = {}
    analysisName = ""


    def __init__(self, path, dbPath, newexp, type="", dset="", progs="", analysis="", createExp=False):
        """Record the experiment description and write its run script.

        path      -- directory the run script is executed from (see run()).
        dbPath    -- path of the experiment database file.
        newexp    -- experiment name passed to loadExperiment.
        type      -- experiment type name; replaced by the stored
                     experimentType when progs or dset is empty.
        dset      -- dataset entries; when empty, exp.getDataset() is used.
        progs     -- program entries; when empty, the "expProgs" setting of
                     the stored experiment is used.
        analysis  -- suffix appended to newexp to form analysisName.
        createExp -- when true, createExpDB() is called first.
        """
        self.dataPath = path
        self.dbFilePath = dbPath
        self.expName = newexp
        if dbPath.endswith(".db"):
            # Replace only the extension so that a ".db" occurring inside a
            # directory name is left alone.
            self.expFilePath = dbPath[:-len(".db")] + "-db.py"
        else:
            self.expFilePath = dbPath.replace(".db", "-db.py")

        self.expType = type
        self.dataset = dset
        self.expProgs = progs
        # Per-instance dict so instances do not share the class-level one.
        self.expSettings = {}
        self.analysisName = newexp + analysis

        if createExp:
            self.createExpDB()

        needProgs = len(self.expProgs) == 0
        needDataset = len(dset) == 0
        if needProgs or needDataset:
            # Load the stored experiment once and fill in whatever the caller
            # did not supply.
            exp = loadExperiment(self.expName, self.dbFilePath)
            self.expType = exp.experimentType
            if needProgs:
                self.expProgs = exp.getSetting("expProgs")

            if needDataset:
                self.dataset = exp.getDataset()

        self.createExpRunFile()


    def run(self):
        """Mark the experiment as "QUEUED" and start the run script.

        The script is started with /usr/bin/python under nohup, in the
        background, from within dataPath.
        """
        exp = loadExperiment(self.expName, self.dbFilePath)
        exp.setExternalStatus("QUEUED")
        del exp
        # NOTE(review): dataPath and expFilePath are interpolated unquoted
        # into a shell command line; callers must not pass untrusted paths.
        os.system("cd %s ; nohup /usr/bin/python %s & " % (self.dataPath, self.expFilePath))


    def createExpDB(self, modName=None):
        """Instantiate the experiment class named by expType.

        The class is imported from ``cistematic.experiments.<modName>`` and
        called with (expName, dbFilePath); the instance is then discarded.

        modName -- module holding the experiment class.
        """
        if modName is None:
            # NOTE(review): the original body referenced an undefined name
            # `modName`.  Presumably the module is the experiment type with
            # its first letter lower-cased -- TODO confirm against the
            # modules in cistematic.experiments.
            modName = self.expType[:1].lower() + self.expType[1:]

        # A non-empty fromlist makes __import__ return the submodule itself
        # rather than the top-level package.
        module = __import__("cistematic.experiments." + modName, globals(), locals(), [self.expType])
        expClass = getattr(module, self.expType)
        exp = expClass(self.expName, self.dbFilePath)
        del exp


    def createExpRunFile(self):
        """Write the generated run script to expFilePath."""
        # Build every section before opening the file so that a failure while
        # generating does not truncate an existing script.
        sections = [self.expHead(),
                    self.expInit(),
                    self.expPrograms(),
                    self.expRun(),
                    self.expAnalysis(),
                    self.expStatus()
        ]
        outFile = open(self.expFilePath, "w")
        try:
            for section in sections:
                outFile.write(section)
        finally:
            outFile.close()


    def expHead(self):
        """Return the script header: banner, sys.path setup, program imports."""
        strHead = ["# Experiment: %s autogenerated by Cistematic GenExp 0.9.9b\n" % self.expName]
        strHead.append("from sys import path\n")
        strHead.append("cisPath = '%s'\n" % cisRoot)
        strHead.append("if cisPath not in path:\n\tpath.append(cisPath)\n\n")
        for oneProg in supportedPrograms:
            # Each entry is indexed as (module name, class name).
            strHead.append("from cistematic.programs.%s import %s\n" % (oneProg[0], oneProg[1]))

        strHead.append("\n")

        return "".join(strHead)


    def expInit(self):
        """Return the script section that loads and initializes the experiment.

        For the "orthology", "phyloFoot" and "phyloTest" types the first
        "expConsConfig" setting is split on ":" into window, threshold,
        numseq and homologs, and conservation mapping calls are emitted.
        Note that self.dataset is reversed in place on that path.  For every
        other type the dataset is passed straight to exp.initialize().
        """
        useMussa = False
        useSeqcomp = False
        startingGenome = ""
        targetGenomes = []
        consGene = []
        geneIDList = []
        # Defined up front: previously these were only bound on one branch,
        # so non-conservation experiment types raised NameError below.
        preInitialize = ""
        initializeArguments = ""

        if self.expType in ["orthology", "phyloFoot", "phyloTest"]:
            exp = loadExperiment(self.expName, self.dbFilePath)
            consSettings = exp.getSetting("expConsConfig")
            (window, threshold, numseq, homologs) = consSettings[0].split(":")
            self.dataset.reverse()
            if homologs != "1" and exp.settingsHasKey("gTargets"):
                gTargets = exp.getSetting("gTargets")
                if len(gTargets) > 0:
                    targetGenomes = gTargets[0].split(":")

            del exp
            for geneID in self.dataset:
                (genome, gid) = geneID
                if (genome not in targetGenomes) and homologs == "1":
                    targetGenomes.append(genome)

                if startingGenome == "":
                    startingGenome = genome

                gid = gid[0]
                # we are assuming that all of these genes will be from the same
                # genome - this is only true (or necessary) if these are not
                # explicitly marked as "homologs" by the user
                if gid not in consGene:
                    consGene.append(gid)

                geneIDList.append((genome, gid))

            if homologs == "1":
                preInitializeList = ["exp.createConservation()\n"]
                preInitializeList.append("exp.loadConservation()\n")
                preInitializeList.append("paralogs = %s\n" % str(geneIDList))
                preInitializeList.append('exp.insertHomologs(paralogs, "genExp")\n\n')
                preInitialize = "".join(preInitializeList)
                # Only the first gene is passed to exp.initialize() when the
                # user supplied the homologs explicitly.
                del consGene[1:]

            if int(numseq) > 0:
                useSeqcomp = True
            else:
                useMussa = True
        else:
            initializeArguments = str(self.dataset)

        strInit = ["# Initialize Experiment\n"]
        strInit.append("from cistematic.experiments import loadExperiment\n\n")
        strInit.append('exp = loadExperiment("%s", "%s")\n' % (self.expName, self.dbFilePath))
        strInit.append('exp.setExternalStatus("INITIALIZING")\n')
        strInit.append(preInitialize)
        if useMussa or useSeqcomp:
            if len(targetGenomes) > 0:
                strInit.append('exp.initialize("%s", %s, %s)\n\n' % (startingGenome, str(consGene), str(targetGenomes)))
            else:
                strInit.append('exp.initialize("%s", %s)\n\n' % (startingGenome, str(consGene)))

            strInit.append('exp.setExternalStatus("CONSERVATION")\n')
            strInit.append("exp.computeAlignments()\n")
        else:
            strInit.append("exp.initialize(%s)\n\n" % initializeArguments)

        if useMussa:
            strInit.append("exp.mapMussaConservation(window=%s, threshold=%s)\n\n" % (window, threshold))

        if useSeqcomp:
            strInit.append("exp.mapSeqcompConservation(window=%s, threshold=%s, minSequences=%s)\n\n" % (window, threshold, numseq))

        return "".join(strInit)


    def expPrograms(self):
        """Return the script section that creates and registers each program.

        Every expProgs entry has its last character dropped and is split on
        ";"; the first field is "<module>:<class>" and any remaining fields
        are passed to setGenExpOptions().
        """
        index = 1
        genomes = []
        for (genome, geneID) in self.dataset:
            if genome not in genomes:
                genomes.append(genome)

        strProg = ["# Load individual programs and settings\n"]
        for entry in self.expProgs:
            progArray = entry[:-1].split(";")
            # Unpacking also checks that the field has exactly two parts.
            (amod, aProg) = progArray[0].split(":")
            strProg.append("prog%s = %s()\n" % (str(index), aProg))
            if len(genomes) == 1 and self.expType == "Simple":
                # Was `strProg.apend`, which raised AttributeError.
                strProg.append('prog%s.setGenome("%s")\n' % (str(index), genomes[0]))

            if len(progArray) > 1:
                strProg.append("prog%s.setGenExpOptions(%s)\n" % (str(index), str(progArray[1:])))

            strProg.append("exp.appendProgram(prog%s)\n" % (str(index)))
            index += 1

        strProg.append("\n")

        return "".join(strProg)


    def expRun(self):
        """Return the script section that runs the experiment."""
        strRun = ["# Run experiment\n",
                  'exp.setExternalStatus("MOTIF FINDING")\n',
                  "exp.run()\n",
                  "\n"
        ]

        return "".join(strRun)


    def expAnalysis(self):
        """Return the script section with the three analysis passes.

        The passes are named "consensus", "1 mismatch" and
        "PWM - 90% threshold".
        """
        strAnalysis = ["# Analysis Section\n",
                       'exp.setExternalStatus("ANALYZING")\n',
                       'exp.loadAnalysis("consensus")\n',
                       "exp.annotateConsensus()\n",
                       "exp.mapConsensus()\n",
                       "exp.buildMotifSize()\n",
                       "\n",
                       'exp.loadAnalysis("1 mismatch")\n',
                       "exp.annotateConsensus(numMismatches=1)\n",
                       "exp.mapConsensus(numMismatches=1)\n",
                       "exp.buildMotifSize()\n",
                       "\n",
                       'exp.loadAnalysis("PWM - 90% threshold")\n',
                       "exp.annotateConsensus()\n",
                       "exp.mapMotifs(90.0)\n",
                       "exp.buildMotifSize()\n\n",
                       "\n"
        ]

        return "".join(strAnalysis)


    def expStatus(self):
        """Return the closing script line that resets the external status."""
        return "exp.resetExternalStatus()\n\n"