erange 4.0a dev release with integrated cistematic
[erange.git] / cistematic / experiments / genexp.py
1 ###########################################################################
2 #                                                                         #
3 # C O P Y R I G H T   N O T I C E                                         #
4 #  Copyright (c) 2003-10 by:                                              #
5 #    * California Institute of Technology                                 #
6 #                                                                         #
7 #    All Rights Reserved.                                                 #
8 #                                                                         #
9 # Permission is hereby granted, free of charge, to any person             #
10 # obtaining a copy of this software and associated documentation files    #
11 # (the "Software"), to deal in the Software without restriction,          #
12 # including without limitation the rights to use, copy, modify, merge,    #
13 # publish, distribute, sublicense, and/or sell copies of the Software,    #
14 # and to permit persons to whom the Software is furnished to do so,       #
15 # subject to the following conditions:                                    #
16 #                                                                         #
17 # The above copyright notice and this permission notice shall be          #
18 # included in all copies or substantial portions of the Software.         #
19 #                                                                         #
20 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,         #
21 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF      #
22 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND                   #
23 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS     #
24 # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN      #
25 # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN       #
26 # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE        #
27 # SOFTWARE.                                                               #
28 ###########################################################################
29 #
30 import os
31 import string
32 from cistematic.programs import supportedPrograms
33 from cistematic.experiments import experimentTypes, loadExperiment
34 from os import environ
35
# Directory that generated experiment scripts add to sys.path (see
# GenExp.expHead). A non-empty CISTEMATIC_ROOT environment variable wins;
# an unset or empty one falls back to the built-in default.
cisRoot = environ.get("CISTEMATIC_ROOT") or "/proj/genome"
40
41
class GenExp:
    """Write, and optionally launch, a stand-alone experiment run script.

    Constructing an instance records the experiment parameters, fills in
    the ones that were not supplied from the experiment database, and
    writes a Python script (see createExpRunFile) next to the database.
    run() then marks the experiment as queued and starts that script in
    the background.
    """

    # Class-level defaults; __init__ overrides them on every instance.
    dataPath = ""
    dbFilePath = ""
    expFilePath = ""
    expName = ""
    expType = ""
    dataset = []
    expProgs = []
    expSettings = {}
    analysisName = ""


    def __init__(self, path, dbPath, newexp, type="", dset="", progs="", analysis="", createExp=False):
        """Record the experiment parameters and write the run script.

        path      -- directory the generated script is run from (see run)
        dbPath    -- experiment database file; the script path is derived
                     from it by replacing ".db" with "-db.py"
        newexp    -- experiment name
        type      -- experiment type name; replaced by the stored type when
                     either progs or dset has to be loaded
        dset      -- dataset as (genome, geneID) pairs; loaded from the
                     experiment database when empty
        progs     -- program description strings (see expPrograms); loaded
                     from the experiment database when empty
        analysis  -- suffix appended to newexp to form analysisName
        createExp -- when true, create the experiment database first
        """
        self.dataPath = path
        self.dbFilePath = dbPath
        self.expName = newexp
        self.expFilePath = dbPath.replace(".db", "-db.py")
        self.expType = type
        self.dataset = dset
        self.expProgs = progs
        self.analysisName = newexp + analysis
        # Per-instance dict so instances never share the class-level one.
        self.expSettings = {}

        if createExp:
            self.createExpDB()

        needProgs = not self.expProgs
        needDataset = not dset
        if needProgs or needDataset:
            # Load the stored experiment once and pull whatever is missing.
            exp = loadExperiment(self.expName, self.dbFilePath)
            self.expType = exp.experimentType
            if needProgs:
                self.expProgs = exp.getSetting("expProgs")

            if needDataset:
                self.dataset = exp.getDataset()

        self.createExpRunFile()


    def run(self):
        """Mark the experiment as QUEUED and start the run script.

        The script is started through the shell with nohup and "&", so this
        method returns without waiting for it.
        """
        exp = loadExperiment(self.expName, self.dbFilePath)
        exp.setExternalStatus("QUEUED")
        del exp
        # NOTE(review): both paths are interpolated into a shell command
        # without quoting; paths containing spaces or shell metacharacters
        # will not work as intended.
        os.system("cd %s ; nohup /usr/bin/python %s & " % (self.dataPath, self.expFilePath))


    def createExpDB(self, modName=None):
        """Instantiate the experiment class so its database gets created.

        Imports cistematic.experiments.<modName>, looks up the class named
        by self.expType in it and calls it with the experiment name and
        database path.

        modName -- module under cistematic.experiments that defines the
                   experiment class.
        """
        import sys

        if modName is None:
            # NOTE(review): the original code referenced an undefined
            # modName here. Falling back to the lower-cased type name is an
            # assumption about the package layout - confirm, or pass
            # modName explicitly.
            modName = self.expType.lower()

        fullName = "cistematic.experiments." + modName
        __import__(fullName)
        expClass = getattr(sys.modules[fullName], self.expType)
        expClass(self.expName, self.dbFilePath)


    def createExpRunFile(self):
        """Write the generated run script to self.expFilePath.

        All sections are generated before the file is opened, so a failure
        while generating them does not leave a truncated script behind.
        """
        sections = [self.expHead(),
                    self.expInit(),
                    self.expPrograms(),
                    self.expRun(),
                    self.expAnalysis(),
                    self.expStatus()
        ]
        outFile = open(self.expFilePath, "w")
        try:
            outFile.write("".join(sections))
        finally:
            outFile.close()


    def expHead(self):
        """Return the script header: banner, sys.path setup, program imports."""
        strHead = ["# Experiment: %s autogenerated by Cistematic GenExp 0.9.9b\n" % self.expName]
        strHead.append("from sys import path\n")
        strHead.append("cisPath = '%s'\n" % cisRoot)
        strHead.append("if cisPath not in path:\n\tpath.append(cisPath)\n\n")
        for oneProg in supportedPrograms:
            strHead.append("from cistematic.programs.%s import %s\n" % (oneProg[0], oneProg[1]))

        strHead.append("\n")

        return "".join(strHead)


    def expInit(self):
        """Return the script section that loads and initializes the experiment.

        For the "orthology", "phyloFoot" and "phyloTest" types the stored
        "expConsConfig" setting ("window:threshold:numseq:homologs") selects
        the conservation code that is emitted; every other type is simply
        initialized with the dataset.
        """
        useMussa = False
        useSeqcomp = False
        startingGenome = ""
        targetGenomes = []
        consGene = []
        geneIDList = []
        # Defined up front so the non-conservation path can use them too.
        preInitialize = ""
        initializeArguments = ""

        if self.expType in ["orthology", "phyloFoot", "phyloTest"]:
            exp = loadExperiment(self.expName, self.dbFilePath)
            consSettings = exp.getSetting("expConsConfig")
            (window, threshold, numseq, homologs) = consSettings[0].split(":")
            if homologs != "1" and exp.settingsHasKey("gTargets"):
                gTargets = exp.getSetting("gTargets")
                if len(gTargets) > 0:
                    targetGenomes = gTargets[0].split(":")

            del exp
            # Walk the dataset back to front on a copy, so neither
            # self.dataset nor the caller's list is reordered.
            orderedDataset = list(self.dataset)
            orderedDataset.reverse()
            for (genome, gid) in orderedDataset:
                if (genome not in targetGenomes) and homologs == "1":
                    targetGenomes.append(genome)

                if startingGenome == "":
                    startingGenome = genome

                gid = gid[0]
                # we are assuming that all of these genes will be from the same
                # genome - this is only true (or necessary) if these are not
                # explicitly marked as "homologs" by the user
                if gid not in consGene:
                    consGene.append(gid)

                geneIDList.append((genome, gid))

            if homologs == "1":
                preInitializeList = ["exp.createConservation()\n"]
                preInitializeList.append("exp.loadConservation()\n")
                preInitializeList.append("paralogs = %s\n" % str(geneIDList))
                preInitializeList.append('exp.insertHomologs(paralogs, "genExp")\n\n')
                preInitialize = "".join(preInitializeList)
                # Only the first gene is passed to exp.initialize().
                del consGene[1:]

            if int(numseq) > 0:
                useSeqcomp = True
            else:
                useMussa = True
        else:
            initializeArguments = str(self.dataset)

        strInit = ["# Initialize Experiment\n"]
        strInit.append("from cistematic.experiments import loadExperiment\n\n")
        strInit.append('exp = loadExperiment("%s", "%s")\n' % (self.expName, self.dbFilePath))
        strInit.append('exp.setExternalStatus("INITIALIZING")\n')
        strInit.append(preInitialize)
        if useMussa or useSeqcomp:
            if len(targetGenomes) > 0:
                strInit.append('exp.initialize("%s", %s, %s)\n\n' % (startingGenome, str(consGene), str(targetGenomes)))
            else:
                strInit.append('exp.initialize("%s", %s)\n\n' % (startingGenome, str(consGene)))

            strInit.append('exp.setExternalStatus("CONSERVATION")\n')
            strInit.append("exp.computeAlignments()\n")
        else:
            strInit.append("exp.initialize(%s)\n\n" % initializeArguments)

        if useMussa:
            strInit.append("exp.mapMussaConservation(window=%s, threshold=%s)\n\n" % (window, threshold))

        if useSeqcomp:
            strInit.append("exp.mapSeqcompConservation(window=%s, threshold=%s, minSequences=%s)\n\n" % (window, threshold, numseq))

        return "".join(strInit)


    def expPrograms(self):
        """Return the script section that creates and registers each program.

        Each entry of self.expProgs has its last character dropped and is
        then split on ";": the first field is "module:ClassName" and any
        remaining fields are handed to setGenExpOptions().
        """
        genomes = []
        for (genome, geneID) in self.dataset:
            if genome not in genomes:
                genomes.append(genome)

        singleGenome = len(genomes) == 1 and self.expType == "Simple"
        strProg = ["# Load individual programs and settings\n"]
        for (offset, entry) in enumerate(self.expProgs):
            progName = "prog%d" % (offset + 1)
            progArray = entry[:-1].split(";")
            (amod, aProg) = progArray[0].split(":")
            strProg.append("%s = %s()\n" % (progName, aProg))
            if singleGenome:
                strProg.append('%s.setGenome("%s")\n' % (progName, genomes[0]))

            if len(progArray) > 1:
                strProg.append("%s.setGenExpOptions(%s)\n" % (progName, str(progArray[1:])))

            strProg.append("exp.appendProgram(%s)\n" % progName)

        strProg.append("\n")

        return "".join(strProg)


    def expRun(self):
        """Return the script section that runs the experiment."""
        strRun = ["# Run experiment\n",
                  'exp.setExternalStatus("MOTIF FINDING")\n',
                  "exp.run()\n",
                  "\n"
        ]

        return "".join(strRun)


    def expAnalysis(self):
        """Return the script section that runs the three fixed analyses."""
        strAnalysis = ["# Analysis Section\n",
                       'exp.setExternalStatus("ANALYZING")\n',
                       'exp.loadAnalysis("consensus")\n',
                       "exp.annotateConsensus()\n",
                       "exp.mapConsensus()\n",
                       "exp.buildMotifSize()\n",
                       "\n",
                       'exp.loadAnalysis("1 mismatch")\n',
                       "exp.annotateConsensus(numMismatches=1)\n",
                       "exp.mapConsensus(numMismatches=1)\n",
                       "exp.buildMotifSize()\n",
                       "\n",
                       'exp.loadAnalysis("PWM - 90% threshold")\n',
                       "exp.annotateConsensus()\n",
                       "exp.mapMotifs(90.0)\n",
                       "exp.buildMotifSize()\n\n",
                       "\n"
        ]

        return "".join(strAnalysis)


    def expStatus(self):
        """Return the closing script line that resets the external status."""
        return "exp.resetExternalStatus()\n\n"