1 ###########################################################################
3 # C O P Y R I G H T N O T I C E #
4 # Copyright (c) 2003-10 by: #
5 # * California Institute of Technology #
7 # All Rights Reserved. #
9 # Permission is hereby granted, free of charge, to any person #
10 # obtaining a copy of this software and associated documentation files #
11 # (the "Software"), to deal in the Software without restriction, #
12 # including without limitation the rights to use, copy, modify, merge, #
13 # publish, distribute, sublicense, and/or sell copies of the Software, #
14 # and to permit persons to whom the Software is furnished to do so, #
15 # subject to the following conditions: #
17 # The above copyright notice and this permission notice shall be #
18 # included in all copies or substantial portions of the Software. #
20 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, #
21 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF #
22 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND #
23 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS #
24 # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN #
25 # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN #
26 # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE #
28 ###########################################################################
30 # python parent class for experiments
32 from pysqlite2 import dbapi2 as sqlite
34 from sqlite3 import dbapi2 as sqlite
36 import os, time, tempfile, string
38 from cistematic.core import retrieveSeq, retrieveSeqFeatures, fasta
39 import cistematic.core
40 from cistematic.core.motif import matrixRow, Motif
41 from cistematic.genomes import Genome
42 from cistematic import version
43 from os import environ
45 if environ.get("CISTEMATIC_TEMP"):
46 cisTemp = environ.get("CISTEMATIC_TEMP")
50 tempfile.tempdir = cisTemp
54 """ genepool = {(genomeA, geneA1): seq1, (genomeA, geneA2): seq2, (genomeB, geneB1): seq3,...}
geneFeatures = {(genomeA, geneA1):[feature0, feature1, .....], (genomeB, geneB1):[feature0, feature1,...],....}
56 programs = [(program_object1, setting1), (program_object2, setting2), ....]
64 experimentType = "generic"
def __init__(self, expID, expDBFile, geneDBFile=""):
    """ Record the experiment identity and its database files, then announce the library version.

        expID      -- experiment identifier, kept as self.experimentID
        expDBFile  -- experiment database file, kept as self.expFile
        geneDBFile -- optional gene database file, kept as self.geneDB
        Lower-case masking and bounding to the next gene both start disabled.
    """
    self.maskLowerCase = False
    self.boundToNextGene = False
    self.experimentID = expID
    self.expFile = expDBFile
    self.geneDB = geneDBFile
    print("cistematic version %s" % version)
def setExperimentID(self, expID):
    """ Switch to a different experiment ID, logging the change under the old ID first.
    """
    previousID = self.experimentID
    message = "changing experiment ID from %s to %s" % (previousID, expID)
    self.mlog(message)
    self.experimentID = expID
def setGeneDB(self, geneDBFile=""):
    """ Log and remember which gene database file to use.
    """
    message = "using %s as the gene database" % geneDBFile
    self.mlog(message)
    self.geneDB = geneDBFile
def cacheGeneDB(self, genome):
    """ Ask cistematic.core to cache the gene database for genome and log the outcome.
    """
    # NOTE(review): the guard lines are not visible in the reviewed source; the paired
    # success/failure messages imply the call was wrapped -- confirm the intended scope.
    try:
        cistematic.core.cacheGeneDB(genome)
    except Exception:
        self.mlog("could not cache genome %s" % genome)
    else:
        self.mlog("cached genome %s" % genome)
def uncacheGeneDB(self):
    """ Ask cistematic.core to drop its cached gene databases; log if that fails.
    """
    # NOTE(review): the guard lines are not visible in the reviewed source; the failure
    # message implies the call was wrapped -- confirm the intended scope.
    try:
        cistematic.core.uncacheGeneDB()
    except Exception:
        self.mlog("could not uncache genomes")
def setMaskLowerCase(self, maskValue):
    """ Turn lower-case masking on or off and persist the choice as the "maskLowerCase" setting.

        maskValue -- True, 1 or "1" enables masking; anything else disables it.
    """
    enabled = bool(maskValue == True or maskValue == 1 or maskValue == "1")
    self.maskLowerCase = enabled
    # the setting is stored as the text "1"/"0", which is also what this method accepts back
    if enabled:
        self.setSettings("maskLowerCase", ["1"])
    else:
        self.setSettings("maskLowerCase", ["0"])
def setBoundToNextGene(self, boundValue):
    """ Turn bounding-to-next-gene on or off and persist it as the "boundToNextGene" setting.

        boundValue -- True, 1 or "1" enables bounding; anything else disables it.
    """
    enabled = bool(boundValue == True or boundValue == 1 or boundValue == "1")
    self.boundToNextGene = enabled
    # the setting is stored as the text "1"/"0", which is also what this method accepts back
    if enabled:
        self.setSettings("boundToNextGene", ["1"])
    else:
        self.setSettings("boundToNextGene", ["0"])
def setSeqParameters(self, up=0, cds=1, down=0):
    """ Set how much sequence to retrieve around each gene and persist it as "seq_parameters".

        up   -- bp upstream
        cds  -- non-zero to include the cds
        down -- bp downstream
    """
    self.upstream = up
    self.cds = cds
    self.downstream = down
    # NOTE(review): the lines that build cdsStatus are not visible in the reviewed source;
    # the wording below is an assumption -- confirm against the original log text.
    if cds:
        cdsStatus = "with "
    else:
        cdsStatus = "no "

    self.setSettings("seq_parameters", ["%d\t%d\t%d" % (up, cds, down)])
    self.mlog("setting sequence retrieval parameters to %d bp upstream, %d bp downstream, and %scds" % (up, down, cdsStatus))
def getSeqParameters(self):
    """ Return the current retrieval parameters as an (upstream, cds, downstream) tuple.
    """
    seqParameters = (self.upstream, self.cds, self.downstream)
    return seqParameters
def dsetLength(self):
    """ Return the number of dataset rows stored for this experiment (0 if the count is unavailable).
    """
    stmt="SELECT count(*) from dataset where expID = '%s' " % self.experimentID
    res = self.sqlexp(stmt)
    # NOTE(review): the fallback branch is not visible in the reviewed source; 0 is assumed.
    try:
        answer = int(res[0][0])
    except (IndexError, TypeError, ValueError):
        answer = 0

    return answer
def resultsLength(self):
    """ Return the number of results rows stored for this experiment (0 if the count is unavailable).
    """
    stmt="SELECT count(*) from results where expID = '%s' " % self.experimentID
    res = self.sqlexp(stmt)
    # NOTE(review): the fallback branch is not visible in the reviewed source; 0 is assumed.
    try:
        answer = int(res[0][0])
    except (IndexError, TypeError, ValueError):
        answer = 0

    return answer
def checkMotID(self, motID):
    """ Return True when a motif tagged motID is stored for this experiment, False otherwise.
    """
    stmt = "SELECT ID from motifs where expID = '%s' and mTagID = '%s' " % (self.experimentID, motID)
    res = self.sqlexp(stmt)

    return len(res) > 0
def findMotif(self,mTagID):
    """ Return the stored motif tagged mTagID, or log a message and return None when it is unknown.
    """
    if not self.checkMotID(mTagID):
        self.mlog("could not find %s" % mTagID)
        return None

    return self.makeMotif(mTagID)
def makeMotif(self, motID):
    """ Rebuild a Motif from the motifs, motifPWMs and motifSequences tables.

        motID -- motif tag to load for the current experiment
        Raises LookupError when no motif row exists for motID.
    """
    stmt = "SELECT motifSeq, threshold, info from motifs where expID = '%s' and mTagID = '%s' " % (self.experimentID, motID)
    res = self.sqlexp(stmt)
    if len(res) == 0:
        # without this check seq/threshold/info would be unbound at the return below
        raise LookupError("no motif %s stored for experiment %s" % (motID, self.experimentID))

    # NOTE(review): if several rows match, the last one is used -- confirm that matches the original loop
    (seq, threshold, info) = res[-1]

    stmt = "SELECT aFreq, cFreq, gFreq, tFreq from motifPWMs where expID = '%s' and mTagID = '%s' order by position" % (self.experimentID, motID)
    res = self.sqlexp(stmt)
    mPWM = []
    for (aFreq, cFreq, gFreq, tFreq) in res:
        # matrixRow maps each base to its slot in a PWM column
        col = [0.0, 0.0, 0.0, 0.0]
        col[matrixRow["A"]] = aFreq
        col[matrixRow["C"]] = cFreq
        col[matrixRow["G"]] = gFreq
        col[matrixRow["T"]] = tFreq
        mPWM.append(col)

    stmt = "SELECT sequence from motifSequences where expID = '%s' and mTagID = '%s' and type = 'instance' " % (self.experimentID, motID)
    res = self.sqlexp(stmt)
    mseqs = [entry[0] for entry in res]

    return Motif(motID, seq, mPWM, mseqs, threshold, info)
def saveMotif(self, mot):
    """ Store mot in the experiment database, replacing any motif already saved under the same tag.

        Writes one row to motifs, one row per PWM column to motifPWMs and one row per
        sequence in mot.sequences to motifSequences.
    """
    # NOTE(review): the attribute names tagID and info are not visible in the reviewed
    # source -- confirm against the Motif class.
    motID = mot.tagID
    motifSeq = mot.motifSeq
    motifInfo = mot.info
    motifThreshold = mot.threshold
    if self.checkMotID(motID):
        for table in ("motifs", "motifPWMs", "motifSequences"):
            stmt = "DELETE from %s where expID = '%s' and mTagID = '%s' " % (table, self.experimentID, motID)
            self.sqlexp(stmt, commit=True)

    # bound parameters so that quotes inside the info text cannot break the statement;
    # the threshold keeps the six-decimal rounding of the former "%f" formatting
    stmt = "INSERT into motifs(ID, expID, mTagID, motifSeq, threshold, info) values (NULL, ?, ?, ?, ?, ?) "
    self.batchsqlexp(stmt, [(self.experimentID, motID, motifSeq, float("%f" % motifThreshold), "%s" % (motifInfo,))])

    stmt = "INSERT into motifPWMs(ID, expID, mTagID, position, aFreq, cFreq, gFreq, tFreq) values (NULL, ?, ?, ?, ?, ?, ?, ?)"
    stmtList = []
    # NOTE(review): the starting value of the position counter is not visible; 0 is assumed
    for (pos, col) in enumerate(mot.motifPWM):
        aFreq = round(col[matrixRow["A"]],4)
        cFreq = round(col[matrixRow["C"]],4)
        gFreq = round(col[matrixRow["G"]],4)
        # T is derived from the other three so the stored column sums to 1
        tFreq = abs(1.0 - aFreq - cFreq - gFreq)
        stmtList.append((self.experimentID, motID, pos, aFreq, cFreq, gFreq, tFreq))

    self.batchsqlexp(stmt, stmtList)
    if len(mot.sequences) > 0:
        stmt = "INSERT into motifSequences(ID, expID, mTagID, sequence, type, location) values (NULL, ?, ?, ?, ?, ?) "
        stmtList = [(self.experimentID, motID, seq, "instance", "-") for seq in mot.sequences]
        self.batchsqlexp(stmt, stmtList)
def exportMotifs(self, directory=".", prefix="", suffix="mot"):
    """ Save every motif listed in the results table to directory/prefix-motID.suffix.

        prefix defaults to the experiment ID when left empty.
    """
    stmt = "SELECT distinct mTagID from results where expID = '%s' " % (self.experimentID)
    res = self.sqlexp(stmt)
    if prefix == "":
        prefix = self.experimentID

    for entry in res:
        motID = entry[0]
        mot = self.makeMotif(motID)
        fileName = "%s/%s-%s.%s" % (directory, prefix, motID, suffix)
        self.mlog("exporting %s as %s" % (motID, fileName))
        mot.saveMotif(fileName)
def exportLogos(self, directory=".", prefix=""):
    """ Save a logo for every motif listed in the results table as directory/prefix-motID.

        prefix defaults to the experiment ID when left empty.
    """
    stmt = "SELECT distinct mTagID from results where expID = '%s' " % (self.experimentID)
    res = self.sqlexp(stmt)
    if prefix == "":
        prefix = self.experimentID

    for entry in res:
        motID = entry[0]
        mot = self.makeMotif(motID)
        fileName = "%s/%s-%s" % (directory, prefix, motID)
        self.mlog("saving logo for %s as %s.png" % (motID, fileName))
        mot.saveLogo(fileName)
def appendResults(self, mot, resultsGroup="-"):
    """ Save mot and register it in the results table under resultsGroup.
    """
    # NOTE(review): the two lines before the INSERT are not visible in the reviewed source;
    # reading mot.tagID and saving the motif first are assumptions -- confirm.
    motID = mot.tagID
    self.saveMotif(mot)
    stmt = "INSERT into results(ID, expID, resultsGroup, mTagID) values (NULL, '%s', '%s', '%s') " % (self.experimentID, resultsGroup, motID)
    self.sqlexp(stmt, "commit")
def getGeneDB(self, geneDBFile=""):
    """ Return the gene database file currently in use.
    """
    # NOTE(review): the body is not visible in the reviewed source; returning self.geneDB
    # (and ignoring geneDBFile) is an assumption -- confirm.
    return self.geneDB
def getLog(self):
    """ Return this experiment's log as a list of (time tuple, message) pairs, oldest first.

        Each time tuple holds the integer fields of the stored time.localtime() text and
        can be passed to time.asctime().
    """
    import re

    answer = []
    stmt = "SELECT timestamp, message from expLog where expID = '%s' order by timestamp" % self.experimentID
    res = self.sqlexp(stmt)
    for (timestamp, message) in res:
        # The stored text is either "(2010, 1, ...)" or "time.struct_time(tm_year=2010, ...)".
        # Pulling the integers out handles both and avoids eval() on database content.
        fields = [int(field) for field in re.findall(r"-?\d+", timestamp)]
        answer.append((tuple(fields[:9]), message))

    return answer
def getResults(self):
    """ Return a list with one rebuilt motif per distinct motif tag in the results table.
    """
    stmt = "SELECT distinct mTagID from results where expID = '%s' " % (self.experimentID)
    res = self.sqlexp(stmt)

    return [self.makeMotif(entry[0]) for entry in res]
def getSettings(self):
    """ Return all settings of this experiment as {settingName: [data, ...]}.
    """
    answer = {}
    stmt = "SELECT settingName, data from settings where expID = '%s' " % (self.experimentID)
    res = self.sqlexp(stmt)
    for (settingName, data) in res:
        # a setting may have several rows; collect them in query order
        answer.setdefault(settingName, []).append(data)

    return answer
def getSetting(self, settingName):
    """ Return the list of data values stored under settingName (empty when there are none).
    """
    stmt = "SELECT data from settings where expID = '%s' and settingName = '%s' " % (self.experimentID, settingName)
    res = self.sqlexp(stmt)

    return [entry[0] for entry in res]
def settingsHasKey(self, thekey):
    """ Return True when at least one setting named thekey is stored for this experiment.
    """
    stmt = "SELECT distinct ID from settings where expID = '%s' and settingName = '%s' " % (self.experimentID, thekey)
    res = self.sqlexp(stmt)

    return len(res) > 0
def setSettings(self, settingName, settingList):
    """ insert or replace (i.e. delete previous entry) a setting with one or more setting data.

        settingName -- name of the setting
        settingList -- list of data values; one row is written per value
    """
    # delete existing, insert new; both statements bind their values so that quotes in the
    # setting name or data cannot break the SQL
    stmt = "DELETE from settings where expID = ? and settingName = ?"
    self.batchsqlexp(stmt, [(self.experimentID, settingName)])

    stmt = "INSERT into settings (ID, expID, settingName, data) values (NULL, ?, ?, ?)"
    stmtList = [(self.experimentID, settingName, entry) for entry in settingList]
    res = self.batchsqlexp(stmt, stmtList)

    # NOTE(review): the original return statement is not visible; the batch result is passed on
    return res
def setSettingsID(self, settingName, data):
    """ return the settingsID for the inserted settingName:data pair in the settings table.

        data is stored as its string form. Unlike setSettings, existing rows with the same
        name are not deleted.
    """
    # single-quoted literals with doubled quotes are standard SQL; a double-quoted value can
    # be taken for an identifier and breaks as soon as the data itself contains a double quote
    quotedName = ("%s" % (settingName,)).replace("'", "''")
    quotedData = ("%s" % (data,)).replace("'", "''")
    stmt = "INSERT into settings (ID, expID, settingName, data) values (NULL, '%s', '%s', '%s')" % (self.experimentID, quotedName, quotedData)
    res = self.sqlexp(stmt, "commit")

    return res
def getSettingsID(self, settingID):
    """ get a setting by settingsID in the settings table.

        Returns a (settingName, data) tuple.
    """
    stmt = "SELECT settingName, data from settings where expID = '%s' and ID = %d" % (self.experimentID, int(settingID))
    res = self.sqlexp(stmt)
    if len(res) == 0:
        # NOTE(review): the fallback value is not visible in the reviewed source; a pair of
        # empty strings is assumed -- confirm.
        return ("", "")

    (name, data) = res[0]
    return (name, data)
def setRun(self, progName, datasetID, settingsID):
    """ Record a program run in the runs table, log it, and return the run ID.

        progName   -- name of the program being run
        datasetID  -- stored in the datasetGroup column
        settingsID -- integer ID of the settings row used for the run
    """
    # Store the plain tuple text "(2010, 1, ...)". A struct_time may print as
    # "time.struct_time(tm_year=...)", which cannot be turned back into a time tuple.
    stamp = tuple(time.localtime())
    values = "(NULL, '%s', '%s', '%s', %d, '%s', '%s')" % (self.experimentID, progName, datasetID, settingsID, stamp, "-")
    stmt = "INSERT into runs (ID, expID, progName, datasetGroup, settingsID, timestamp, resultsGroup) values %s" % values
    runID = self.sqlexp(stmt, "commit")
    self.mlog("run %s: program %s with settings %d and dataset %s" % (runID, progName, settingsID, datasetID))

    # NOTE(review): the return statement is not visible in the reviewed source; returning the run ID is assumed
    return runID
def getRun(self, rID):
    """ Return (progName, datasetID, settingsID, timestamp, resultsID) for run rID.

        datasetID is converted to int unless it is the literal "chromolist"; settingsID is
        converted to int; timestamp is returned as stored.
    """
    query = "SELECT progName, datasetGroup, settingsID, timestamp, resultsGroup from runs where expID = '%s' and ID = %d" % (self.experimentID, rID)
    rows = self.sqlexp(query)
    (progName, datasetID, settingsID, timestamp, resultsID) = rows[0]
    isChromolist = (datasetID == "chromolist")
    if not isChromolist:
        datasetID = int(datasetID)

    run = (progName, datasetID, int(settingsID), timestamp, resultsID)
    return run
def getRunsByProg(self, prog):
    """ Return the run tuples (as produced by getRuns) whose program name equals prog.
    """
    runs = self.getRuns()

    return [runs[entry] for entry in runs if runs[entry][0] == prog]
def getRuns(self):
    """ Return all runs of this experiment as {runID: (progName, datasetID, settingsID, time tuple, resultsID)}.

        The time tuple holds the integer fields of the stored time.localtime() text.
    """
    import re

    runs = {}
    stmt = "SELECT ID, progName, datasetGroup, settingsID, timestamp, resultsGroup from runs where expID = '%s' " % (self.experimentID)
    res = self.sqlexp(stmt)
    for (ID, progName, datasetID, settingsID, timestamp, resultsID) in res:
        # The stored text is either "(2010, 1, ...)" or "time.struct_time(tm_year=2010, ...)".
        # Pulling the integers out handles both and avoids eval() on database content.
        fields = [int(field) for field in re.findall(r"-?\d+", timestamp)]
        runs[int(ID)] = (progName, datasetID, settingsID, tuple(fields[:9]), resultsID)

    return runs
434 def appendDataset(self, dset):
436 stmt = "INSERT into dataset(ID, genome, locus, sequence, expID) values (NULL, ?, ?, ?, ?) "
437 for (genome, entries) in dset:
438 for entry in entries:
440 (locus, sequence) = entry
445 stmtList.append((genome, locus, sequence, self.experimentID))
447 res = self.batchsqlexp(stmt, stmtList)
450 def getDataset(self):
452 stmt = "SELECT genome, locus, sequence from dataset where expID = '%s' " % (self.experimentID)
453 res = self.sqlexp(stmt)
455 (genome, locus, sequence) = entry
457 dset.append((genome, [locus]))
459 dset.append((genome, [locus, sequence]))
def getDatasetNames(self):
    """ Return the names of this experiment's settings whose name starts with "dataset".
    """
    stmt = "select settingName from settings where settingName like 'dataset" + "%' and expID = '" + self.experimentID + "' "
    res = self.sqlexp(stmt)

    # NOTE(review): the lines that build the result are not visible; a flat list of names is assumed
    return [entry[0] for entry in res]
def getDatasetIDs(self):
    """ Return the settings-table IDs of this experiment's settings whose name starts with "dataset".
    """
    stmt = "select ID from settings where settingName like 'dataset" + "%' and expID = '" + self.experimentID + "' "
    res = self.sqlexp(stmt)

    # NOTE(review): the lines that build the result are not visible; a flat list of IDs is assumed
    return [entry[0] for entry in res]
def getFeatures(self, geneID):
    """ Return the stored sequence features of geneID as (featureType, start, stop, orientation) tuples.

        geneID -- a (genome, gene) pair; its two items select the seqGenome and seqID columns
    """
    stmt = "select featureType, start, stop, orientation from sequenceFeatures where seqGenome = '%s' and seqID = '%s' and expID = '%s' " % (geneID[0], geneID[1], self.experimentID)
    res = self.sqlexp(stmt)

    return [(ftype, start, stop, orientation) for (ftype, start, stop, orientation) in res]
501 def absoluteLocation(self, match, featureLength, seqparams="", gidCoordinates="", customDB=""):
502 """ Returns the absolute location of the start of a match = ((genome, gID), (pos, sense)), given a relative
503 location with respect to the gene and a feature length.
504 Can be passed cached sequence parameters (up, cds, down) and geneEntry tuple to avoid hitting database.
506 result = ["", 0, "F"]
507 (geneID, loc) = match
511 seqparams = self.getSeqParameters()
513 (up, cds, down) = seqparams
514 if gidCoordinates == "":
515 gidCoordinates = cistematic.core.geneEntry(geneID)
517 (gidChrom, gidStart, gidStop, gidLength, gidSense) = gidCoordinates
519 if self.boundToNextGene:
520 up = cistematic.core.upstreamToNextGene(geneID, up, db=customDB)
521 down = cistematic.core.downstreamToNextGene(geneID, down, db=customDB)
525 if pos < up or cds > 0:
526 result[1] = gidStart - up + pos
528 result[1] = gidStop - up + pos
530 if gidStart > gidStop:
538 if pos < up or cds > 0:
539 result[1] = gidStop + up - pos - featureLength
541 result[1] = gidStart + up - pos - featureLength
def setWorkdir(self, wdir=""):
    """ Remember wdir as the working directory and log the change.
    """
    self.workdir = wdir
    self.mlog("changed workdir to %s" % (wdir))
def resetDataset(self):
    """ Delete every dataset row of this experiment and log the reset.
    """
    stmt = "DELETE from dataset where expID = '%s'" % self.experimentID
    self.sqlexp(stmt, "commit")
    # the message used to read "log reset", which belongs to the log-clearing method
    self.mlog("dataset reset")
558 stmt = "DELETE from expLog where expID = '%s'" % self.experimentID
559 res = self.sqlexp(stmt, "commit")
560 self.mlog("log reset")
def resetResults(self):
    """ Delete every results row of this experiment and log the reset.
    """
    deleteStmt = "DELETE from results where expID = '%s'" % self.experimentID
    self.sqlexp(deleteStmt, "commit")
    self.mlog("results reset")
# NOTE(review): the header line of this method is not visible in the reviewed source; the
# name resetRuns is inferred from the log message and the sibling reset* methods -- confirm.
def resetRuns(self):
    """ Delete every runs row of this experiment and log the reset.
    """
    # this used to delete from expLog, wiping the log and leaving the runs in place
    stmt = "DELETE from runs where expID = '%s'" % self.experimentID
    self.sqlexp(stmt, "commit")
    self.mlog("runs reset")
def resetPrograms(self):
    """ Forget all loaded programs and log the reset.
    """
    self.programs = []
    self.mlog("programs reset")
def resetSettings(self):
    """ Delete every settings row of this experiment and log the reset.
    """
    # this used to delete from expLog (copy of the log-reset statement) and then called
    # self.log with an argument instead of the mlog used by every sibling method
    stmt = "DELETE from settings where expID = '%s'" % self.experimentID
    self.sqlexp(stmt, "commit")
    self.mlog("settings reset")
def appendProgram(self, program):
    """ Store the program's settings, add (program, settings ID) to self.programs, and log it.
    """
    progName = program.name()
    settingsID = self.setSettingsID(progName, program.getSettings())
    self.programs.append((program, settingsID))
    self.mlog("adding program %s" % program.name())
def removeProgram(self, progName):
    """ Remove every loaded program whose name() equals progName, logging each removal.
    """
    # Deleting entries while walking range(len(...)) skips neighbours and runs past the
    # shortened list, so the survivors are collected first and swapped in afterwards.
    kept = []
    for entry in self.programs:
        if entry[0].name() == progName:
            self.mlog("removed program %s" % progName)
        else:
            kept.append(entry)

    # slice assignment keeps the same list object, as the in-place delete did
    self.programs[:] = kept
def createWorkdir(self):
    """ Create the working directory, picking a fresh temporary one when none is set.
    """
    # NOTE(review): the guard lines are not visible in the reviewed source; failures are
    # assumed to be non-fatal and are now logged instead of passing silently.
    try:
        if self.workdir == "":
            # mkdtemp picks the name and creates the directory in one step
            self.workdir = tempfile.mkdtemp()
        else:
            os.mkdir(self.workdir)
    except OSError:
        self.mlog("could not create workdir: %s" % (self.workdir))
    else:
        self.mlog("created workdir: %s" % (self.workdir))
def removeWorkdir(self):
    """ Delete every file in the working directory and then the directory itself.

        A failure (missing directory, sub-directory inside it, permissions) is logged.
    """
    try:
        for entry in os.listdir(self.workdir):
            os.remove("%s/%s" % (self.workdir, entry))
        os.rmdir(self.workdir)
    except OSError:
        self.mlog("could not delete workdir: %s" % (self.workdir))
def mlog(self, msg):
    """ Append msg to this experiment's expLog table, stamped with the current local time.
    """
    # single quotes would end the SQL literal early, so they are stored as double quotes
    safeMsg = msg.replace("'", '"')
    # Store the plain tuple text "(2010, 1, ...)". A struct_time may print as
    # "time.struct_time(tm_year=...)", which cannot be turned back into a time tuple.
    stamp = tuple(time.localtime())
    stmt = "INSERT into expLog(ID, expID, timestamp, message) values (NULL, '%s', '%s', '%s') " % (self.experimentID, stamp, safeMsg)
    self.sqlexp(stmt, "commit")
def logToString(self):
    """ Return the whole experiment log as text, one "time: message" line per entry.
    """
    lines = [time.asctime(line[0]) + ": " + line[1] + "\n" for line in self.getLog()]

    return "".join(lines)
634 for line in self.getLog():
635 print "%s: %s" % (time.asctime(line[0]), line[1])
639 theLog = self.getLog()
640 for line in theLog[-10:]:
641 print "%s: %s" % (time.asctime(line[0]), line[1])
def loadPrograms(self):
    """ Re-create the programs recorded under the "loaded_programs" setting.

        Each stored entry is "module<TAB>class<TAB>settings ID". The class is imported,
        instantiated without arguments, and appended to self.programs with its settings ID.
    """
    import ast

    for progs in self.getSetting("loaded_programs"):
        (moduleName, className, settingID) = progs.split("\t")
        # Import by name instead of exec-ing code strings built from database text.
        # A non-empty fromlist makes __import__ return the named (sub)module itself.
        module = __import__(moduleName, globals(), locals(), [className])
        programClass = getattr(module, className)
        # the settings ID was formerly spliced into executed code; parse it as a literal only
        self.programs.append((programClass(), ast.literal_eval(settingID)))
def savePrograms(self):
    """ Persist the loaded programs as the "loaded_programs" setting.

        One "module<TAB>class<TAB>settings ID" entry is written per program.
    """
    progs = ["%s\t%s\t%s" % (program.__class__.__module__, program.__class__.__name__, settingID)
             for (program, settingID) in self.programs]

    self.setSettings("loaded_programs", progs)
def saveGeneDB(self):
    """ Persist the gene database file as the "geneDB" setting, unless none is set.
    """
    if self.geneDB == "":
        return

    self.setSettings("geneDB", [self.geneDB])
def loadFasta(self, fastaFile, genomeName):
    """ Load fastaFile under genomeName and return it in dataset form.

        The result is a one-item list [(genomeName, list returned by loadFastaFromFile)].
    """
    loadedIDs = self.loadFastaFromFile(fastaFile, genomeName)
    dataset = [(genomeName, loadedIDs)]

    return dataset
def loadFastaFromFile(self, fastaFile, genomeName):
    """ Import every record of fastaFile into a new gene database named "<genomeName>.genedb".

        Each record is added to the Genome as a sequence, a chromosome entry and a gene
        entry covering the whole sequence. The new database then becomes the experiment's
        gene database.
        Returns the list of record IDs (the header line without its first character).
    """
    seqName = []
    geneDBPath = "%s.genedb" % genomeName
    # the message used to print the genome name where the fasta file name was meant
    self.mlog("Loading fasta file %s into database %s" % (fastaFile, geneDBPath))
    aGenome = Genome(genomeName, dbFile=geneDBPath)
    aGenome.createGeneDB()
    inFile = open(fastaFile, "r")
    # NOTE(review): the loop scaffolding (accumulators, record loop, close and return) is not
    # visible in the reviewed source and is rebuilt from the visible statements -- confirm.
    try:
        header = inFile.readline()
        while header != "":
            seqArray = []
            seqLen = 0
            chromID = header.strip()[1:]
            currentLine = inFile.readline()
            # gather sequence lines up to the next ">" header or the end of the file
            while currentLine != "" and currentLine[0] != ">":
                lineSeq = currentLine.strip()
                seqLen += len(lineSeq)
                seqArray.append(lineSeq)
                currentLine = inFile.readline()

            seq = "".join(seqArray)
            print("Added sequence %s to database" % chromID)
            aGenome.addSequence((genomeName, chromID), seq, "chromosome", str(seqLen))
            aGenome.addChromosomeEntry(chromID, chromID, "db")
            aGenome.addGeneEntry((genomeName, chromID), chromID, 0, seqLen - 1, "F", "IMPORT", "1")
            seqName.append(chromID)
            # the line that ended the inner loop is the next record's header (or "" at EOF)
            header = currentLine
    finally:
        inFile.close()

    aGenome.createIndices()
    self.setGeneDB(geneDBPath)

    return seqName
711 def loadGenepool(self):
713 for geneTuple in self.getDataset():
714 genome = geneTuple[0]
715 geneList = geneTuple[1]
716 for gene in geneList:
720 geneID = (genome, tag)
722 # need to deal with masking lowercase
723 self.genepool[geneID] = seq.upper()
725 geneID = (genome, gene)
726 # note that we will only keep one copy of a geneid in the genepool, even if we
727 # retrieve it multiple times.
728 self.genepool[geneID] = retrieveSeq(geneID, self.upstream, self.cds, self.downstream, self.geneDB, self.maskLowerCase, self.boundToNextGene)
730 self.mlog("could not load %s" % (str(geneID)))
732 self.genepoolID = self.setSettingsID("genepool", self.genepool.keys())
def loadGeneFeatures(self):
    """ Rebuild the sequenceFeatures rows of this experiment from the current dataset.

        Existing rows are deleted first. Features are then retrieved for every gene of
        every genome in the dataset, and written in one batch.
    """
    stmt = "DELETE from sequenceFeatures where expID = '%s'" % self.experimentID
    self.sqlexp(stmt, "commit")
    stmtList = []
    stmt = "INSERT into sequenceFeatures (ID, expID, seqGenome, seqID, featureType, start, stop, orientation) values (NULL, ?, ?, ?, ?, ?, ?, ?)"
    for geneTuple in self.getDataset():
        genome = geneTuple[0]
        geneList = geneTuple[1]
        for gene in geneList:
            # NOTE(review): the guard lines are not visible in the reviewed source; the failure
            # message implies retrieval was wrapped per gene -- confirm the intended scope.
            try:
                geneID = (genome, gene)
                results = retrieveSeqFeatures(geneID, self.upstream, self.cds, self.downstream, self.boundToNextGene, self.geneDB)
                for (ftype, start, stop, orientation) in results:
                    stmtList.append((self.experimentID, genome, gene, ftype, start, stop, orientation))
            except Exception:
                self.mlog("could not find features for %s" % (gene))

    if len(stmtList) > 0:
        self.batchsqlexp(stmt, stmtList)
def toFile(self, geneIDList, filename, geneDict=None):
    """ Write the sequences of geneIDList to filename in fasta format.

        geneIDList -- gene IDs to write, in order
        filename   -- output file, overwritten if it exists
        geneDict   -- optional {geneID: sequence} consulted before self.genepool
        IDs found in neither place are logged and skipped.
    """
    if geneDict is None:
        geneDict = {}

    outFile = open(filename, "w")
    try:
        for geneID in geneIDList:
            if geneID in geneDict:
                outFile.write(fasta(geneID, geneDict[geneID]))
            elif geneID in self.genepool:
                outFile.write(fasta(geneID, self.genepool[geneID]))
            else:
                self.mlog("could not write %s to file" % str(geneID))
    finally:
        # close the handle even when formatting or writing a record fails
        outFile.close()
def createDataFile(self, datasetID=-1, geneIDList=None, geneDict=None):
    """ Write a set of sequences to a new temporary fasta file inside the working directory.

        datasetID  -- settings ID holding the gene ID list; -1 selects the genepool settings
        geneIDList -- explicit gene IDs; when empty they are read from the settings row
        geneDict   -- optional sequences handed on to toFile
        Returns the name of the data file. A failure while writing is logged.
    """
    import ast

    if geneIDList is None:
        geneIDList = []
    if geneDict is None:
        geneDict = []

    # pass the directory directly instead of swapping the module-wide tempfile.tempdir
    dataFile = tempfile.mktemp(dir=self.workdir)
    # NOTE(review): the test on datasetID is not visible in the reviewed source; "== -1" is assumed
    if datasetID == -1:
        datasetID = self.genepoolID

    if len(geneIDList) < 1:
        settingsList = self.getSettingsID(datasetID)
        # the stored data is the text of a list of gene IDs; parse it as a literal only,
        # never as arbitrary code
        geneIDList = ast.literal_eval(settingsList[1])

    try:
        self.toFile(geneIDList, dataFile, geneDict)
    except Exception:
        self.mlog("could not create dataFile %s" % (dataFile))

    # NOTE(review): the return statement is not visible in the reviewed source; returning the file name is assumed
    return dataFile
794 def initialize(self, dataset=[], workdir=""):
796 self.setWorkdir(workdir)
800 self.appendDataset(dataset)
802 self.loadGeneFeatures()
806 if len(self.programs) == 0:
807 self.mlog("Must instantiate one or more programs first")
808 elif len(self.genepool) == 0:
809 self.mlog("Must have one or more valid sequences in the dataset")
812 def sqlexp(self, stmt, commit=""):
813 db = sqlite.connect(self.expFile, timeout=60)
816 print "sqlexp: %s" % stmt
819 res = sqlc.fetchall()
823 if stmt[0:6] == "INSERT":
832 def batchsqlexp(self, stmt, batch):
833 """ executes a list of sql statements (usually inserts) stored in the list batch with a single commit.
836 db = sqlite.connect(self.expFile, timeout=60)
839 print "batchsql: %s" % stmt
840 print "batchsql: %s" % str(batch)
842 sqlc.executemany(stmt, batch)
def setExternalStatus(self, status):
    """ Log status and write it to "<experiment database file>.status", replacing any old content.
    """
    self.mlog("Setting status to %s" % status)
    statfile = open("%s.status" % self.expFile, "w")
    try:
        statfile.write(status)
    finally:
        # close explicitly so the text is on disk for whoever reads the status file
        statfile.close()
def resetExternalStatus(self):
    """ Remove the "<experiment database file>.status" file if it can be removed.
    """
    # NOTE(review): the guard lines are not visible in the reviewed source; a missing status
    # file is assumed to be acceptable here -- confirm.
    try:
        os.remove("%s.status" % self.expFile)
    except OSError:
        pass
865 if not os.path.exists(self.expFile):
867 db = sqlite.connect(self.expFile, timeout=60)
869 sql.execute("CREATE table experiment(ID INTEGER PRIMARY KEY, expID varchar, expType varchar, expStatus varchar, timestamp varchar)")
870 sql.execute("CREATE table dataset(ID INTEGER PRIMARY KEY, expID varchar, datasetGroup varchar, genome varchar, locus varchar, sequence varchar)")
871 sql.execute("CREATE table results(ID INTEGER PRIMARY KEY, expID varchar, resultsGroup varchar, mTagID varchar)")
872 sql.execute("CREATE table motifs(ID INTEGER PRIMARY KEY, expID varchar, mTagID varchar, motifSeq varchar, threshold varchar, info varchar)")
873 sql.execute("CREATE table motifPWMs(ID INTEGER PRIMARY KEY, expID varchar, mTagID varchar, position int, aFreq float, cFreq float, gFreq float, tFreq float)")
874 sql.execute("CREATE table motifSequences(ID INTEGER PRIMARY KEY, expID varchar, mTagID varchar, sequence varchar, type varchar, location varchar)")
875 sql.execute("CREATE table settings(ID INTEGER PRIMARY KEY, expID varchar, settingName varchar, data varchar)")
876 sql.execute("CREATE table runs(ID INTEGER PRIMARY KEY, expID varchar, progName varchar, datasetGroup varchar, settingsID int, timestamp varchar, resultsGroup varchar)")
877 sql.execute("CREATE table expLog(ID INTEGER PRIMARY KEY, expID varchar, timestamp varchar, message varchar)")
878 sql.execute("CREATE table sequenceFeatures(ID INTEGER PRIMARY KEY, expID varchar, seqGenome varchar, seqID varchar, featureType varchar, start int, stop int, orientation varchar)")
880 sql.execute("CREATE INDEX settingIndex1 on settings(expID, settingName)")
881 sql.execute("CREATE INDEX datasetIndex1 on dataset(expID, datasetGroup)")
882 sql.execute("CREATE INDEX motifsIndex1 on motifs(expID, mTagID)")
883 sql.execute("CREATE INDEX motifPWMsIndex1 on motifPWMs(expID, mTagID)")
884 sql.execute("CREATE INDEX motifSequencesIndex1 on motifSequences(expID, mTagID)")
885 sql.execute("CREATE INDEX featuresIndex1 on sequenceFeatures(expID, seqGenome, seqID)")
887 sql.execute("INSERT INTO settings(ID, expID, settingName, data) values (NULL, '%s', 'experimentType', '%s')" % (self.experimentID, self.experimentType))
892 self.mlog("Created experiment database %s" % self.expFile)
894 self.mlog("Could not create experiment database %s" % self.expFile)
896 self.mlog("Using existing experiment database %s" % self.expFile)
898 if self.settingsHasKey("loaded_programs"):
901 if self.settingsHasKey("seq_parameters"):
902 res = self.getSetting("seq_parameters")
903 (up, cds, down) = res[0].split("\t")
904 self.setSeqParameters(int(up), int(cds), int(down))
906 self.setSeqParameters()
908 if self.settingsHasKey("maskLowerCase"):
909 res = self.getSetting("maskLowerCase")
910 self.setMaskLowerCase(res[0])
912 self.setMaskLowerCase(False)
914 if self.settingsHasKey("boundToNextGene"):
915 res = self.getSetting("boundToNextGene")
916 self.setBoundToNextGene(res[0])
918 self.setBoundToNextGene(False)
920 if self.settingsHasKey("experimentType"):
921 res = self.getSetting("experimentType")
922 self.experimentType = res[0]
924 if self.settingsHasKey("geneDB"):
925 res = self.getSetting("geneDB")
928 if self.dsetLength() > 0: