1 ###########################################################################
3 # C O P Y R I G H T N O T I C E #
4 # Copyright (c) 2003-10 by: #
5 # * California Institute of Technology #
7 # All Rights Reserved. #
9 # Permission is hereby granted, free of charge, to any person #
10 # obtaining a copy of this software and associated documentation files #
11 # (the "Software"), to deal in the Software without restriction, #
12 # including without limitation the rights to use, copy, modify, merge, #
13 # publish, distribute, sublicense, and/or sell copies of the Software, #
14 # and to permit persons to whom the Software is furnished to do so, #
15 # subject to the following conditions: #
17 # The above copyright notice and this permission notice shall be #
18 # included in all copies or substantial portions of the Software. #
20 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, #
21 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF #
22 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND #
23 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS #
24 # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN #
25 # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN #
26 # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE #
28 ###########################################################################
# Bind the DB-API driver used throughout this module as ``sqlite``.
# The standalone pysqlite2 package is preferred when it is installed; if it is
# not importable, fall back to the sqlite3 module from the standard library.
# Only ImportError is handled so that any other failure while importing the
# driver is still reported instead of being masked.
try:
    from pysqlite2 import dbapi2 as sqlite
except ImportError:
    from sqlite3 import dbapi2 as sqlite
35 import tempfile, shutil, os
36 from os import environ
# Installation root for the data files. The CISTEMATIC_ROOT environment
# variable wins when it is set to a non-empty value; an unset or empty variable
# selects the built-in default location.
cisRoot = environ.get("CISTEMATIC_ROOT") or "/proj/genome"
43 from cistematic.core.geneinfo import speciesMap
# Path of the homologene sqlite file, located in the "db" directory directly
# under the installation root.
dbPath = cisRoot + "/db/homologene.db"

# Genome names listed for homologene in this module.
homologeneGenomes = [
    "hsapiens",
    "mmusculus",
    "rnorvegicus",
    "celegans",
    "cbriggsae",
    "cremanei",
    "dmelanogaster",
    "athaliana",
    "ggallus",
    "cfamiliaris",
    "drerio",
    "scerevisiae",
]
# Temporary-directory override. When CISTEMATIC_TEMP is set to a non-empty
# value it is read into cisTemp, and cisTemp is then installed as
# tempfile.tempdir, the directory the tempfile module uses by default.
# NOTE(review): no assignment of cisTemp for the unset/empty case is visible in
# this section; confirm a fallback is bound before tempfile.tempdir is set,
# otherwise the last line raises NameError.
50 if environ.get("CISTEMATIC_TEMP"):
51 cisTemp = environ.get("CISTEMATIC_TEMP")
54 tempfile.tempdir = cisTemp
# homologyDB class body (fragment).
# NOTE(review): the class header, several method headers and a number of
# statements are not visible in this section. The notes below describe only the
# statements that can be seen; anything about the missing lines is marked as an
# assumption to confirm.
58 """ The homologyDB class allows for the mapping and return of predefined homology relationships.
# Constructor: stores the caller's target-genome list on the instance.
# NOTE(review): tGenomes defaults to one list object created at definition
# time, and it is stored without copying, so every instance built without an
# argument shares (and can mutate) the same list. A None default replaced by a
# fresh list would avoid that.
# NOTE(review): no use of the `cache` argument is visible here; presumably it
# triggers the copy-to-local-cache step described in the docstring - confirm.
66 def __init__(self, tGenomes=[], cache=False):
67 """ initialize the homologyDB object with a target genome and cache database, if desired.
69 self.targetGenomes = tGenomes
# Cleanup fragment: only acts when self.cachedDB is a non-empty string. The
# action taken inside the condition is not visible.
75 """ cleanup copy in local cache, if present.
77 if self.cachedDB != "":
# Cache fragment: builds a temp-file name with a ".db" suffix, records it in
# self.cachedDB, then copies the file at dbPath to that name.
# NOTE(review): tempfile.mktemp() only returns a name and does not create the
# file; the Python documentation deprecates it because another process can
# create that path first. tempfile.mkstemp(suffix=".db") creates the file
# atomically and would be the safer choice.
82 """ copy homologyDB to a local cache.
84 self.cachedDB = "%s.db" % tempfile.mktemp()
85 shutil.copyfile(dbPath, self.cachedDB)
# Uncache fragment: when a cached copy is recorded, delete it with os.remove.
# The message below is emitted with a Python 2 print statement; presumably it
# is the failure path of the removal (the try/except lines are not visible) -
# confirm.
89 """ delete homologyDB from local cache.
92 if self.cachedDB != "":
94 os.remove(self.cachedDB)
96 print "could not delete %s" % self.cachedDB
# Connection fragment: returns a new connection to `path` with timeout=60
# (per the sqlite3 connect() documentation, the seconds to wait on a locked
# database). The statements that bind `path` are not visible; the cachedDB
# test suggests the cached copy is used when present - confirm.
102 """ return a handle to the database.
105 if self.cachedDB != "":
108 return sqlite.connect(path, timeout=60)
# Look up the genes that share a homology group with geneID.
# Results are collected as (str(genome), str(gID)) tuples in `results`.
# NOTE(review): the statements binding `cursor`, `results`, `gen` and `gid`,
# the loop over `genes`, the unpacking of `genome`/`gID`, and the return are
# not visible in this section.
117 def getHomologousGenes(self, geneID):
118 """ return list of geneIDs homologous to given geneID. Limit to target genomes if specified at initialization.
120 db = self.connectDB()
# Named placeholders (:gen, :gid) are filled from locals(), so `gen` and `gid`
# must be local names at this point (presumably unpacked from geneID - confirm).
124 cursor.execute("select homoloID from homolog where genome = :gen and gID = :gid", locals())
125 groups = cursor.fetchall()
# One follow-up query per homology group the gene belongs to.
126 for hIDentry in groups:
127 homoloID = str(hIDentry[0])
# :homoloID is likewise resolved from locals().
128 cursor.execute("select genome, gID from homolog where homoloID = :homoloID ", locals())
129 genes = cursor.fetchall()
# When target genomes were given, membership of `genome` in that list is
# tested; the same tuple is appended on two visible lines, presumably one per
# branch (the lines between them are not visible) - confirm.
133 if len(self.targetGenomes) > 0:
134 if genome not in self.targetGenomes:
137 results.append((str(genome), str(gID)))
139 results.append((str(genome), str(gID)))
# Create the "homolog" table in the sqlite file `hdb`, insert rows derived from
# tab-separated input lines, then build two indexes on the table.
#   hFile - input file name (default "homologene.data"); the statements that
#           open and iterate it are not visible in this section.
#   hdb   - sqlite file to write; the default is the value dbPath had when this
#           function was defined.
# NOTE(review): "create table" has no IF NOT EXISTS clause, so running this
# against a file that already contains a homolog table raises an error.
147 def buildHomologeneDB(hFile="homologene.data", hdb=dbPath):
148 """ Populate a new homologyDB database with homology relationships from homologene.
151 db = sqlite.connect(hdb)
# `cursor` is bound on a line that is not visible here.
153 cursor.execute("create table homolog(ID INTEGER PRIMARY KEY, homoloID varchar, genome varchar, gID varchar)")
# ID is passed as NULL so SQLite assigns the INTEGER PRIMARY KEY value itself;
# the three real values are bound through ? placeholders.
156 sqlstmt = "INSERT into homolog(ID, homoloID, genome, gID) values (NULL, ?, ?, ?)"
# `line` comes from a loop that is not visible. Columns are tab-separated:
# field[0] is used for the group ID and field[1] is the key into speciesMap.
157 field = line.split("\t")
# Rows whose field[1] is not a speciesMap key are not inserted by the visible
# code.
158 if field[1] in speciesMap:
# For "arabidopsis" the gene ID is field[3], upper-cased. The assignment of
# `gid` for every other species is not visible.
160 if speciesMap[field[1]] == "arabidopsis":
161 gid = field[3].upper()
# Row values: "homologene-<field[0]>", the mapped genome name, and the gene ID
# with surrounding whitespace removed.
163 values = ("homologene-%s" % field[0], speciesMap[field[1]], gid.strip())
167 cursor.execute(sqlstmt, values)
# Indexes matching the two lookups used elsewhere in this file: by
# (genome, gID) and by homoloID.
170 sqlstmt = "CREATE INDEX idx1 on homolog(genome, gID)"
171 cursor.execute(sqlstmt)
172 sqlstmt = "CREATE INDEX idx2 on homolog(homoloID)"
173 cursor.execute(sqlstmt)
# Insert homology relationships between genes of genomeA and genomeB into the
# "homolog" table of the sqlite file `hdb`.
#   genomeA, genomeB - genome names stored in the `genome` column.
#   entries          - iterable of (gene ID in genomeA, gene ID in genomeB).
#   hdb              - sqlite file to update; the default is the value dbPath
#                      had when this function was defined.
# NOTE(review): the statements that initialise `mapping`, `sql`, `stmtArray`
# and `topHID`, the conditions guarding the appends, and any commit/close are
# not visible in this section.
179 def addHomologs(genomeA, genomeB, entries, hdb=dbPath):
180 """ Specify homology relationships between geneIDs to be inserted into homology database.
181 The entries list contains doubles of the form (gIDa, gIDb) from genome A and genome B, respectively.
# Build a genomeA-gene -> genomeB-gene lookup; a later pair with the same
# genomeA gene replaces the earlier one.
184 for (geneID1, geneID2) in entries:
185 mapping[geneID1] = geneID2
190 db = sqlite.connect(hdb)
# NOTE(review): genomeA is pasted into the SQL text with % formatting instead
# of being bound as a parameter, so a quote character in it breaks or alters
# the statement. The value is also wrapped in double quotes, which SQLite
# treats as an identifier first (for example genomeA == "genome" would compare
# the column with itself). Prefer: execute("... where genome = ?", (genomeA,)).
192 sql.execute('select * from homolog where genome = "%s" ' % genomeA)
193 results = sql.fetchall()
# Same parameterised insert shape as the table-building code: NULL lets SQLite
# assign the INTEGER PRIMARY KEY.
195 stmt = "insert into homolog(ID, homoloID, genome, gID) values (NULL, ?, ?, ?) "
# Each fetched row is unpacked into its four columns. For a genomeA gene found
# in `mapping`, a row for its genomeB partner is queued under the same group
# ID (hID). The guard that keeps mapping[gID] from raising KeyError is
# presumably on a line that is not visible - confirm.
198 for entry in results:
199 (rowID, hID, genome, gID) = entry
201 stmtArray.append((hID, genomeB, mapping[gID]))
# Python 2 print statements are used for the progress messages below.
205 if len(stmtArray) > 0:
206 print "Updating %d entries in homolog table" % len(stmtArray)
207 sql.executemany(stmt, stmtArray)
# New groups: the group ID is "<genomeA>-<genomeB>-<topHID>", and one row is
# queued for each side of the pair. How `topHID` and the iterated `gID` are
# produced is not visible here.
212 homologID = "%s-%s-%s" % (genomeA, genomeB, str(topHID))
213 stmtArray.append((homologID, genomeA, gID))
214 stmtArray.append((homologID, genomeB, mapping[gID]))
217 print "Adding %d new homology entries" % len(mapping)
218 sql.executemany(stmt, stmtArray)