+###########################################################################
+# #
+# C O P Y R I G H T N O T I C E #
+# Copyright (c) 2003-10 by: #
+# * California Institute of Technology #
+# #
+# All Rights Reserved. #
+# #
+# Permission is hereby granted, free of charge, to any person #
+# obtaining a copy of this software and associated documentation files #
+# (the "Software"), to deal in the Software without restriction, #
+# including without limitation the rights to use, copy, modify, merge, #
+# publish, distribute, sublicense, and/or sell copies of the Software, #
+# and to permit persons to whom the Software is furnished to do so, #
+# subject to the following conditions: #
+# #
+# The above copyright notice and this permission notice shall be #
+# included in all copies or substantial portions of the Software. #
+# #
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, #
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF #
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND #
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS #
+# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN #
+# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN #
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE #
+# SOFTWARE. #
+###########################################################################
+#
+try:
+ from pysqlite2 import dbapi2 as sqlite
+except:
+ from sqlite3 import dbapi2 as sqlite
+
+import tempfile, shutil, os
+from os import environ
+
+if environ.get("CISTEMATIC_ROOT"):
+ cisRoot = environ.get("CISTEMATIC_ROOT")
+else:
+ cisRoot = "/proj/genome"
+
+from cistematic.core.geneinfo import speciesMap
+
+dbPath = "%s/db/homologene.db" % cisRoot
+homologeneGenomes = ["hsapiens", "mmusculus", "rnorvegicus", "celegans",
+ "cbriggsae", "cremanei", "dmelanogaster", "athaliana",
+ "ggallus", "cfamiliaris", "drerio", "scerevisiae"]
+
+if environ.get("CISTEMATIC_TEMP"):
+ cisTemp = environ.get("CISTEMATIC_TEMP")
+else:
+ cisTemp = "/tmp"
+tempfile.tempdir = cisTemp
+
+
+class homologyDB:
+ """ The homologyDB class allows for the mapping and return of predefined homology relationships.
+ """
+ startingGenome = ""
+ targetGenomes = []
+ homologousGenes = {}
+ cachedDB = ""
+
+
+ def __init__(self, tGenomes=[], cache=False):
+ """ initialize the homologyDB object with a target genome and cache database, if desired.
+ """
+ self.targetGenomes = tGenomes
+ if cache:
+ self.cacheDB()
+
+
+ def __del__(self):
+ """ cleanup copy in local cache, if present.
+ """
+ if self.cachedDB != "":
+ self.uncacheDB()
+
+
+ def cacheDB(self):
+ """ copy homologyDB to a local cache.
+ """
+ self.cachedDB = "%s.db" % tempfile.mktemp()
+ shutil.copyfile(dbPath, self.cachedDB)
+
+
+ def uncacheDB(self):
+ """ delete homologyDB from local cache.
+ """
+ global cachedDB
+ if self.cachedDB != "":
+ try:
+ os.remove(self.cachedDB)
+ except:
+ print "could not delete %s" % self.cachedDB
+
+ self.cachedDB = ""
+
+
+ def connectDB(self):
+ """ return a handle to the database.
+ """
+ path = dbPath
+ if self.cachedDB != "":
+ path = self.cachedDB
+
+ return sqlite.connect(path, timeout=60)
+
+
+ def loadDB(self):
+ """ deprecated.
+ """
+ pass
+
+
+ def getHomologousGenes(self, geneID):
+ """ return list of geneIDs homologous to given geneID. Limit to target genomes if specified at initialization.
+ """
+ db = self.connectDB()
+ cursor = db.cursor()
+ results = []
+ (gen, gid) = geneID
+ cursor.execute("select homoloID from homolog where genome = :gen and gID = :gid", locals())
+ groups = cursor.fetchall()
+ for hIDentry in groups:
+ homoloID = str(hIDentry[0])
+ cursor.execute("select genome, gID from homolog where homoloID = :homoloID ", locals())
+ genes = cursor.fetchall()
+ for gene in genes:
+ if gene != geneID:
+ (genome, gID) = gene
+ if len(self.targetGenomes) > 0:
+ if genome not in self.targetGenomes:
+ pass
+ else:
+ results.append((str(genome), str(gID)))
+ else:
+ results.append((str(genome), str(gID)))
+
+ cursor.close()
+ db.close()
+
+ return results
+
+
+def buildHomologeneDB(hFile="homologene.data", hdb=dbPath):
+ """ Populate a new homologyDB database with homology relationships from homologene.
+ """
+ inFile = open(hFile)
+ db = sqlite.connect(hdb)
+ cursor = db.cursor()
+ cursor.execute("create table homolog(ID INTEGER PRIMARY KEY, homoloID varchar, genome varchar, gID varchar)")
+ for line in inFile:
+ doInsert = False
+ sqlstmt = "INSERT into homolog(ID, homoloID, genome, gID) values (NULL, ?, ?, ?)"
+ field = line.split("\t")
+ if field[1] in speciesMap:
+ gid = field[2]
+ if speciesMap[field[1]] == "arabidopsis":
+ gid = field[3].upper()
+
+ values = ("homologene-%s" % field[0], speciesMap[field[1]], gid.strip())
+ doInsert = True
+
+ if doInsert:
+ cursor.execute(sqlstmt, values)
+
+ inFile.close()
+ sqlstmt = "CREATE INDEX idx1 on homolog(genome, gID)"
+ cursor.execute(sqlstmt)
+ sqlstmt = "CREATE INDEX idx2 on homolog(homoloID)"
+ cursor.execute(sqlstmt)
+ db.commit()
+ cursor.close()
+ db.close()
+
+
+def addHomologs(genomeA, genomeB, entries, hdb=dbPath):
+ """ Specify homology relationships between geneIDs to be inserted into homology database.
+ The entries list contains doubles of the form (gIDa, gIDb) from genome A and genome B, respectively.
+ """
+ mapping = {}
+ for (geneID1, geneID2) in entries:
+ mapping[geneID1] = geneID2
+
+ if len(mapping) < 1:
+ return
+
+ db = sqlite.connect(hdb)
+ sql = db.cursor()
+ sql.execute('select * from homolog where genome = "%s" ' % genomeA)
+ results = sql.fetchall()
+
+ stmt = "insert into homolog(ID, homoloID, genome, gID) values (NULL, ?, ?, ?) "
+ stmtArray = []
+
+ for entry in results:
+ (rowID, hID, genome, gID) = entry
+ if gID in mapping:
+ stmtArray.append((hID, genomeB, mapping[gID]))
+ del mapping[gID]
+
+ topHID = 0
+ if len(stmtArray) > 0:
+ print "Updating %d entries in homolog table" % len(stmtArray)
+ sql.executemany(stmt, stmtArray)
+
+ stmtArray = []
+ for gID in mapping:
+ topHID += 1
+ homologID = "%s-%s-%s" % (genomeA, genomeB, str(topHID))
+ stmtArray.append((homologID, genomeA, gID))
+ stmtArray.append((homologID, genomeB, mapping[gID]))
+
+ if len(mapping) > 0:
+ print "Adding %d new homology entries" % len(mapping)
+ sql.executemany(stmt, stmtArray)
+
+ db.commit()
+ sql.close()
+ db.close()
\ No newline at end of file