self.cachedDB = ""
- def attachDB(self, filename, asname):
+ def attachDB(self, filename, dbName):
""" attach another database file to the readDataset.
"""
- stmt = "attach '%s' as %s" % (filename, asname)
+ stmt = "attach '%s' as %s" % (filename, dbName)
self.execute(stmt)
- def detachDB(self, asname):
+ def detachDB(self, dbName):
""" detach a database file to the readDataset.
"""
- stmt = "detach %s" % (asname)
+ stmt = "detach %s" % (dbName)
self.execute(stmt)
- def importFromDB(self, asname, table, ascolumns="*", destcolumns="", flagged=""):
+ def importFromDB(self, dbName, table, ascolumns="*", destcolumns="", flagged=""):
""" import into current RDS the table (with columns destcolumns,
with default all columns) from the database file asname,
using the column specification of ascolumns (default all).
"""
- stmt = "insert into %s %s select %s from %s.%s" % (table, destcolumns, ascolumns, asname, table)
+ stmt = "insert into %s %s select %s from %s.%s" % (table, destcolumns, ascolumns, dbName, table)
if flagged != "":
stmt += " where flag = '%s' " % flagged
self.executeCommit(stmt)
- def getTables(self, asname=""):
+ def getTables(self, dbName=""):
""" get a list of table names in a particular database file.
"""
resultList = []
sql = self.getSqlCursor()
- if asname != "":
- asname += "."
+ if dbName != "":
+ dbName = "%s." % dbName
- stmt = "select name from %ssqlite_master where type='table'" % asname
+ stmt = "select name from %ssqlite_master where type='table'" % dbName
sql.execute(stmt)
results = sql.fetchall()
if "readsize" not in metadata:
raise ReadDatasetError("no readsize parameter defined")
else:
- mysize = metadata["readsize"]
- if "import" in mysize:
- mysize = mysize.split()[0]
+ readSize = metadata["readsize"]
+ if "import" in readSize:
+ readSize = readSize.split()[0]
- return int(mysize)
+ readSize = int(readSize)
+ if readSize < 0:
+ raise ReadDatasetError("readsize is negative")
+
+ return readSize
def getDefaultCacheSize(self):
if row["chrom"] not in results:
results.append(row["chrom"])
else:
- if len(row["chrom"][3:].strip()) < 1:
- continue
-
- if row["chrom"][3:] not in results:
- results.append(row["chrom"][3:])
+ shortName = row["chrom"][3:]
+ if len(shortName.strip()) > 0 and shortName not in results:
+ results.append(shortName)
results.sort()
""" returns the maximum coordinate for reads on a given chromosome.
"""
maxCoord = 0
- sql = self.getSqlCursor()
if doUniqs:
- try:
- sql.execute("select max(start) from uniqs where chrom = '%s'" % chrom)
- maxCoord = int(sql.fetchall()[0][0])
- except:
- print "couldn't retrieve coordMax for chromosome %s" % chrom
+ maxCoord = self.getMaxStartCoordinateInTable(chrom, "uniqs")
if doSplices:
- sql.execute("select max(startR) from splices where chrom = '%s'" % chrom)
- try:
- spliceMax = int(sql.fetchall()[0][0])
- if spliceMax > maxCoord:
- maxCoord = spliceMax
- except:
- pass
+ spliceMax = self.getMaxStartCoordinateInTable(chrom, "splices", startField="startR")
+ maxCoord = max(spliceMax, maxCoord)
if doMulti:
- sql.execute("select max(start) from multi where chrom = '%s'" % chrom)
- try:
- multiMax = int(sql.fetchall()[0][0])
- if multiMax > maxCoord:
- maxCoord = multiMax
- except:
- pass
+ multiMax = self.getMaxStartCoordinateInTable(chrom, "multi")
+ maxCoord = max(multiMax, maxCoord)
if verbose:
print "%s maxCoord: %d" % (chrom, maxCoord)
return maxCoord
+ def getMaxStartCoordinateInTable(self, chrom, table, startField="start"):
+ maxCoord = 0
+ sqlStatement = "select max(%s) from %s where chrom = '%s'" % (startField, table, chrom)
+ sql = self.getSqlCursor()
+ try:
+ sql.execute(sqlStatement)
+ maxCoord = int(sql.fetchall()[0][0])
+ except:
+ print "couldn't retrieve coordMax for chromosome %s" % chrom
+
+ return maxCoord
+
+
def getReadsDict(self, bothEnds=False, noSense=False, fullChrom=False, chrom="",
flag="", withWeight=False, withFlag=False, withMismatch=False, withID=False,
withChrom=False, withPairID=False, doUniqs=True, doMulti=False, findallOptimize=False,
Need to rethink original design 1: Cannot have pairID without exporting as a readIDDict
"""
- whereClause = []
- resultsDict = {}
-
- if chrom != "" and chrom != self.memChrom:
- whereClause.append("chrom = '%s'" % chrom)
-
- if flag != "":
- if flagLike:
- flagLikeClause = string.join(['flag LIKE "%', flag, '%"'], "")
- whereClause.append(flagLikeClause)
- else:
- whereClause.append("flag = '%s'" % flag)
-
- if start > -1:
- whereClause.append("start > %d" % start)
-
- if stop > -1:
- whereClause.append("stop < %d" % stop)
-
- if len(readLike) > 0:
- readIDClause = string.join(["readID LIKE '", readLike, "%'"], "")
- whereClause.append(readIDClause)
-
- if hasMismatch:
- whereClause.append("mismatch != ''")
-
- if strand in ["+", "-"]:
- whereClause.append("sense = '%s'" % strand)
-
- if len(whereClause) > 0:
- whereStatement = string.join(whereClause, " and ")
- whereQuery = "where %s" % whereStatement
- else:
- whereQuery = ""
- groupBy = []
+ whereQuery = self.getReadWhereQuery(chrom, flag, flagLike, start, stop, hasMismatch, strand, readLike)
if findallOptimize:
- selectClause = ["select start, sense, sum(weight)"]
- groupBy = ["GROUP BY start, sense"]
+ selectQuery = "select start, sense, sum(weight)"
else:
- selectClause = ["select ID, chrom, start, readID"]
- if bothEnds:
- selectClause.append("stop")
-
- if not noSense:
- selectClause.append("sense")
-
- if withWeight:
- selectClause.append("weight")
-
- if withFlag:
- selectClause.append("flag")
+ selectQuery = self.getReadSelectQuery("select ID, chrom, start, readID", noSense, withWeight, withFlag, withMismatch, bothEnds)
- if withMismatch:
- selectClause.append("mismatch")
-
- if limit > 0 and not combine5p:
- groupBy.append("LIMIT %d" % limit)
-
- selectQuery = string.join(selectClause, ",")
- groupQuery = string.join(groupBy)
+ groupQuery = self.getReadGroupQuery(findallOptimize, limit, combine5p)
if doUniqs:
stmt = [selectQuery, "from uniqs", whereQuery, groupQuery]
if doMulti:
sqlQuery = string.join(stmt)
sql.execute(sqlQuery)
+ resultsDict = {}
if findallOptimize:
resultsDict[chrom] = [{"start": int(row[0]), "sense": row[1], "weight": float(row[2])} for row in sql]
if self.memBacked:
return resultsDict
- def getSplicesDict(self, noSense=False, fullChrom=False, chrom="",
- flag="", withWeight=False, withFlag=False, withMismatch=False,
- withID=False, withChrom=False, withPairID=False, readIDDict=False,
- splitRead=False, hasMismatch=False, flagLike=False, start=-1,
- stop=-1, strand=""):
- """ returns a dictionary of spliced reads in a variety of
- formats and which can be restricted by chromosome or custom-flag.
- Returns unique spliced reads for now.
- """
- whereClause = []
- resultsDict = {}
+ def getReadWhereQuery(self, chrom, flag, flagLike, start, stop, hasMismatch, strand, readLike="", splice=False):
+ if splice:
+ startText = "startL"
+ stopText = "stopR"
+ else:
+ startText = "start"
+ stopText = "stop"
+ whereClause = []
if chrom != "" and chrom != self.memChrom:
- whereClause = ["chrom = '%s'" % chrom]
+ whereClause.append("chrom = '%s'" % chrom)
if flag != "":
if flagLike:
else:
whereClause.append("flag = '%s'" % flag)
+ if start > -1:
+ whereClause.append("%s > %d" % (startText, start))
+
+ if stop > -1:
+ whereClause.append("%s < %d" % (stopText, stop))
+
+ if len(readLike) > 0:
+ readIDClause = string.join(["readID LIKE '", readLike, "%'"], "")
+ whereClause.append(readIDClause)
+
if hasMismatch:
whereClause.append("mismatch != ''")
- if strand != "":
+ if strand in ["+", "-"]:
whereClause.append("sense = '%s'" % strand)
- if start > -1:
- whereClause.append("startL > %d" % start)
-
- if stop > -1:
- whereClause.append("stopR < %d" % stop)
-
if len(whereClause) > 0:
whereStatement = string.join(whereClause, " and ")
whereQuery = "where %s" % whereStatement
else:
whereQuery = ""
- selectClause = ["select ID, chrom, startL, stopL, startR, stopR, readID"]
+ return whereQuery
+
+
+ def getReadSelectQuery(self, baseSelect, noSense, withWeight, withFlag, withMismatch, bothEnds=False):
+
+ selectClause = [baseSelect]
+ if bothEnds:
+ selectClause.append("stop")
+
if not noSense:
selectClause.append("sense")
if withMismatch:
selectClause.append("mismatch")
- selectQuery = string.join(selectClause, " ,")
+ selectQuery = string.join(selectClause, ",")
+
+ return selectQuery
+
+
+ def getReadGroupQuery(self, findallOptimize, limit, combine5p):
+ groupBy = []
+ if findallOptimize:
+ groupBy = ["GROUP BY start, sense"]
+
+ if limit > 0 and not combine5p:
+ groupBy.append("LIMIT %d" % limit)
+
+ groupQuery = string.join(groupBy)
+
+ return groupQuery
+
+
+ def getSplicesDict(self, noSense=False, fullChrom=False, chrom="",
+ flag="", withWeight=False, withFlag=False, withMismatch=False,
+ withID=False, withChrom=False, withPairID=False, readIDDict=False,
+ splitRead=False, hasMismatch=False, flagLike=False, start=-1,
+ stop=-1, strand=""):
+ """ returns a dictionary of spliced reads in a variety of
+ formats and which can be restricted by chromosome or custom-flag.
+ Returns unique spliced reads for now.
+ """
+ whereQuery = self.getReadWhereQuery(chrom, flag, flagLike, start, stop, hasMismatch, strand, splice=True)
+ selectClause = "select ID, chrom, startL, stopL, startR, stopR, readID"
+ selectQuery = self.getReadSelectQuery(selectClause, noSense, withWeight, withFlag, withMismatch)
if self.memBacked:
sql = self.memcon.cursor()
else:
sql.execute(stmt)
currentReadID = ""
currentChrom = ""
+ resultsDict = {}
for row in sql:
pairID = 0
readID = row["readID"]
""" get readID's.
"""
stmt = []
- limitPart = ""
- if limit > 0:
- limitPart = "LIMIT %d" % limit
-
if uniqs:
stmt.append("select readID from uniqs")
else:
selectPart = ""
+ limitPart = ""
+ if limit > 0:
+ limitPart = "LIMIT %d" % limit
+
sqlQuery = "%s group by readID %s" % (selectPart, limitPart)
if self.memBacked:
sql = self.memcon.cursor()
print "getting mismatches from chromosome %s" % (achrom)
snpDict[achrom] = []
- hitDict = self.getReadsDict(fullChrom=True, chrom=achrom, withMismatch=True, hasMismatch=True)
if useSplices and self.dataType == "RNA":
spliceDict = self.getSplicesDict(fullChrom=True, chrom=achrom, withMismatch=True, readIDDict=True, hasMismatch=True)
spliceIDList = spliceDict.keys()
- for k in spliceIDList:
- spliceEntry = spliceDict[k][0]
+ for spliceID in spliceIDList:
+ spliceEntry = spliceDict[spliceID][0]
startpos = spliceEntry["startL"]
lefthalf = spliceEntry["stopL"]
rightstart = spliceEntry["startR"]
snpDict[achrom].append([startpos, change_at, change_base, change_from])
+ hitDict = self.getReadsDict(fullChrom=True, chrom=achrom, withMismatch=True, hasMismatch=True)
if achrom not in hitDict.keys():
continue
def getChromProfile(self, chromosome, cstart=-1, cstop=-1, useMulti=True,
- useSplices=False, normalizationFactor = 1.0, trackStrand=False,
+ useSplices=False, normalizationFactor=1.0, trackStrand=False,
keepStrand="both", shiftValue=0):
"""return a profile of the chromosome as an array of per-base read coverage....
keepStrand = 'both', 'plusOnly', or 'minusOnly'.
dataType = metadata["dataType"]
scale = 1. / normalizationFactor
shift = {}
- shift['+'] = int(shiftValue)
- shift['-'] = -1 * int(shiftValue)
+ shift["+"] = int(shiftValue)
+ shift["-"] = -1 * int(shiftValue)
if cstop > 0:
lastNT = self.getMaxCoordinate(chromosome, doMulti=useMulti, doSplices=useSplices) + readlen