X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=erange.git;a=blobdiff_plain;f=ReadDataset.py;fp=ReadDataset.py;h=c9d2a0bf2b4eb56e0fa906be2319fcac866c30b3;hp=850a5ec602a7b6d7a9a22ac98c4bf3eef4b873e5;hb=03f1e0b3bab22d517ad75b9af4d54e8fcb8540fb;hpb=b54eee5365a0fad35d2f6168eaac82ff5a359222 diff --git a/ReadDataset.py b/ReadDataset.py index 850a5ec..c9d2a0b 100644 --- a/ReadDataset.py +++ b/ReadDataset.py @@ -207,15 +207,27 @@ class ReadDataset(): return sql + def getMemCursor(self): + """ returns a cursor to memory database for low-level (SQL) + access to the data. + """ + return self.memcon.cursor() + + + def getFileCursor(self): + """ returns a cursor to file database for low-level (SQL) + access to the data. + """ + return self.dbcon.cursor() + + def hasIndex(self): - """ check whether the RDS file has at least one index. + """ return True if the RDS file has at least one index. """ stmt = "select count(*) from sqlite_master where type='index'" count = int(self.execute(stmt, returnResults=True)[0][0]) - if count > 0: - return True - return False + return count > 0 def initializeTables(self, dbConnection, cache=100000): @@ -237,20 +249,6 @@ class ReadDataset(): dbConnection.commit() - def getFileCursor(self): - """ returns a cursor to file database for low-level (SQL) - access to the data. - """ - return self.dbcon.cursor() - - - def getMemCursor(self): - """ returns a cursor to memory database for low-level (SQL) - access to the data. - """ - return self.memcon.cursor() - - def getMetadata(self, valueName=""): """ returns a dictionary of metadata. """ @@ -309,7 +307,7 @@ class ReadDataset(): def getChromosomes(self, table="uniqs", fullChrom=True): - """ returns a list of distinct chromosomes in table. + """ returns a sorted list of distinct chromosomes in table. """ statement = "select distinct chrom from %s" % table sql = self.getSqlCursor() @@ -330,7 +328,7 @@ class ReadDataset(): return results - def getMaxCoordinate(self, chrom, verbose=False, doUniqs=True, + def getMaxCoordinate(self, chrom, doUniqs=True, doMulti=False, doSplices=False): """ returns the maximum coordinate for reads on a given chromosome. """ @@ -347,9 +345,6 @@ class ReadDataset(): multiMax = self.getMaxStartCoordinateInTable(chrom, "multi") maxCoord = max(multiMax, maxCoord) - if verbose: - print "%s maxCoord: %d" % (chrom, maxCoord) - return maxCoord @@ -375,9 +370,9 @@ class ReadDataset(): and which can be restricted by chromosome or custom-flag. Returns unique reads by default, but can return multireads with doMulti set to True. - - Need to rethink original design 1: Cannot have pairID without exporting as a readIDDict + """ + #TODO: Need to rethink original design 1: Cannot have pairID without exporting as a readIDDict whereQuery = self.getReadWhereQuery(chrom, flag, flagLike, start, stop, hasMismatch, strand, readLike) if findallOptimize: @@ -421,27 +416,16 @@ class ReadDataset(): if findallOptimize: if self.memBacked: self.memcon.row_factory = None - sql = self.memcon.cursor() else: self.dbcon.row_factory = None - sql = self.dbcon.cursor() stmt.append("order by start") elif readIDDict: - if self.memBacked: - sql = self.memcon.cursor() - else: - sql = self.dbcon.cursor() - stmt.append("order by readID, start") else: - if self.memBacked: - sql = self.memcon.cursor() - else: - sql = self.dbcon.cursor() - stmt.append("order by chrom, start") + sql = self.getSqlCursor() sqlQuery = string.join(stmt) sql.execute(sqlQuery) @@ -602,10 +586,7 @@ class ReadDataset(): whereQuery = self.getReadWhereQuery(chrom, flag, flagLike, start, stop, hasMismatch, strand, splice=True) selectClause = "select ID, chrom, startL, stopL, startR, stopR, readID" selectQuery = self.getReadSelectQuery(selectClause, noSense, withWeight, withFlag, withMismatch) - if self.memBacked: - sql = self.memcon.cursor() - else: - sql = self.dbcon.cursor() + sql = self.getSqlCursor() stmt = "%s from splices %s order by chrom, startL" % (selectQuery, whereQuery) sql.execute(stmt) @@ -718,7 +699,7 @@ class ReadDataset(): def getTableEntryCount(self, table, chrom="", rmin="", rmax="", restrict="", distinct=False, startField="start"): - """ returns the number of row in the uniqs table. + """ returns the number of row in the specified table. """ whereClause = [] count = 0 @@ -741,10 +722,7 @@ class ReadDataset(): else: whereQuery = "" - if self.memBacked: - sql = self.memcon.cursor() - else: - sql = self.dbcon.cursor() + sql = self.getSqlCursor() if distinct: sql.execute("select count(distinct chrom+%s+sense) from %s %s" % (startField, table, whereQuery)) @@ -803,11 +781,7 @@ class ReadDataset(): limitPart = "LIMIT %d" % limit sqlQuery = "%s group by readID %s" % (selectPart, limitPart) - if self.memBacked: - sql = self.memcon.cursor() - else: - sql = self.dbcon.cursor() - + sql = self.getSqlCursor() sql.execute(sqlQuery) result = sql.fetchall()