Rewrite of findall.py to clean up the code. Configuration tested using

[erange.git] / ReadDataset.py
diff --git a/ReadDataset.py b/ReadDataset.py

index 850a5ec602a7b6d7a9a22ac98c4bf3eef4b873e5..c9d2a0bf2b4eb56e0fa906be2319fcac866c30b3 100644 (file)
--- a/ReadDataset.py
+++ b/ReadDataset.py
@@ -207,15 +207,27 @@ class ReadDataset():
          return sql
  
  
+    def getMemCursor(self):
+        """ returns a cursor to memory database for low-level (SQL)
+        access to the data.
+        """
+        return self.memcon.cursor()
+
+
+    def getFileCursor(self):
+        """ returns a cursor to file database for low-level (SQL)
+        access to the data.
+        """
+        return self.dbcon.cursor()
+
+
      def hasIndex(self):
-        """ check whether the RDS file has at least one index.
+        """ return True if the RDS file has at least one index.
          """
          stmt = "select count(*) from sqlite_master where type='index'"
          count = int(self.execute(stmt, returnResults=True)[0][0])
-        if count > 0:
-            return True
  
-        return False
+        return count > 0
  
  
      def initializeTables(self, dbConnection, cache=100000):
@@ -237,20 +249,6 @@ class ReadDataset():
          dbConnection.commit()
  
  
-    def getFileCursor(self):
-        """ returns a cursor to file database for low-level (SQL)
-        access to the data.
-        """
-        return self.dbcon.cursor()
-
-
-    def getMemCursor(self):
-        """ returns a cursor to memory database for low-level (SQL)
-        access to the data.
-        """
-        return self.memcon.cursor()
-
-
      def getMetadata(self, valueName=""):
          """ returns a dictionary of metadata.
          """
@@ -309,7 +307,7 @@ class ReadDataset():
  
  
      def getChromosomes(self, table="uniqs", fullChrom=True):
-        """ returns a list of distinct chromosomes in table.
+        """ returns a sorted list of distinct chromosomes in table.
          """
          statement = "select distinct chrom from %s" % table
          sql = self.getSqlCursor()
@@ -330,7 +328,7 @@ class ReadDataset():
          return results
  
  
-    def getMaxCoordinate(self, chrom, verbose=False, doUniqs=True,
+    def getMaxCoordinate(self, chrom, doUniqs=True,
                           doMulti=False, doSplices=False):
          """ returns the maximum coordinate for reads on a given chromosome.
          """
@@ -347,9 +345,6 @@ class ReadDataset():
              multiMax = self.getMaxStartCoordinateInTable(chrom, "multi")
              maxCoord = max(multiMax, maxCoord)
  
-        if verbose:
-            print "%s maxCoord: %d" % (chrom, maxCoord)
-
          return maxCoord
  
  
@@ -375,9 +370,9 @@ class ReadDataset():
          and which can be restricted by chromosome or custom-flag.
          Returns unique reads by default, but can return multireads
          with doMulti set to True.
-        
-        Need to rethink original design 1: Cannot have pairID without exporting as a readIDDict
+
          """
+        # TODO: Need to rethink original design 1: Cannot have pairID without exporting as a readIDDict
  
          whereQuery = self.getReadWhereQuery(chrom, flag, flagLike, start, stop, hasMismatch, strand, readLike)
          if findallOptimize:
@@ -421,27 +416,16 @@ class ReadDataset():
          if findallOptimize:
              if self.memBacked:
                  self.memcon.row_factory = None
-                sql = self.memcon.cursor()
              else:
                  self.dbcon.row_factory = None
-                sql = self.dbcon.cursor()
  
              stmt.append("order by start")
          elif readIDDict:
-            if self.memBacked:
-                sql = self.memcon.cursor()
-            else:
-                sql = self.dbcon.cursor()
-
              stmt.append("order by readID, start")
          else:
-            if self.memBacked:
-                sql = self.memcon.cursor()
-            else:
-                sql = self.dbcon.cursor()
-
              stmt.append("order by chrom, start")
  
+        sql = self.getSqlCursor()
          sqlQuery = string.join(stmt)
          sql.execute(sqlQuery)
  
@@ -602,10 +586,7 @@ class ReadDataset():
          whereQuery = self.getReadWhereQuery(chrom, flag, flagLike, start, stop, hasMismatch, strand, splice=True)
          selectClause = "select ID, chrom, startL, stopL, startR, stopR, readID"
          selectQuery = self.getReadSelectQuery(selectClause, noSense, withWeight, withFlag, withMismatch)
-        if self.memBacked:
-            sql = self.memcon.cursor()
-        else:
-            sql = self.dbcon.cursor()
+        sql = self.getSqlCursor()
  
          stmt = "%s from splices %s order by chrom, startL" % (selectQuery, whereQuery)
          sql.execute(stmt)
@@ -718,7 +699,7 @@ class ReadDataset():
  
  
      def getTableEntryCount(self, table, chrom="", rmin="", rmax="", restrict="", distinct=False, startField="start"):
-        """ returns the number of row in the uniqs table.
+        """ returns the number of rows in the specified table.
          """
          whereClause = []
          count = 0
@@ -741,10 +722,7 @@ class ReadDataset():
          else:
              whereQuery = ""
  
-        if self.memBacked:
-            sql = self.memcon.cursor()
-        else:
-            sql = self.dbcon.cursor()
+        sql = self.getSqlCursor()
  
          if distinct:
              sql.execute("select count(distinct chrom+%s+sense) from %s %s" % (startField, table, whereQuery))
@@ -803,11 +781,7 @@ class ReadDataset():
              limitPart = "LIMIT %d" % limit
  
          sqlQuery = "%s group by readID %s" % (selectPart, limitPart)
-        if self.memBacked:
-            sql = self.memcon.cursor()
-        else:
-            sql = self.dbcon.cursor()
-
+        sql = self.getSqlCursor()
          sql.execute(sqlQuery)
          result = sql.fetchall()