from array import array
from commoncode import getReverseComplement, getConfigParser, getConfigOption
-currentRDSVersion = "2.0"
+currentRDSVersion = "2.1"
class ReadDatasetError(Exception):
self.memCursor = ""
self.cachedDBFile = ""
+ if initialize and datasetType not in ["DNA", "RNA"]:
+ raise ReadDatasetError("failed to initialize: datasetType must be 'DNA' or 'RNA'")
+
if cache:
if verbose:
print "caching ...."
self.dbcon.row_factory = sqlite.Row
self.dbcon.execute("PRAGMA temp_store = MEMORY")
if initialize:
- if datasetType not in ["DNA", "RNA"]:
- raise ReadDatasetError("failed to initialize: datasetType must be 'DNA' or 'RNA'")
- else:
- self.dataType = datasetType
-
+ self.dataType = datasetType
self.initializeTables(self.dbcon)
else:
metadata = self.getMetadata("dataType")
self.rdsVersion = "pre-1.0"
if verbose:
- if initialize:
- print "INITIALIZED dataset %s" % datafile
- else:
- print "dataset %s" % datafile
-
- metadata = self.getMetadata()
- print "metadata:"
- pnameList = metadata.keys()
- pnameList.sort()
- for pname in pnameList:
- print "\t" + pname + "\t" + metadata[pname]
-
- if reportCount:
- ucount = self.getUniqsCount()
- mcount = self.getMultiCount()
- if self.dataType == "DNA" and not initialize:
- try:
- print "\n%d unique reads and %d multireads" % (int(ucount), int(mcount))
- except ValueError:
- print "\n%s unique reads and %s multireads" % (ucount, mcount)
- elif self.dataType == "RNA" and not initialize:
- scount = self.getSplicesCount()
- try:
- print "\n%d unique reads, %d spliced reads and %d multireads" % (int(ucount), int(scount), int(mcount))
- except ValueError:
- print "\n%s unique reads, %s spliced reads and %s multireads" % (ucount, scount, mcount)
-
- print "default cache size is %d pages" % self.getDefaultCacheSize()
- if self.hasIndex():
- print "found index"
- else:
- print "not indexed"
+ self.printRDSInfo(datafile, reportCount, initialize)
def __len__(self):
self.uncacheDB()
+ def printRDSInfo(self, datafile, reportCount, initialize):
+ if initialize:
+ print "INITIALIZED dataset %s" % datafile
+ else:
+ print "dataset %s" % datafile
+
+ metadata = self.getMetadata()
+ print "metadata:"
+ pnameList = metadata.keys()
+ pnameList.sort()
+ for pname in pnameList:
+ print "\t" + pname + "\t" + metadata[pname]
+
+ if reportCount and not initialize:
+ self.printReadCounts()
+
+ print "default cache size is %d pages" % self.getDefaultCacheSize()
+ if self.hasIndex():
+ print "found index"
+ else:
+ print "not indexed"
+
+
+ def printReadCounts(self):
+ ucount = self.getUniqsCount()
+ mcount = self.getMultiCount()
+ if self.dataType == "DNA":
+ print "\n%d unique reads and %d multireads" % (ucount, mcount)
+ elif self.dataType == "RNA":
+ scount = self.getSplicesCount()
+ print "\n%d unique reads, %d spliced reads and %d multireads" % (ucount, scount, mcount)
+
+
def cacheDB(self, filename):
""" copy geneinfoDB to a local cache.
"""
tableSchema = "(ID INTEGER PRIMARY KEY, readID varchar, chrom varchar, %s, sense varchar, weight real, flag varchar, mismatch varchar)" % positionSchema
dbConnection.execute("create table splices %s" % tableSchema)
+ positionSchema = "startL int, stopL int, startR int, stopR int"
+ tableSchema = "(ID INTEGER PRIMARY KEY, readID varchar, chrom varchar, %s, sense varchar, weight real, flag varchar, mismatch varchar)" % positionSchema
+ dbConnection.execute("create table multisplices %s" % tableSchema)
+
dbConnection.commit()
self.dbcon.commit()
+ def insertMultisplices(self, valuesList):
+ """ inserts a list of (readID, chrom, startL, stopL, startR, stopR, sense, weight, flag, mismatch)
+ into the multisplices table.
+ """
+ self.dbcon.executemany("insert into multisplices(ID, readID, chrom, startL, stopL, startR, stopR, sense, weight, flag, mismatch) values (NULL,?,?,?,?,?,?,?,?,?,?)", valuesList)
+ self.dbcon.commit()
+
+
def flagReads(self, regionsList, uniqs=True, multi=False, splices=False, sense="both"):
""" update reads on file database in a list region of regions for a chromosome to have a new flag.
regionsList must have 4 fields per region of the form (flag, chrom, start, stop) or, with