erange version 4.0a dev release
[erange.git] / test / testReadDataset.py
1 '''
2 Created on Jul 21, 2010
3
4 @author: sau
5 '''
6 import unittest
7 import os
8 import sqlite3 as sqlite
9 from erange import ReadDataset
10
# File names of the two on-disk datasets used by every test: setUp creates
# them (DNA and RNA respectively) and tearDown removes them again.
testDBName = "testRDS.rds"
rnaTestDBName = "testRDSRNA.rds"
13
14 class TestReadDataset(unittest.TestCase):
15
16
17     def setUp(self):
18         self.rds = ReadDataset.ReadDataset(testDBName, initialize=True, datasetType="DNA", verbose=False)
19         self.rnaRds = ReadDataset.ReadDataset(rnaTestDBName, initialize=True, datasetType="RNA", verbose=False)
20
21
22     def tearDown(self):
23         del(self.rds)
24         os.remove(testDBName)
25         del(self.rnaRds)
26         os.remove(rnaTestDBName)
27
28
29     #TODO: rename and integrate
30     def testZeeNewStuff(self):
31         rdsEntryList = [("testRead", "chr1", 1, 100, "+", 1.0, "", ""),
32                         ("dup start", "chr1", 1, 150, "+", 1.0, "", ""),
33                         ("new read", "chr1", 80, 100, "+", 1.0, "", ""),
34                         ("testRead", "chr2", 201, 400, "+", 1.0, "", ""),
35                         ("dup start", "chr2", 201, 450, "+", 1.0, "", ""),
36                         ("new read", "chr2", 280, 400, "+", 1.0, "", ""),
37                         ("three up", "chr3", 1, 80, "+", 1.0, "", ""),
38                         ("three two", "chr3", 201, 230, "+", 1.0, "", "")
39         ]
40         self.rds.insertUniqs(rdsEntryList)
41         dbcon = sqlite.connect(testDBName)
42         sql = dbcon.cursor()
43         sql.execute("select chrom,start from uniqs group by chrom,start having ( count(start) > 1 and count(chrom) > 1)")
44         result = [("chr1", 1), ("chr2", 201)]
45         for eachEntry in sql.fetchall():
46             self.assertTrue(eachEntry in result)
47
48         sql.execute("select chrom,start from uniqs group by chrom,start having ( count(start) = 1 and count(chrom) = 1)")
49         result = [("chr1", 80), ("chr2", 280), ("chr3", 1), ("chr3", 201)]
50         for eachEntry in sql.fetchall():
51             self.assertTrue(eachEntry in result)
52
53         sql.execute("select * from uniqs group by chrom,start having ( count(start) > 1 and count(chrom) > 1) union select * from uniqs group by chrom,start having ( count(start) = 1 and count(chrom) = 1)")
54         result = [(2, "dup start", "chr1", 1, 150, "+", 1.0, "", ""),
55                   (3, "new read", "chr1", 80, 100, "+", 1.0, "", ""),
56                   (5, "dup start", "chr2", 201, 450, "+", 1.0, "", ""),
57                   (6, "new read", "chr2", 280, 400, "+", 1.0, "", ""),
58                   (7, "three up", "chr3", 1, 80, "+", 1.0, "", ""),
59                   (8, "three two", "chr3", 201, 230, "+", 1.0, "", "")
60         ]
61         for eachEntry in sql.fetchall():
62             self.assertTrue(eachEntry in result)
63
64         sql.execute("select chrom,start from uniqs where start > 100 group by chrom,start having ( count(start) > 1 and count(chrom) > 1) order by chrom,start")
65         result = [("chr2", 201)]
66         for eachEntry in sql.fetchall():
67             self.assertTrue(eachEntry in result)
68
69
70         rdsEntryList = [("testMultiRead", "chr1", 1, 200, "+", 0.5, "", ""),
71                         ("testMultiRead", "chr1", 1, 200, "+", 0.5, "", ""),
72                         ("testMultiRead", "chr2", 80, 200, "+", 0.5, "", ""),
73                         ("testMultiRead", "chr2", 1, 200, "+", 0.5, "", ""),
74                         ("testMultiRead", "chr2", 5000, 25000, "+", 0.5, "", ""),
75                         ("testMultiRead", "chr3", 1, 200, "+", 0.5, "", ""),
76                         ("testMultiRead", "chr3", 70, 500, "+", 0.5, "", "")
77         ]
78         self.rds.insertMulti(rdsEntryList)
79         sql.execute("select chrom,start from (select chrom,start from uniqs union all select chrom,start from multi) group by chrom,start having ( count(start) > 1 and count(chrom) > 1)")
80         result = [("chr1", 1), ("chr2", 201), ("chr3", 1)]
81         for eachEntry in sql.fetchall():
82             self.assertTrue(eachEntry in result)
83
84         sql.execute("select chrom,start from (select chrom,start from uniqs union all select chrom,start from multi) group by chrom,start having ( count(start) = 1 and count(chrom) = 1)")
85         result = [("chr1", 80),
86                   ("chr2", 1), ("chr2", 80), ("chr2", 280), ("chr2", 5000),
87                   ("chr3", 70), ("chr3", 201)
88         ]
89         for eachEntry in sql.fetchall():
90             self.assertTrue(eachEntry in result)
91
92         sql.execute("select chrom,start from (select chrom,start from uniqs union all select chrom,start from multi) group by chrom,start having ( count(start) > 1 and count(chrom) > 1) union select chrom,start from (select chrom,start from uniqs union all select chrom,start from multi) group by chrom,start having ( count(start) = 1 and count(chrom) = 1)")
93         result = sql.fetchall()
94         result = [("chr1", 1), ("chr1", 80),
95                   ("chr2", 1), ("chr2", 80), ("chr2", 201), ("chr2", 280), ("chr2", 5000),
96                   ("chr3", 1), ("chr3", 70), ("chr3", 201)
97         ]
98         for eachEntry in sql.fetchall():
99             self.assertTrue(eachEntry in result)
100
101         result = {"1": [{"start": 1, "sense": "+"}, {"start": 80, "sense": "+"}],
102                   "3": [{"start": 1, "sense": "+"}, {"start": 70, "sense": "+"}, {"start": 201, "sense": "+"}],
103                   "2": [{"start": 1, "sense": "+"}, {"start": 80, "sense": "+"}, {"start": 201, "sense": "+"}, {"start": 280, "sense": "+"}, {"start": 5000, "sense": "+"}]
104         }
105         self.assertEquals(result, self.rds.getReadsDict(combine5p=True, doMulti=True))
106
107         print self.rds.getReadsDict(combine5p=True, doMulti=True, withWeight=True)
108
109     def testReadDatasetBuiltIns(self):
110         # Initialize an existing rds file
111         self.assertRaises(sqlite.OperationalError, ReadDataset.ReadDataset, testDBName, initialize=True, datasetType="DNA", verbose=True)
112         self.assertEquals(0, len(self.rds))
113
114         rdsEntryList = [("testRead", "chr1", 1, 100, "+", 1.0, "", "")]
115         self.rds.insertUniqs(rdsEntryList)
116         self.assertEquals(1, len(self.rds))
117
118         rdsEntryList = [("testMultiRead", "chr1", 101, 200, "+", 0.5, "", ""),
119                         ("testMultiRead", "chr1", 101, 200, "+", 0.5, "", "")]
120         self.rds.insertMulti(rdsEntryList)
121         self.assertEquals(2, len(self.rds))
122
123         rdsEntryList = [("testSpliceRead", "chr1", 1000, 1100, 1150, 1200, "+", 1.0, "", "")]
124         self.assertRaises(sqlite.OperationalError, self.rds.insertSplices, rdsEntryList)
125         self.rnaRds.insertSplices(rdsEntryList)
126         self.assertEquals(2, len(self.rds))
127         self.assertEquals(1, len(self.rnaRds))
128
129
130     def testInsertUniqs(self):
131         rdsEntryList = [("testRead", "chr1", 1, 100, "+", 1.0, "", "")]
132         self.rds.insertUniqs(rdsEntryList)
133         self.assertEquals(1, len(self.rds))
134
135         rdsEntryList = [("testRead2", "chr1", 200, 300, "+", 1.0, "", "")]
136         self.rds.insertUniqs(rdsEntryList)
137         self.assertEquals(2, len(self.rds))
138
139
140     def testInsertMulti(self):
141         rdsEntryList = [("testMultiRead", "chr1", 101, 200, "+", 0.5, "", ""),
142                         ("testMultiRead", "chr1", 101, 200, "+", 0.5, "", "")]
143         self.rds.insertMulti(rdsEntryList)
144         self.assertEquals(1, len(self.rds))
145
146
147     def testInsertSplices(self):
148         rdsEntryList = [("testSpliceRead", "chr1", 1000, 1100, 1150, 1200, "+", 1.0, "", "")]
149         self.assertRaises(sqlite.OperationalError, self.rds.insertSplices, rdsEntryList)
150         self.rnaRds.insertSplices(rdsEntryList)
151         self.assertEquals(0, len(self.rds))
152         self.assertEquals(1, len(self.rnaRds))
153
154
155     def testGetChromosomes(self):
156         result = []
157         self.assertEqual(result, self.rds.getChromosomes(table="uniqs", fullChrom=True))
158
159         rdsEntryList = [("testRead", "chr1", 1, 100, "+", 1.0, "", "")]
160         self.rds.insertUniqs(rdsEntryList)
161         result = ["chr1"]
162         self.assertEqual(result, self.rds.getChromosomes(table="uniqs", fullChrom=True))
163
164         self.assertRaises(sqlite.OperationalError, self.rds.getChromosomes, table="badTableName")
165
166
167     #TODO: write unit test
168     def testAttachDB(self):
169         pass
170
171
172     #TODO: write unit test
173     def testDetachDB(self):
174         pass
175
176
177     #TODO: write unit test
178     def testImportFromDB(self):
179         pass
180
181
182     def testGetTables(self):
183         result = ["metadata", "uniqs", "multi"]
184         self.assertEquals(result, self.rds.getTables())
185
186         result = ["metadata", "uniqs", "multi", "splices"]
187         self.assertEquals(result, self.rnaRds.getTables())
188
189
190     def testHasIndex(self):
191         self.assertFalse(self.rds.hasIndex())
192         self.rds.buildIndex()
193         self.assertTrue(self.rds.hasIndex())
194
195
196     def testGetMetadata(self):
197         returnDict = self.rds.getMetadata()
198         self.assertTrue(returnDict.has_key("rdsVersion"))
199         self.assertEquals(returnDict["dataType"], "DNA")
200
201         result = {"dataType": "RNA"}
202         self.assertEquals(result, self.rnaRds.getMetadata("dataType"))
203
204         result = {}
205         self.assertEquals(result, self.rds.getMetadata("badMetaDataName"))
206
207
208     def testGetReadSize(self):
209         self.assertRaises(ReadDataset.ReadDatasetError, self.rds.getReadSize)
210
211         self.rds.insertMetadata([("readsize", "100")])
212         self.assertEquals(100, self.rds.getReadSize())
213
214         self.rds.updateMetadata("readsize", 100)
215         self.assertEquals(100, self.rds.getReadSize())
216
217         self.rds.updateMetadata("readsize", "100 import")
218         self.assertEquals(100, self.rds.getReadSize())
219
220         self.rds.updateMetadata("readsize", "badReadSize")
221         self.assertRaises(ValueError, self.rds.getReadSize)
222
223
224     def testGetDefaultCacheSize(self):
225         self.assertEquals(100000, self.rds.getDefaultCacheSize())
226
227
228     def testGetMaxCoordinate(self):
229         self.assertEquals(0, self.rnaRds.getMaxCoordinate("chr1"))
230
231         rdsEntryList = [("testRead", "chr1", 1, 100, "+", 1.0, "", "")]
232         self.rnaRds.insertUniqs(rdsEntryList)
233         self.assertEquals(1, self.rnaRds.getMaxCoordinate("chr1"))
234         self.assertEquals(0, self.rnaRds.getMaxCoordinate("chr2"))
235         self.assertEquals(0, self.rnaRds.getMaxCoordinate("chr1", doUniqs=False))
236
237         rdsEntryList = [("testMultiRead", "chr1", 101, 200, "+", 0.5, "", ""),
238                         ("testMultiRead", "chr1", 101, 200, "+", 0.5, "", "")]
239         self.rnaRds.insertMulti(rdsEntryList)
240         self.assertEquals(1, self.rnaRds.getMaxCoordinate("chr1"))
241         self.assertEquals(101, self.rnaRds.getMaxCoordinate("chr1", doMulti=True))
242
243         rdsEntryList = [("testSpliceRead", "chr1", 1000, 1100, 1150, 1200, "+", 1.0, "", "")]
244         self.rnaRds.insertSplices(rdsEntryList)
245         self.assertEquals(1, self.rnaRds.getMaxCoordinate("chr1"))
246         self.assertEquals(101, self.rnaRds.getMaxCoordinate("chr1", doMulti=True))
247         self.assertEquals(1150, self.rnaRds.getMaxCoordinate("chr1", doSplices=True))
248
249
250     def testGetReadsDict(self):
251         self.assertEquals({}, self.rds.getReadsDict())
252
253         rdsEntryList = [("testRead", "chr1", 1, 100, "+", 1.0, "", "")]
254         self.rds.insertUniqs(rdsEntryList)
255         reads = self.rds.getReadsDict()
256         self.assertEquals(1, len(reads))
257         self.assertEquals(1, len(reads["1"]))
258         read = reads["1"][0]
259         self.assertEquals(["start", "sense"], read.keys())
260         self.assertEquals(1, read["start"])
261         self.assertEquals("+", read["sense"])
262
263         reads = self.rds.getReadsDict(bothEnds=True, noSense=False, fullChrom=True,
264                           withWeight=True, withFlag=True, withMismatch=True, withID=True,
265                           withChrom=True, readIDDict=True)
266         self.assertEquals(1, len(reads))
267         self.assertEquals(1, len(reads["testRead"]))
268         read = reads["testRead"][0]
269         self.assertEquals(["readID", "weight", "stop", "mismatch","start", "flag","sense", "chrom"], read.keys())
270         self.assertEquals("testRead", read["readID"])
271         self.assertEquals(1.0, read["weight"])
272         self.assertEquals(100, read["stop"])
273         self.assertEquals("", read["mismatch"])
274         self.assertEquals(1, read["start"])
275         self.assertEquals("", read["flag"])
276         self.assertEquals("+", read["sense"])
277         self.assertEquals("chr1", read["chrom"])
278
279         self.assertEquals({}, self.rds.getReadsDict(hasMismatch=True))
280         self.assertEquals({}, self.rds.getReadsDict(strand="-"))
281         self.assertEquals(1, len(self.rds.getReadsDict(strand="+")))
282
283         rdsEntryList = [("testRead2", "chr1", 201, 300, "-", 1.0, "A", "G22A")]
284         self.rds.insertUniqs(rdsEntryList)
285         reads = self.rds.getReadsDict()
286         self.assertEquals(1, len(reads))
287         reads = self.rds.getReadsDict()
288         self.assertEquals(2, len(reads["1"]))
289         read = reads["1"][1]
290         self.assertEquals(201, read["start"])
291         reads = self.rds.getReadsDict(strand="+")
292         self.assertEquals(1, len(reads))
293         read = reads["1"][0]
294         self.assertEquals("+", read["sense"])
295         reads = self.rds.getReadsDict(strand="-")
296         self.assertEquals(1, len(reads))
297         reads = self.rds.getReadsDict(start=199)
298         self.assertEquals(1, len(reads["1"]))
299         reads = self.rds.getReadsDict(hasMismatch=True)
300         self.assertEquals(1, len(reads["1"]))
301
302         rdsEntryList = [("testMultiRead", "chr2", 101, 200, "+", 0.5, "", ""),
303                         ("testMultiRead", "chr2", 101, 200, "+", 0.5, "", "")]
304         self.rds.insertMulti(rdsEntryList)
305         reads = self.rds.getReadsDict()
306         self.assertEquals(1, len(reads))
307         reads = self.rds.getReadsDict(doMulti=True)
308         self.assertEquals(2, len(reads))
309         reads = self.rds.getReadsDict(doUniqs=False, doMulti=True)
310         self.assertFalse(reads.has_key("1"))
311
312
313     def testGetSplicesDict(self):
314         self.assertRaises(sqlite.OperationalError, self.rds.getSplicesDict)
315
316         rdsEntryList = [("testSpliceRead", "chr1", 1000, 1100, 1150, 1200, "+", 1.0, "", "")]
317         self.rnaRds.insertSplices(rdsEntryList)
318         reads = self.rnaRds.getSplicesDict()
319         self.assertEquals(1, len(reads))
320         self.assertEquals(1, len(reads["1"]))
321         read = reads["1"][0]
322         result = ["startR", "stopL", "sense", "startL", "stopR"]
323         self.assertEquals(result, read.keys())
324         self.assertEquals(1000, read["startL"])
325         self.assertEquals("+", read["sense"])
326         reads = self.rnaRds.getSplicesDict(splitRead=True)
327         self.assertEquals(2, len(reads["1"]))
328         self.assertEquals(1000, reads["1"][0]["startL"])
329         self.assertFalse(reads["1"][0].has_key("startR"))
330         self.assertFalse(reads["1"][0].has_key("stopR"))
331         self.assertEquals(1150, reads["1"][1]["startR"])
332         self.assertFalse(reads["1"][1].has_key("startL"))
333         self.assertFalse(reads["1"][1].has_key("stopL"))
334         self.assertEquals(reads["1"][0]["sense"], reads["1"][1]["sense"])
335
336         reads = self.rnaRds.getSplicesDict(noSense=False, fullChrom=True,
337                           withWeight=True, withFlag=True, withMismatch=True, withID=True,
338                           withChrom=True, readIDDict=True)
339         self.assertEquals(1, len(reads))
340         self.assertEquals(1, len(reads["testSpliceRead"]))
341         read = reads["testSpliceRead"][0]
342         result = ["readID", "weight", "startR", "mismatch","stopR", "stopL", "flag", "startL", "sense", "chrom"]
343         self.assertEquals(result, read.keys())
344         self.assertEquals("testSpliceRead", read["readID"])
345         self.assertEquals(1.0, read["weight"])
346         self.assertEquals(1150, read["startR"])
347         self.assertEquals("", read["mismatch"])
348         self.assertEquals(1200, read["stopR"])
349         self.assertEquals(1100, read["stopL"])
350         self.assertEquals("", read["flag"])
351         self.assertEquals(1000, read["startL"])
352         self.assertEquals("+", read["sense"])
353         self.assertEquals("chr1", read["chrom"])
354
355         self.assertEquals({}, self.rnaRds.getSplicesDict(hasMismatch=True))
356         self.assertEquals({}, self.rnaRds.getSplicesDict(strand="-"))
357         self.assertEquals(1, len(self.rnaRds.getSplicesDict(strand="+")))
358
359         rdsEntryList = [("testSpliceRead2", "chr1", 2000, 2100, 2150, 2200, "-", 1.0, "A", "G20T")]
360         self.rnaRds.insertSplices(rdsEntryList)
361         reads = self.rnaRds.getSplicesDict()
362         self.assertEquals(1, len(reads))
363         reads = self.rnaRds.getSplicesDict()
364         self.assertEquals(2, len(reads["1"]))
365         read = reads["1"][1]
366         self.assertEquals(2000, read["startL"])
367         reads = self.rnaRds.getSplicesDict(strand="+")
368         self.assertEquals(1, len(reads))
369         read = reads["1"][0]
370         self.assertEquals("+", read["sense"])
371         reads = self.rnaRds.getSplicesDict(strand="-")
372         self.assertEquals(1, len(reads))
373         reads = self.rnaRds.getSplicesDict(start=1199)
374         self.assertEquals(1, len(reads["1"]))
375         reads = self.rnaRds.getSplicesDict(hasMismatch=True)
376         self.assertEquals(1, len(reads["1"]))
377
378         rdsEntryList = [("testSpliceRead3", "chr2", 2000, 2100, 2150, 2200, "-", 1.0, "A", "G20T")]
379         self.rnaRds.insertSplices(rdsEntryList)
380         reads = self.rnaRds.getSplicesDict()
381         self.assertEquals(2, len(reads))
382         self.assertEquals(2, len(reads["1"]))
383         self.assertEquals(1, len(reads["2"]))
384         reads = self.rnaRds.getSplicesDict(withID=True, chrom="chr2")
385         self.assertFalse(reads.has_key("1"))
386         self.assertEquals("testSpliceRead3", reads["2"][0]["readID"])
387
388
389     def testGetCounts(self):
390         self.assertEquals(0, self.rds.getCounts())
391         self.assertEquals((0, 0, 0), self.rds.getCounts(multi=True, reportCombined=False))
392
393         rdsEntryList = [("testRead", "chr1", 1, 100, "+", 1.0, "", "")]
394         self.rds.insertUniqs(rdsEntryList)
395         self.assertEquals(1, self.rds.getCounts())
396         self.assertEquals((1, 0, 0), self.rds.getCounts(multi=True, reportCombined=False))
397
398         rdsEntryList = [("testMultiRead", "chr1", 101, 200, "+", 0.5, "", ""),
399                         ("testMultiRead", "chr1", 101, 200, "+", 0.5, "", "")]
400         self.rds.insertMulti(rdsEntryList)
401         self.assertEquals(2, self.rds.getCounts(multi=True))
402         self.assertEquals((1, 1, 0), self.rds.getCounts(multi=True, reportCombined=False))
403
404         self.assertEquals(1, self.rds.getCounts(chrom="chr1"))
405         self.assertEquals(0, self.rds.getCounts(chrom="chr2"))
406         self.assertEquals(1, self.rds.getCounts(rmin=1))
407         self.assertEquals(1, self.rds.getCounts(rmin=1, rmax=1000))
408         self.assertEquals(1, self.rds.getCounts(rmax=1000))
409         self.assertEquals(0, self.rds.getCounts(rmin=1000))
410         self.assertEquals(0, self.rds.getCounts(rmax=0))
411         self.assertEquals(1, self.rds.getCounts(sense="+"))
412         self.assertEquals(0, self.rds.getCounts(sense="-"))
413
414         self.assertEquals(0, self.rnaRds.getCounts())
415         rdsEntryList = [("testSpliceRead", "chr1", 1000, 1100, 1150, 1200, "+", 1.0, "", "")]
416         self.rnaRds.insertSplices(rdsEntryList)
417         self.assertEquals(1, self.rnaRds.getCounts(splices=True))
418
419
420     def testGetTotalCounts(self):
421         self.assertEquals(0, self.rds.getTotalCounts())
422
423         rdsEntryList = [("testRead", "chr1", 1, 100, "+", 1.0, "", "")]
424         self.rds.insertUniqs(rdsEntryList)
425         self.assertEquals(1, self.rds.getTotalCounts())
426
427         rdsEntryList = [("testMultiRead", "chr1", 101, 200, "+", 0.5, "", ""),
428                         ("testMultiRead", "chr1", 101, 200, "+", 0.5, "", "")]
429         self.rds.insertMulti(rdsEntryList)
430
431         self.assertEquals(2, self.rds.getTotalCounts())
432         self.assertEquals(2, self.rds.getTotalCounts(chrom="chr1"))
433         self.assertEquals(0, self.rds.getTotalCounts(chrom="chr2"))
434         self.assertEquals(2, self.rds.getTotalCounts(rmin=1))
435         self.assertEquals(2, self.rds.getTotalCounts(rmax=1000))
436         self.assertEquals(1, self.rds.getTotalCounts(rmin=101, rmax=1000))
437         self.assertEquals(1, self.rds.getTotalCounts(rmin=1, rmax=100))
438         self.assertEquals(0, self.rds.getTotalCounts(rmin=1000))
439         self.assertEquals(0, self.rds.getTotalCounts(rmax=0))
440
441
442     def testGetTableEntryCount(self):
443         table = "uniqs"
444         self.assertEquals(0, self.rds.getTableEntryCount(table))
445
446         rdsEntryList = [("testRead", "chr1", 1, 100, "+", 1.0, "", "")]
447         self.rds.insertUniqs(rdsEntryList)
448         self.assertEquals(1, self.rds.getTableEntryCount(table))
449
450         rdsEntryList = [("testMultiRead", "chr1", 101, 200, "+", 0.5, "", ""),
451                         ("testMultiRead", "chr1", 101, 200, "+", 0.5, "", "")]
452         self.rds.insertMulti(rdsEntryList)
453
454         self.assertEquals(1, self.rds.getTableEntryCount(table))
455         self.assertEquals(1, self.rds.getTableEntryCount(table, chrom="chr1"))
456         self.assertEquals(0, self.rds.getTableEntryCount(table, chrom="chr2"))
457         self.assertEquals(1, self.rds.getTableEntryCount(table, rmin=1))
458         self.assertEquals(1, self.rds.getTableEntryCount(table, rmax=1000))
459         self.assertEquals(0, self.rds.getTableEntryCount(table, rmin=101, rmax=1000))
460         self.assertEquals(0, self.rds.getTableEntryCount(table, rmin=1000))
461         self.assertEquals(0, self.rds.getTableEntryCount(table, rmax=0))
462         self.assertEquals(1, self.rds.getTableEntryCount(table, restrict=" sense ='+' "))
463         self.assertEquals(0, self.rds.getTableEntryCount(table, restrict=" sense ='-' "))
464         self.assertEquals(1, self.rds.getTableEntryCount(table, distinct=True))
465
466         table="multi"
467         self.assertEquals(1, self.rds.getTableEntryCount(table))
468         self.assertEquals(1, self.rds.getTableEntryCount(table, chrom="chr1"))
469         self.assertEquals(0, self.rds.getTableEntryCount(table, chrom="chr2"))
470         self.assertEquals(1, self.rds.getTableEntryCount(table, rmin=1))
471         self.assertEquals(1, self.rds.getTableEntryCount(table, rmax=1000))
472         self.assertEquals(1, self.rds.getTableEntryCount(table, rmin=101, rmax=1000))
473         self.assertEquals(0, self.rds.getTableEntryCount(table, rmin=1000))
474         self.assertEquals(0, self.rds.getTableEntryCount(table, rmax=0))
475         self.assertEquals(1, self.rds.getTableEntryCount(table, restrict=" sense ='+' "))
476         self.assertEquals(0, self.rds.getTableEntryCount(table, restrict=" sense ='-' "))
477         self.assertEquals(1, self.rds.getTableEntryCount(table, distinct=True))
478
479         rdsEntryList = [("testSpliceRead", "chr1", 1000, 1100, 1150, 1200, "+", 1.0, "", "")]
480         self.rnaRds.insertSplices(rdsEntryList)
481         table="splices"
482         self.assertEquals(1, self.rnaRds.getTableEntryCount(table))
483         self.assertEquals(1, self.rnaRds.getTableEntryCount(table, chrom="chr1"))
484         self.assertEquals(0, self.rnaRds.getTableEntryCount(table, chrom="chr2"))
485         self.assertEquals(1, self.rnaRds.getTableEntryCount(table, rmin=1, startField="startL"))
486         self.assertRaises(sqlite.OperationalError, self.rnaRds.getTableEntryCount, table, rmin=1)
487         self.assertEquals(1, self.rnaRds.getTableEntryCount(table, rmax=2000, startField="startL"))
488         self.assertRaises(sqlite.OperationalError, self.rnaRds.getTableEntryCount, table, rmax=2000)
489         self.assertEquals(0, self.rnaRds.getTableEntryCount(table, rmax=999, startField="startL"))
490         self.assertEquals(1, self.rnaRds.getTableEntryCount(table, rmin=1000, startField="startL"))
491         self.assertEquals(0, self.rnaRds.getTableEntryCount(table, rmax=0, startField="startL"))
492         self.assertEquals(1, self.rnaRds.getTableEntryCount(table, restrict=" sense ='+' "))
493         self.assertEquals(0, self.rnaRds.getTableEntryCount(table, restrict=" sense ='-' "))
494         self.assertEquals(1, self.rnaRds.getTableEntryCount(table, distinct=True, startField="startL"))
495         self.assertRaises(sqlite.OperationalError, self.rnaRds.getTableEntryCount, table, distinct=True)
496
497
498     def testGetUniqsCount(self):
499         self.assertEquals(0, self.rds.getUniqsCount())
500
501         rdsEntryList = [("testRead", "chr1", 1, 100, "+", 1.0, "", "")]
502         self.rds.insertUniqs(rdsEntryList)
503         self.assertEquals(1, self.rds.getUniqsCount())
504
505         rdsEntryList = [("testMultiRead", "chr1", 101, 200, "+", 0.5, "", ""),
506                         ("testMultiRead", "chr1", 101, 200, "+", 0.5, "", "")]
507         self.rds.insertMulti(rdsEntryList)
508
509         self.assertEquals(1, self.rds.getUniqsCount())
510         self.assertEquals(1, self.rds.getUniqsCount(chrom="chr1"))
511         self.assertEquals(0, self.rds.getUniqsCount(chrom="chr2"))
512         self.assertEquals(1, self.rds.getUniqsCount(rmin=1))
513         self.assertEquals(1, self.rds.getUniqsCount(rmax=1000))
514         self.assertEquals(0, self.rds.getUniqsCount(rmin=101, rmax=1000))
515         self.assertEquals(0, self.rds.getUniqsCount(rmin=1000))
516         self.assertEquals(0, self.rds.getUniqsCount(rmax=0))
517         self.assertEquals(1, self.rds.getUniqsCount(restrict=" sense ='+' "))
518         self.assertEquals(0, self.rds.getUniqsCount(restrict=" sense ='-' "))
519         self.assertEquals(1, self.rds.getUniqsCount(distinct=True))
520
521
522     def testGetSplicesCount(self):
523         self.assertEquals(0, self.rnaRds.getSplicesCount())
524
525         rdsEntryList = [("testRead", "chr1", 1, 100, "+", 1.0, "", "")]
526         self.rnaRds.insertUniqs(rdsEntryList)
527         self.assertEquals(0, self.rnaRds.getSplicesCount())
528
529         rdsEntryList = [("testMultiRead", "chr1", 101, 200, "+", 0.5, "", ""),
530                         ("testMultiRead", "chr1", 101, 200, "+", 0.5, "", "")]
531         self.rnaRds.insertMulti(rdsEntryList)
532         self.assertEquals(0, self.rnaRds.getSplicesCount())
533
534         rdsEntryList = [("testSpliceRead", "chr1", 1000, 1100, 1150, 1200, "+", 1.0, "", "")]
535         self.rnaRds.insertSplices(rdsEntryList)
536
537         self.assertEquals(1, self.rnaRds.getSplicesCount())
538         self.assertEquals(1, self.rnaRds.getSplicesCount(chrom="chr1"))
539         self.assertEquals(0, self.rnaRds.getSplicesCount(chrom="chr2"))
540         self.assertEquals(1, self.rnaRds.getSplicesCount(rmin=1))
541         self.assertEquals(1, self.rnaRds.getSplicesCount(rmax=2000))
542         self.assertEquals(0, self.rnaRds.getSplicesCount(rmax=999))
543         self.assertEquals(1, self.rnaRds.getSplicesCount(rmin=1000))
544         self.assertEquals(0, self.rnaRds.getSplicesCount(rmax=0))
545         self.assertEquals(1, self.rnaRds.getSplicesCount(restrict=" sense ='+' "))
546         self.assertEquals(0, self.rnaRds.getSplicesCount(restrict=" sense ='-' "))
547         self.assertEquals(1, self.rnaRds.getSplicesCount(distinct=True))
548
549
550     def testGetMultiCount(self):
551         self.assertEquals(0, self.rds.getMultiCount())
552
553         rdsEntryList = [("testRead", "chr1", 1, 100, "+", 1.0, "", "")]
554         self.rds.insertUniqs(rdsEntryList)
555         self.assertEquals(0, self.rds.getMultiCount())
556
557         rdsEntryList = [("testMultiRead", "chr1", 101, 200, "+", 0.5, "", ""),
558                         ("testMultiRead", "chr1", 101, 200, "+", 0.5, "", "")]
559         self.rds.insertMulti(rdsEntryList)
560
561         self.assertEquals(1, self.rds.getMultiCount())
562         self.assertEquals(1, self.rds.getMultiCount(chrom="chr1"))
563         self.assertEquals(0, self.rds.getMultiCount(chrom="chr2"))
564         self.assertEquals(1, self.rds.getMultiCount(rmin=1))
565         self.assertEquals(1, self.rds.getMultiCount(rmax=1000))
566         self.assertEquals(0, self.rds.getMultiCount(rmin=1, rmax=100))
567         self.assertEquals(0, self.rds.getMultiCount(rmin=1000))
568         self.assertEquals(0, self.rds.getMultiCount(rmax=0))
569         self.assertEquals(1, self.rds.getMultiCount(restrict=" sense ='+' "))
570         self.assertEquals(0, self.rds.getMultiCount(restrict=" sense ='-' "))
571         self.assertEquals(1, self.rds.getMultiCount(distinct=True))
572
573
574     def testGetReadIDs(self):
575         self.assertEquals([], self.rnaRds.getReadIDs())
576
577         rdsEntryList = [("testRead", "chr1", 1, 100, "+", 1.0, "", "")]
578         self.rnaRds.insertUniqs(rdsEntryList)
579         result = ["testRead"]
580         self.assertEquals(result, self.rnaRds.getReadIDs())
581
582         rdsEntryList = [("testMultiRead", "chr1", 101, 200, "+", 0.5, "", ""),
583                         ("testMultiRead", "chr1", 101, 200, "+", 0.5, "", "")]
584         self.rnaRds.insertMulti(rdsEntryList)
585         result = ["testRead"]
586         self.assertEquals(result, self.rnaRds.getReadIDs())
587         result = ["testMultiRead", "testRead"]
588         self.assertEquals(result, self.rnaRds.getReadIDs(multi=True))
589         
590         rdsEntryList = [("testRead2", "chr1", 201, 300, "+", 1.0, "", "")]
591         self.rnaRds.insertUniqs(rdsEntryList)
592         result = ["testRead", "testRead2"]
593         self.assertEquals(result, self.rnaRds.getReadIDs())
594         result = ["testRead"]
595         self.assertEquals(result, self.rnaRds.getReadIDs(limit=1))
596         result = ["testMultiRead"]
597         self.assertEquals(result, self.rnaRds.getReadIDs(multi=True, limit=1))
598
599         rdsEntryList = [("testPair/1", "chr1", 301, 400, "+", 1.0, "", "")]
600         self.rnaRds.insertUniqs(rdsEntryList)
601         result = ["testPair", "testRead", "testRead2"]
602         self.assertEquals(result, self.rnaRds.getReadIDs(paired=True))
603
604         rdsEntryList = [("testSpliceRead", "chr1", 1000, 1100, 1150, 1200, "+", 1.0, "", "")]
605         self.rnaRds.insertSplices(rdsEntryList)
606         result = ["testSpliceRead"]
607         self.assertEquals(result, self.rnaRds.getReadIDs(uniqs=False, splices=True))
608         result = ["testPair/1", "testRead", "testRead2", "testSpliceRead"]
609         self.assertEquals(result, self.rnaRds.getReadIDs(splices=True))
610
611
612     def testGetMismatches(self):
613         self.assertRaises(ReadDataset.ReadDatasetError, self.rds.getMismatches)
614         self.rds.insertMetadata([("readsize", "5")])
615
616         rdsEntryList = [("testRead", "chr1", 1, 5, "+", 1.0, "", "")]
617         self.rds.insertUniqs(rdsEntryList)
618         result = {"chr1": []}
619         self.assertEquals(result, self.rds.getMismatches())
620
621         rdsEntryList = [("testRead", "chr1", 1, 5, "+", 1.0, "", "C3T")]
622         self.rds.insertUniqs(rdsEntryList)
623         result = {"chr1": [[1, 3, "T", "C"]]}
624         self.assertEquals(result, self.rds.getMismatches())
625         result = {"chr2": []}
626         self.assertEquals(result, self.rds.getMismatches(mischrom="chr2"))
627
628         rdsEntryList = [("testRead", "chr1", 10, 15, "+", 1.0, "", "C3T")]
629         self.rds.insertUniqs(rdsEntryList)
630         result = {"chr1": [[1, 3, "T", "C"], [10, 12, "T", "C"]]}
631         self.assertEquals(result, self.rds.getMismatches())
632
633         rdsEntryList = [("testRead", "chr2", 10, 15, "+", 1.0, "", "C3T")]
634         self.rds.insertUniqs(rdsEntryList)
635         result = {"chr1": [[1, 3, "T", "C"], [10, 12, "T", "C"]],
636                   "chr2": [[10, 12, "T", "C"]]}
637         self.assertEquals(result, self.rds.getMismatches())
638
639         rdsEntryList = [("testSpliceRead", "chr1", 1000, 1100, 1150, 1200, "+", 1.0, "", "C41T")]
640         self.rnaRds.insertSplices(rdsEntryList)
641         self.rnaRds.insertMetadata([("readsize", "150")])
642         result = {"chr1": [[1000, 1040, "T", "C"]]}
643         #TODO: This test case fails.  If there are only splice entries for a chromosome it shouldn't
644         #      be necessary to specify the chromosome.
645         #self.assertEquals(result, self.rnaRds.getMismatches())
646         self.assertEquals(result, self.rnaRds.getMismatches(mischrom="chr1"))
647
648
649     #TODO: needs fixing up
650     def testGetChromProfile(self):
651         chromProfile = self.rds.getChromProfile("chr1")
652         result = []
653         self.assertEquals(result, chromProfile.tolist())
654
655         rdsEntryList = [("testRead", "chr1", 1, 5, "+", 1.0, "", "")]
656         self.rds.insertUniqs(rdsEntryList)
657         chromProfile = self.rds.getChromProfile("chr1")
658         result = []
659         self.assertEquals(result, chromProfile.tolist())
660
661         self.rds.insertMetadata([("readsize", "5")])
662         chromProfile = self.rds.getChromProfile("chr1")
663         result = [0.0, 1.0, 1.0, 1.0, 1.0]
664         self.assertEquals(result, chromProfile.tolist())
665
666         rdsEntryList = [("testRead2", "chr1", 7, 11, "+", 1.0, "", "")]
667         self.rds.insertUniqs(rdsEntryList)
668         # This doesn't seem to make sense the default behavior is to only get the first readlen bases
669         chromProfile = self.rds.getChromProfile("chr1")
670         result = [0.0, 1.0, 1.0, 1.0, 1.0]
671         self.assertEquals(result, chromProfile.tolist())
672
673         # as it stands this doesn't see right either.  Getting an indexError at currentpos 5.
674         chromProfile = self.rds.getChromProfile("chr1", cstop=11)
675         result = [0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
676         self.assertEquals(result, chromProfile.tolist())
677
678
679     def testInsertMetadata(self):
680         result = {}
681         self.assertEquals(result, self.rds.getMetadata("testMeta"))
682
683         self.rds.insertMetadata([("testMeta", "100")])
684         result = {"testMeta": "100"}
685         self.assertEquals(result, self.rds.getMetadata("testMeta"))
686
687         self.rds.insertMetadata([("testMeta", "200")])
688         result = {"testMeta:2": "200", "testMeta": "100"}
689         self.assertEquals(result, self.rds.getMetadata("testMeta"))
690
691
692     def testUpdateMetadata(self):
693         result = {}
694         self.assertEquals(result, self.rds.getMetadata("testMeta"))
695
696         self.rds.insertMetadata([("testMeta", "100")])
697         result = {"testMeta": "100"}
698         self.assertEquals(result, self.rds.getMetadata("testMeta"))
699
700         self.rds.updateMetadata("testMeta", "200")
701         result = {"testMeta": "200"}
702         self.assertEquals(result, self.rds.getMetadata("testMeta"))
703
704         self.rds.updateMetadata("testMeta", "300", "200")
705         result = {"testMeta": "300"}
706         self.assertEquals(result, self.rds.getMetadata("testMeta"))
707
708         self.rds.updateMetadata("testMeta", "200", "200")
709         result = {"testMeta": "300"}
710         self.assertEquals(result, self.rds.getMetadata("testMeta"))
711
712
713     def testFlagReads(self):
714         readData = self.rnaRds.getReadsDict(withFlag=True)
715         self.assertEquals({}, readData)
716
717         rdsEntryList = [("testRead", "chr1", 1, 100, "+", 1.0, "", "")]
718         self.rnaRds.insertUniqs(rdsEntryList)
719         result = [""]
720         flags = self.getRDSFlags("1", self.rnaRds)
721         self.assertEquals(result, flags)
722
723         regions = [()]
724         self.assertRaises(sqlite.ProgrammingError, self.rnaRds.flagReads, regions)
725
726         regions = [("test", "chr1", "0", "1000")]
727         self.rnaRds.flagReads(regions)
728         result = ["test"]
729         flags = self.getRDSFlags("1", self.rnaRds)
730         self.assertEquals(result, flags)
731
732         regions = [("test2", "chr1", "600", "1000")]
733         self.rnaRds.flagReads(regions)
734         result = ["test"]
735         flags = self.getRDSFlags("1", self.rnaRds)
736         self.assertEquals(result, flags)
737
738         rdsEntryList = [("testRead2", "chr1", 101, 200, "+", 1.0, "", "")]
739         self.rnaRds.insertUniqs(rdsEntryList)
740         regions = [("test2", "chr1", "101", "1000")]
741         self.rnaRds.flagReads(regions)
742         result = ["test", "test2"]
743         flags = self.getRDSFlags("1", self.rnaRds)
744         self.assertEquals(result, flags)
745
746         rdsEntryList = [("testMultiRead", "chr1", 201, 300, "+", 0.5, "", ""),
747                         ("testMultiRead", "chr1", 201, 300, "+", 0.5, "", "")]
748         self.rnaRds.insertMulti(rdsEntryList)
749         regions = [("test", "chr1", "0", "1000")]
750         self.rnaRds.flagReads(regions)
751         result = ["test", "test", "", ""]
752         flags = self.getRDSFlags("1", self.rnaRds, doMulti=True)
753         self.assertEquals(result, flags)
754
755         regions = [("multi", "chr1", "1", "1000")]
756         self.rnaRds.flagReads(regions, uniqs=False, multi=True)
757         result = ["test", "test", "multi", "multi"]
758         flags = self.getRDSFlags("1", self.rnaRds, doMulti=True)
759         self.assertEquals(result, flags)
760
761         rdsEntryList = [("testSpliceRead", "chr1", 1000, 1100, 1150, 1200, "+", 1.0, "", "")]
762         self.rnaRds.insertSplices(rdsEntryList)
763         regions = [("test", "chr1", "0", "1500")]
764         self.rnaRds.flagReads(regions)
765         result = ["", "test", "test", "multi", "multi"]
766         flags = self.getRDSFlags("1", self.rnaRds, doMulti=True, splice=True)
767         self.assertEquals(result, flags)
768
769         regions = [("splice", "chr1", "1", "1500")]
770         self.rnaRds.flagReads(regions, uniqs=False, multi=False, splices=True)
771         result = [" L:splice R:splice", "test", "test", "multi", "multi"]
772         flags = self.getRDSFlags("1", self.rnaRds, doMulti=True, splice=True)
773         self.assertEquals(result, flags)
774
775         rdsEntryList = [("testNegSense", "chr1", 301, 400, "-", 1.0, "", "")]
776         self.rnaRds.insertUniqs(rdsEntryList)
777         regions = [("test", "chr1", "0", "1500", "+")]
778         self.rnaRds.flagReads(regions, sense="anythingBut'Both'")
779         result = ["test", "test", ""]
780         flags = self.getRDSFlags("1", self.rnaRds)
781         self.assertEquals(result, flags)
782
783         regions = [("neg", "chr1", "0", "1500", "-")]
784         self.rnaRds.flagReads(regions, sense="anythingBut'Both'")
785         result = ["test", "test", "neg"]
786         flags = self.getRDSFlags("1", self.rnaRds)
787         self.assertEquals(result, flags)
788
789
790     def getRDSFlags(self, chromosome, rds, doMulti=False, splice=False):
791         if splice:
792             readData = rds.getSplicesDict(withFlag=True)
793         else:
794             readData = rds.getReadsDict(withFlag=True, doMulti=doMulti)
795
796         flags = []
797         for read in readData[chromosome]:
798             flags.append(read["flag"])
799
800         if splice:
801             nonSplice = self.getRDSFlags(chromosome, rds, doMulti, splice=False)
802             for flag in nonSplice:
803                 flags.append(flag)
804
805         return flags
806
807
808     def testSetFlags(self):
809         rdsEntryList = [("test", "chr1", 1, 100, "+", 1.0, "uniq", "")]
810         self.rds.insertUniqs(rdsEntryList)
811         self.rnaRds.insertUniqs(rdsEntryList)
812         rdsEntryList = [("testMultiRead", "chr1", 201, 300, "+", 0.5, "multi", ""),
813                         ("testMultiRead", "chr1", 201, 300, "+", 0.5, "multi", "")]
814         self.rnaRds.insertMulti(rdsEntryList)
815         rdsEntryList = [("testSpliceRead", "chr1", 1000, 1100, 1150, 1200, "+", 1.0, "splice", "")]
816         self.rnaRds.insertSplices(rdsEntryList)
817
818         result = ["reset"]
819         self.rds.setFlags("reset")
820         flags = self.getRDSFlags("1", self.rds)
821         self.assertEquals(result, flags)
822
823         result = ["splice", "uniq", "resetMulti", "resetMulti"]
824         self.rnaRds.setFlags("resetMulti", uniqs=False, splices=False)
825         flags = self.getRDSFlags("1", self.rnaRds, doMulti=True, splice=True)
826         self.assertEquals(result, flags)
827
828         result = ["resetAll", "resetAll", "resetAll", "resetAll"]
829         self.rnaRds.setFlags("resetAll")
830         flags = self.getRDSFlags("1", self.rnaRds, doMulti=True, splice=True)
831         self.assertEquals(result, flags)
832
833
834     def testResetFlags(self):
835         rdsEntryList = [("test", "chr1", 1, 100, "+", 1.0, "uniq", "")]
836         self.rds.insertUniqs(rdsEntryList)
837         self.rnaRds.insertUniqs(rdsEntryList)
838         rdsEntryList = [("testMultiRead", "chr1", 201, 300, "+", 0.5, "multi", ""),
839                         ("testMultiRead", "chr1", 201, 300, "+", 0.5, "multi", "")]
840         self.rnaRds.insertMulti(rdsEntryList)
841         rdsEntryList = [("testSpliceRead", "chr1", 1000, 1100, 1150, 1200, "+", 1.0, "splice", "")]
842         self.rnaRds.insertSplices(rdsEntryList)
843
844         self.rds.resetFlags()
845         result = [""]
846         flags = self.getRDSFlags("1", self.rds)
847         self.assertEquals(result, flags)
848
849         self.rnaRds.resetFlags()
850         result = ["", "", ""]
851         flags = self.getRDSFlags("1", self.rnaRds, doMulti=True)
852         self.assertEquals(result, flags)
853
854         self.rnaRds.resetFlags()
855         result = ["", ""]
856         flags = self.getRDSFlags("1", self.rnaRds, splice=True)
857         self.assertEquals(result, flags)
858
859
860     def testReweighMultireads(self):
861         rdsEntryList = [("testMultiRead", "chr1", 201, 300, "+", 0.5, "multi", ""),
862                         ("testMultiRead", "chr1", 201, 300, "+", 0.5, "multi", "")]
863         self.rds.insertMulti(rdsEntryList)
864         readData = ("0.25", "chr1", "201", "testMultiRead")
865         self.rds.reweighMultireads([readData])
866         readDict = self.rds.getReadsDict(withWeight=True, doMulti=True)
867         read = readDict["1"][0]
868         self.assertEquals(0.25, read["weight"])
869
870
871     #TODO: write unit test
    def testSetSynchronousPragma(self):
        # Placeholder: contains no assertions yet, so it always passes.
        pass
874
875
876     #TODO: write unit test
    def testSetDBcache(self):
        # Placeholder: contains no assertions yet, so it always passes.
        pass
879
880
881     #TODO: write unit test
    def testExecute(self):
        # Placeholder: contains no assertions yet, so it always passes.
        pass
884
885
886     #TODO: write unit test
    def testExecuteCommit(self):
        # Placeholder: contains no assertions yet, so it always passes.
        pass
889
890
891     def testBuildIndex(self):
892         self.assertFalse(self.rds.hasIndex())
893         self.rds.buildIndex()
894         self.assertTrue(self.rds.hasIndex())
895
896
897     def testDropIndex(self):
898         self.assertFalse(self.rds.hasIndex())
899         self.rds.buildIndex()
900         self.assertTrue(self.rds.hasIndex())
901         self.rds.dropIndex()
902         self.assertFalse(self.rds.hasIndex())
903
904         self.assertFalse(self.rnaRds.hasIndex())
905         self.rnaRds.buildIndex()
906         self.assertTrue(self.rnaRds.hasIndex())
907         self.rnaRds.dropIndex()
908         self.assertFalse(self.rnaRds.hasIndex())
909
910
911     #TODO: write unit test
    def testMemSync(self):
        # Placeholder: contains no assertions yet, so it always passes.
        pass
914
915
916     #TODO: write unit test
    def testCopyDBEntriesToMemory(self):
        # Placeholder: contains no assertions yet, so it always passes.
        pass
919
920
921     #TODO: write unit test
    def testCopySpliceDBEntriesToMemory(self):
        # Placeholder: contains no assertions yet, so it always passes.
        pass
924
925
def suite():
    """Return a TestSuite holding every test method of TestReadDataset.

    Tests are collected with TestLoader.loadTestsFromTestCase rather than
    unittest.makeSuite, which is deprecated and was removed in Python 3.13.
    """
    # Named testSuite so the local does not shadow this function's own name.
    testSuite = unittest.TestSuite()
    testSuite.addTest(unittest.TestLoader().loadTestsFromTestCase(TestReadDataset))

    return testSuite
931
932
if __name__ == "__main__":
    # Executed only when the file is run directly: hand control to
    # unittest's command-line runner.
    # The commented-out line below can be enabled to force specific test
    # names into argv.
    #import sys;sys.argv = ['', 'Test.testName']
    unittest.main()