erange version 4.0a dev release
[erange.git] / test / testGeneMrnaCounts.py
1 '''
2 Created on Aug 19, 2010
3
4 @author: sau
5
6 Feature 728439 (the flag value asserted in testGeneMrnaCounts) was located by:
7     from Erange.commoncode import getFeaturesByChromDict
8     genome = Genome(self.genomeName)
9     featuresByChromDict = getFeaturesByChromDict(genome)
10     print featuresByChromDict["1"][:3]
11
12 '''
13 import unittest
14 import os
15 from erange import geneMrnaCounts
16 from cistematic.core.geneinfo import geneinfoDB
17 from cistematic.genomes import Genome
18 from erange import ReadDataset
19
20
class TestGeneMrnaCounts(unittest.TestCase):
    """Tests for the erange ``geneMrnaCounts`` module.

    Every test runs against a newly initialized RNA ``ReadDataset`` stored in
    ``testDBName`` and writes its results to ``outfilename``.  Both files are
    deleted again in ``tearDown`` so a failing test cannot leak state into
    the next one.
    """

    # Gene-info database handle; created once when the class body is executed
    # and shared by all tests.
    idb = geneinfoDB(cache=True)
    # Scratch read dataset created in setUp and deleted in tearDown.
    testDBName = "testRDS.rds"
    genomeName = "hsapiens"
    # Scratch file that geneMrnaCounts writes its tab-separated output to.
    outfilename = "testGeneMrnaCounts.txt"

    def setUp(self):
        """Create the scratch RNA read dataset used by the test."""
        self.rds = ReadDataset.ReadDataset(self.testDBName, initialize=True, datasetType="RNA", verbose=False)


    def tearDown(self):
        """Delete the files the test created and drop the dataset handle."""
        # A failed assertion aborts a test before it reaches its own cleanup,
        # so make sure a left-over output file never survives into the next test.
        if os.path.exists(self.outfilename):
            os.remove(self.outfilename)

        del(self.rds)
        os.remove(self.testDBName)


    def _consumeOutputLines(self):
        """Return every line of the output file, then delete the file.

        The file is read completely and closed before anything is asserted,
        so the handle is released even when a later assertion fails.
        """
        with open(self.outfilename, "r") as outfile:
            lines = outfile.readlines()

        os.remove(self.outfilename)

        return lines


    def _consumeFirstOutputLine(self):
        """Return the first line of the output file, then delete the file.

        Returns an empty string when the file is empty.
        """
        with open(self.outfilename, "r") as outfile:
            line = outfile.readline()

        os.remove(self.outfilename)

        return line


    def _assertAllCountsIn(self, allowedCounts):
        """Assert that the third tab-separated field of every output line is
        one of ``allowedCounts`` (values include the trailing newline)."""
        for line in self._consumeOutputLines():
            fields = line.split("\t")
            self.assertTrue(fields[2] in allowedCounts,
                            "unexpected count %r in output line %r" % (fields[2], line))


    def _assertFirstChr1ReadFlag(self, expectedFlag):
        """Assert the flag of the first read stored under chromosome "1"."""
        reads = self.rds.getReadsDict(withFlag=True)
        self.assertEqual(expectedFlag, reads["1"][0]["flag"])


    def testGeneMrnaCounts(self):
        """Run geneMrnaCounts on an empty dataset, then with a single read
        under several option combinations."""
        # Empty dataset: every line must report a count of zero.
        geneMrnaCounts.geneMrnaCounts(self.genomeName, self.testDBName, self.outfilename)
        self._assertAllCountsIn(["0\n"])

        # With exactly one unique read inserted, no line may report more than one.
        possibleCounts = ["0\n", "1\n"]
        rdsEntryList = [("testRead", "chr1", 18700, 18800, "+", 1.0, "", "")]
        self.rds.insertUniqs(rdsEntryList)
        geneMrnaCounts.geneMrnaCounts(self.genomeName, self.testDBName, self.outfilename)
        self._assertAllCountsIn(possibleCounts)

        # NOTE(review): presumably markGID stores the matching gene ID in the
        # read's flag, which is why the flag is checked after these runs.
        geneMrnaCounts.geneMrnaCounts(self.genomeName, self.testDBName, self.outfilename,
                                      markGID=True, trackStrand=True)
        self._assertAllCountsIn(possibleCounts)
        self._assertFirstChr1ReadFlag("728439")

        geneMrnaCounts.geneMrnaCounts(self.genomeName, self.testDBName, self.outfilename,
                                      countFeats=True, markGID=True, cachePages=150000)
        self._assertAllCountsIn(possibleCounts)
        self._assertFirstChr1ReadFlag("728439")


    def testCountFeatures(self):
        """countFeatures totals the features over all chromosomes."""
        cases = [
            (0, {}),
            (0, {"chr1": []}),
            #TODO: This is likely not the result we want
            # (10 is the length of the string "not a list")
            (10, {"chr1": "not a list"}),
            (0, {"chr1": 10}),
            (1, {"chr1": 10,
                 "chr2": ["f1"]}),
            (2, {"chr1": ["f1", "f2"]}),
            (2, {"chr1": ["f1", "f2"],
                 "chr2": []}),
            (3, {"chr1": ["f1", "f2"],
                 "chr2": ["f1"]}),
        ]
        for expected, testDict in cases:
            actual = geneMrnaCounts.countFeatures(testDict)
            self.assertEqual(expected, actual,
                             "countFeatures(%r) returned %r, expected %r" % (testDict, actual, expected))


    def testGetGeneSymbol(self):
        """getGeneSymbol resolves a symbol from geneInfoDict, geneAnnotDict or
        the gene-info database, falling back to "LOC" + gid."""
        # Case: Null/None inputs
        gid = ""
        searchGID = False
        geneInfoDict = {}
        idb = None
        genomeName = ""
        geneAnnotDict = {}
        self.assertEqual("LOC", geneMrnaCounts.getGeneSymbol(gid, searchGID, geneInfoDict, idb, genomeName, geneAnnotDict))

        # Case: symbol is in geneInfoDict
        gid = "1"
        searchGID = False
        geneInfoDict = {"1": [["gene1", "wrong name"], ["wrong name 2"]]}
        idb = None
        genomeName = "test"
        geneAnnotDict = {("test", "1"): ["wrong name 3"]}
        self.assertEqual("gene1", geneMrnaCounts.getGeneSymbol(gid, searchGID, geneInfoDict, idb, genomeName, geneAnnotDict))

        # Case: symbol not in geneInfoDict, is in geneAnnotDict
        gid = "1"
        searchGID = False
        geneInfoDict = {"0": [["wrong name"], ["wrong name 2"]]}
        idb = None
        genomeName = "test"
        geneAnnotDict = {("test", "1"): ["gene1"]}
        self.assertEqual("gene1", geneMrnaCounts.getGeneSymbol(gid, searchGID, geneInfoDict, idb, genomeName, geneAnnotDict))

        # Case: symbol not in geneInfoDict or geneAnnotDict - non-null/None inputs
        gid = "1"
        searchGID = False
        geneInfoDict = {"0": [["wrong name"], ["wrong name 2"]]}
        idb = None
        genomeName = "test"
        geneAnnotDict = {("test", "0"): ["wrong name 3"]}
        self.assertEqual("LOC1", geneMrnaCounts.getGeneSymbol(gid, searchGID, geneInfoDict, idb, genomeName, geneAnnotDict))

        # Case: using search, gid not in idb
        gid = "almostCertainlyNotInTheIDB"
        searchGID = True
        geneInfoDict = {"0": [["wrong name"], ["wrong name 2"]]}
        idb = self.idb
        genomeName = "human"
        geneAnnotDict = {("human", "0"): ["wrong name 3"]}
        self.assertEqual("LOCalmostCertainlyNotInTheIDB", geneMrnaCounts.getGeneSymbol(gid, searchGID, geneInfoDict, idb, genomeName, geneAnnotDict))

        # Case: using search
        # sql to get gid: select gID from gene_info where genome="human" and locustag !="-" and locustag != symbol limit 5;
        gid = "RP11-177A2.3"
        searchGID = True
        geneInfoDict = {"27": [["correct"], ["wrong name 2"]]}
        idb = self.idb
        genomeName = "human"
        geneAnnotDict = {("human", "0"): ["wrong name 3"]}
        self.assertEqual("correct", geneMrnaCounts.getGeneSymbol(gid, searchGID, geneInfoDict, idb, genomeName, geneAnnotDict))


    def testWriteOutputFile(self):
        """writeOutputFile emits "gid<TAB>symbol<TAB>count" lines and reports
        a count of 0 for a gid missing from gidCount."""
        # gid present in gidCount: its count is written out.
        genome = Genome(self.genomeName)
        gidList = ["RP11-177A2.3"]
        gidCount = {"RP11-177A2.3": 1}
        geneMrnaCounts.writeOutputFile(self.outfilename, genome, gidList, gidCount, searchGID=False)

        result = "RP11-177A2.3\tLOCRP11-177A2.3\t1\n"
        self.assertEqual(result, self._consumeFirstOutputLine())

        # gid absent from gidCount: the count falls back to 0.
        genome = Genome(self.genomeName)
        gidList = ["RP11-177A2.3"]
        gidCount = {"something else": 1}
        geneMrnaCounts.writeOutputFile(self.outfilename, genome, gidList, gidCount, searchGID=False)

        result = "RP11-177A2.3\tLOCRP11-177A2.3\t0\n"
        self.assertEqual(result, self._consumeFirstOutputLine())


    def testMain(self):
        """The command-line entry point on an empty dataset reports only
        zero counts."""
        argv = ["geneMRNACounts", self.genomeName, self.testDBName, self.outfilename]
        geneMrnaCounts.main(argv)
        self._assertAllCountsIn(["0\n"])
209
210
def suite():
    """Return a TestSuite containing every test in TestGeneMrnaCounts.

    Uses TestLoader.loadTestsFromTestCase rather than unittest.makeSuite,
    which is deprecated and no longer available in recent Python versions.
    """
    testSuite = unittest.TestSuite()
    testSuite.addTest(unittest.TestLoader().loadTestsFromTestCase(TestGeneMrnaCounts))

    return testSuite
216
217
if __name__ == "__main__":
    # Run the tests in this module when the file is executed as a script;
    # unittest.main() takes test selection from the command line.
    unittest.main()