2 Created on Aug 19, 2010
6 Feature 728439 (the flag value asserted in testGeneMrnaCounts) was located by:
7 from Erange.commoncode import getFeaturesByChromDict
8 genome = Genome(self.genomeName)
9 featuresByChromDict = getFeaturesByChromDict(genome)
10 print featuresByChromDict["1"][:3]
15 from erange import geneMrnaCounts
16 from cistematic.core.geneinfo import geneinfoDB
17 from cistematic.genomes import Genome
18 from erange import ReadDataset
# Test case for the erange geneMrnaCounts module.
# NOTE(review): this listing appears to have lost its indentation and several
# lines (the embedded line numbers have gaps) - confirm against the original file.
21 class TestGeneMrnaCounts(unittest.TestCase):
# Class-level fixtures shared by the tests: a geneinfoDB instance created with
# cache=True, the scratch RDS file name, the genome name handed to the code
# under test, and the scratch output file name.
22 idb = geneinfoDB(cache=True)
23 testDBName = "testRDS.rds"
24 genomeName = "hsapiens"
25 outfilename = "testGeneMrnaCounts.txt"
# NOTE(review): the enclosing method header is not visible; presumably setUp - confirm.
# Opens the scratch RDS file as an "RNA" dataset with initialize=True.
28 self.rds = ReadDataset.ReadDataset(self.testDBName, initialize=True, datasetType="RNA", verbose=False)
# NOTE(review): the enclosing method header is not visible; presumably tearDown - confirm.
# Deletes the scratch RDS file.
33 os.remove(self.testDBName)
# Runs geneMrnaCounts.geneMrnaCounts against the scratch RDS file with several
# option combinations and checks the third tab-separated field of the output.
# NOTE(review): `line` is never bound in the visible code; presumably each
# `fields = line.split("\t")` sits inside a `for line in outfile:` loop that
# is not visible here - confirm.
36 def testGeneMrnaCounts(self):
# Case 1: no reads inserted yet - the count field is expected to be "0\n".
37 geneMrnaCounts.geneMrnaCounts(self.genomeName, self.testDBName, self.outfilename)
38 outfile = open(self.outfilename, "r")
40 fields = line.split("\t")
41 self.assertEquals("0\n", fields[2])
44 os.remove(self.outfilename)
# Case 2: one unique read inserted - the count field may be "0\n" or "1\n".
# NOTE(review): the tuple layout is whatever ReadDataset.insertUniqs expects;
# presumably (id, chrom, start, stop, sense, weight, flag, mismatch) - confirm.
46 rdsEntryList = [("testRead", "chr1", 18700, 18800, "+", 1.0, "", "")]
47 self.rds.insertUniqs(rdsEntryList)
48 geneMrnaCounts.geneMrnaCounts(self.genomeName, self.testDBName, self.outfilename)
49 possibleCounts = ["0\n", "1\n"]
50 outfile = open(self.outfilename, "r")
52 fields = line.split("\t")
53 self.assertTrue(fields[2] in possibleCounts)
56 os.remove(self.outfilename)
# Case 3: markGID=True and trackStrand=True - same count check, and afterwards
# the first read listed under chromosome "1" must carry the flag "728439".
58 geneMrnaCounts.geneMrnaCounts(self.genomeName, self.testDBName, self.outfilename,
59 markGID=True, trackStrand=True)
61 possibleCounts = ["0\n", "1\n"]
62 outfile = open(self.outfilename, "r")
64 fields = line.split("\t")
65 self.assertTrue(fields[2] in possibleCounts)
68 os.remove(self.outfilename)
69 reads = self.rds.getReadsDict(withFlag=True)
70 self.assertEquals("728439", reads["1"][0]["flag"])
# Case 4: countFeats=True, markGID=True and cachePages=150000 - same count
# check and the same flag expectation as case 3.
72 geneMrnaCounts.geneMrnaCounts(self.genomeName, self.testDBName, self.outfilename,
73 countFeats=True, markGID=True, cachePages=150000)
75 possibleCounts = ["0\n", "1\n"]
76 outfile = open(self.outfilename, "r")
78 fields = line.split("\t")
79 self.assertTrue(fields[2] in possibleCounts)
82 os.remove(self.outfilename)
83 reads = self.rds.getReadsDict(withFlag=True)
84 self.assertEquals("728439", reads["1"][0]["flag"])
# Checks the total returned by geneMrnaCounts.countFeatures for dictionaries
# keyed by chromosome name.
# NOTE(review): the first `testDict` assignment and the continuation lines of
# the three multi-line dict literals are not visible here - confirm their
# contents against the original file.
87 def testCountFeatures(self):
# First case: expects 0 (the dict used is assigned on a line not visible here).
89 self.assertEquals(0, geneMrnaCounts.countFeatures(testDict))
# A chromosome with an empty feature list contributes 0.
91 testDict = {"chr1": []}
92 self.assertEquals(0, geneMrnaCounts.countFeatures(testDict))
# A string value yields 10, which matches len("not a list").
94 #TODO: This is likely not the result we want
95 testDict = {"chr1": "not a list"}
96 self.assertEquals(10, geneMrnaCounts.countFeatures(testDict))
# An integer value is expected to contribute 0.
98 testDict = {"chr1": 10}
99 self.assertEquals(0, geneMrnaCounts.countFeatures(testDict))
# Integer value plus further entries (continuation not visible): expects 1.
101 testDict = {"chr1": 10,
103 self.assertEquals(1, geneMrnaCounts.countFeatures(testDict))
# A single chromosome with two features: expects 2.
105 testDict = {"chr1": ["f1", "f2"]}
106 self.assertEquals(2, geneMrnaCounts.countFeatures(testDict))
# Two features plus further entries (continuation not visible): expects 2.
108 testDict = {"chr1": ["f1", "f2"],
110 self.assertEquals(2, geneMrnaCounts.countFeatures(testDict))
# Two features plus further entries (continuation not visible): expects 3.
112 testDict = {"chr1": ["f1", "f2"],
114 self.assertEquals(3, geneMrnaCounts.countFeatures(testDict))
# Checks the symbol returned by geneMrnaCounts.getGeneSymbol for each lookup
# source exercised below.
# NOTE(review): the assignments of gid, searchGID, idb and genomeName (and the
# dicts for the first case) are mostly not visible here - confirm the inputs
# of each case against the original file.
117 def testGetGeneSymbol(self):
# Expected result is the bare prefix "LOC".
118 # Case: Null/None inputs
125 self.assertEquals("LOC", geneMrnaCounts.getGeneSymbol(gid, searchGID, geneInfoDict, idb, genomeName, geneAnnotDict))
# Expected result "gene1" is the first element of the first list stored under
# key "1" in geneInfoDict; the geneAnnotDict entry holds a decoy value.
127 # Case: symbol is in geneInfoDict
130 geneInfoDict = {"1": [["gene1", "wrong name"], ["wrong name 2"]]}
133 geneAnnotDict = {("test", "1"): ["wrong name 3"]}
134 self.assertEquals("gene1", geneMrnaCounts.getGeneSymbol(gid, searchGID, geneInfoDict, idb, genomeName, geneAnnotDict))
# Expected result "gene1" is the value stored under ("test", "1") in
# geneAnnotDict; geneInfoDict only has an entry for key "0".
136 # Case: symbol not in geneInfoDict, is in geneAnnotDict
139 geneInfoDict = {"0": [["wrong name"], ["wrong name 2"]]}
142 geneAnnotDict = {("test", "1"): ["gene1"]}
143 self.assertEquals("gene1", geneMrnaCounts.getGeneSymbol(gid, searchGID, geneInfoDict, idb, genomeName, geneAnnotDict))
# Neither dict has a matching entry; expected result is "LOC1"
# (presumably "LOC" followed by a gid of "1" - the gid line is not visible).
145 # Case: symbol not in geneInfoDict or geneAnnotDict - non-null/None inputs
148 geneInfoDict = {"0": [["wrong name"], ["wrong name 2"]]}
151 geneAnnotDict = {("test", "0"): ["wrong name 3"]}
152 self.assertEquals("LOC1", geneMrnaCounts.getGeneSymbol(gid, searchGID, geneInfoDict, idb, genomeName, geneAnnotDict))
# Expected result is "LOC" followed by the gid itself.
154 # Case: using search, gid not in idb
155 gid = "almostCertainlyNotInTheIDB"
157 geneInfoDict = {"0": [["wrong name"], ["wrong name 2"]]}
160 geneAnnotDict = {("human", "0"): ["wrong name 3"]}
161 self.assertEquals("LOCalmostCertainlyNotInTheIDB", geneMrnaCounts.getGeneSymbol(gid, searchGID, geneInfoDict, idb, genomeName, geneAnnotDict))
# NOTE(review): the heading of this last case is not visible. Expected result
# "correct" is the first element stored under key "27" in geneInfoDict;
# presumably the gid found with the query below resolves to "27" - confirm.
164 # sql to get gid: select gID from gene_info where genome="human" and locustag !="-" and locustag != symbol limit 5;
167 geneInfoDict = {"27": [["correct"], ["wrong name 2"]]}
170 geneAnnotDict = {("human", "0"): ["wrong name 3"]}
171 self.assertEquals("correct", geneMrnaCounts.getGeneSymbol(gid, searchGID, geneInfoDict, idb, genomeName, geneAnnotDict))
def testWriteOutputFile(self):
    """Check the first line written by geneMrnaCounts.writeOutputFile.

    Two cases are exercised with searchGID=False and a single gid:

    * the gid is a key of gidCount, so the written count is that value (1);
    * the gid is absent from gidCount, so the written count is 0.

    In both cases the expected line is the gid, "LOC" + gid, and the count,
    separated by tabs and terminated by a newline.
    """
    genome = Genome(self.genomeName)
    gidList = ["RP11-177A2.3"]

    # Case: the gid has an entry in gidCount.
    gidCount = {"RP11-177A2.3": 1}
    geneMrnaCounts.writeOutputFile(self.outfilename, genome, gidList,
                                   gidCount, searchGID=False)
    # Read and close the file before deleting it, and delete it before
    # asserting so a failing assertion cannot leave the scratch file behind.
    with open(self.outfilename, "r") as outfile:
        line = outfile.readline()
    os.remove(self.outfilename)
    self.assertEqual("RP11-177A2.3\tLOCRP11-177A2.3\t1\n", line)

    # Case: the gid has no entry in gidCount.
    genome = Genome(self.genomeName)
    gidCount = {"something else": 1}
    geneMrnaCounts.writeOutputFile(self.outfilename, genome, gidList,
                                   gidCount, searchGID=False)
    with open(self.outfilename, "r") as outfile:
        line = outfile.readline()
    os.remove(self.outfilename)
    self.assertEqual("RP11-177A2.3\tLOCRP11-177A2.3\t0\n", line)
# NOTE(review): the enclosing method header is not visible; presumably a test
# of the command-line entry point - confirm.
# Calls geneMrnaCounts.main with a program name followed by the genome name,
# the scratch RDS file and the scratch output file.
200 argv = ["geneMRNACounts", self.genomeName, self.testDBName, self.outfilename]
201 geneMrnaCounts.main(argv)
202 outfile = open(self.outfilename, "r")
# NOTE(review): `line` is not bound in the visible code; presumably this runs
# inside a `for line in outfile:` loop - confirm.
# The third tab-separated field is expected to be "0\n".
204 fields = line.split("\t")
205 self.assertEquals("0\n", fields[2])
# Delete the scratch output file.
208 os.remove(self.outfilename)
# NOTE(review): presumably the body of a suite-building function whose header
# and return statement are not visible - confirm.
# Builds a TestSuite containing every test of TestGeneMrnaCounts.
212 suite = unittest.TestSuite()
213 suite.addTest(unittest.makeSuite(TestGeneMrnaCounts))
# Script entry point; the statement that runs the tests is not visible here.
218 if __name__ == "__main__":
219 #import sys;sys.argv = ['', 'Test.testName']