erange version 4.0a dev release
[erange.git] / test / testGetFasta.py
1 '''
2 Created on Aug 27, 2010
3
4 @author: sau
5 '''
6 import unittest
7 import os
8 from erange import getfasta
9 #from erange import ReadDataset
10 from erange import ReadDataset
11
# Scratch RDS database file: created by testGetRegionUsingRDS and removed in tearDown.
testDBName = "testRDS.rds"
13
14
class TestGetFasta(unittest.TestCase):
    """Unit tests for the region-selection and FASTA-writing helpers in erange.getfasta."""

    def setUp(self):
        """Set up the default arguments shared by the tests."""
        self.regionDict = {}
        self.minHitThresh = -1
        self.maxsize = 3000
        self.outfilename = "testFileForTestGetFasta.fa"


    def tearDown(self):
        """Delete the scratch FASTA and RDS files, reporting any that could not be removed."""
        cleanupTargets = [(self.outfilename, "fasta file does not exist"),
                          (testDBName, "RDS file does not exist")]
        for path, missingMessage in cleanupTargets:
            try:
                os.remove(path)
            except OSError:
                # Most tests never create these files; a failed removal is reported, not fatal.
                print(missingMessage)


    def _readOutputLines(self):
        """Return every line of the FASTA output file as a list.

        The handle is closed before returning so that tearDown can delete the
        file without an open descriptor still pointing at it.
        """
        fastaFile = open(self.outfilename)
        try:
            return fastaFile.readlines()
        finally:
            fastaFile.close()


    def testGetDefaultRegion(self):
        """getDefaultRegion converts (start, stop, length) tuples into region dictionaries."""
        # No chromosomes in, no chromosomes out.
        self.assertEqual({}, getfasta.getDefaultRegion(self.regionDict, self.maxsize))

        # Chromosomes without regions are kept with empty lists.
        regionDict = {"1": [],
                      "2": []
        }
        result = {"2": [],
                  "1": []
        }
        self.assertEqual(result, getfasta.getDefaultRegion(regionDict, self.maxsize))

        # A single region is expected to come back with a topPos of [-1].
        regionDict = {"1": [(10, 20, 10)],
                      "2": []
        }
        result = {"2": [],
                  "1": [{"start": 10, "length": 10, "topPos": [-1]}]
        }
        self.assertEqual(result, getfasta.getDefaultRegion(regionDict, self.maxsize))

        regionDict = {"1": [(10, 20, 10)],
                      "2": [(11, 21, 11)]
        }
        result = {"2": [{"start": 11, "length": 11, "topPos": [-1]}],
                  "1": [{"start": 10, "length": 10, "topPos": [-1]}]
        }
        self.assertEqual(result, getfasta.getDefaultRegion(regionDict, self.maxsize))

        # The 3900-long region is expected to be dropped (self.maxsize is 3000).
        regionDict = {"1": [(10, 20, 10), (100, 4000, 3900)],
                      "2": [(11, 21, 11)]
        }
        result = {"2": [{"start": 11, "length": 11, "topPos": [-1]}],
                  "1": [{"start": 10, "length": 10, "topPos": [-1]}]
        }
        self.assertEqual(result, getfasta.getDefaultRegion(regionDict, self.maxsize))

        # Regions listed after the dropped one are still expected in the output.
        regionDict = {"1": [(10, 20, 10), (100, 4000, 3900), (50, 60, 10)],
                      "2": [(11, 21, 11)]
        }
        result = {"2": [{"start": 11, "length": 11, "topPos": [-1]}],
                  "1": [{"start": 10, "length": 10, "topPos": [-1]},
                        {"start": 50, "length": 10, "topPos": [-1]}]
        }
        self.assertEqual(result, getfasta.getDefaultRegion(regionDict, self.maxsize))


    def testGetRegionUsingPeaks(self):
        """getRegionUsingPeaks builds region dictionaries from five-field region tuples."""
        self.assertEqual({}, getfasta.getRegionUsingPeaks(self.regionDict, self.minHitThresh, self.maxsize))

        regionDict = {"1": [],
                      "2": []
        }
        result = {"2": [],
                  "1": []
        }
        self.assertEqual(result, getfasta.getRegionUsingPeaks(regionDict, self.minHitThresh, self.maxsize))

        # The expected topPos (5) matches the fourth tuple field minus the start (15 - 10).
        regionDict = {"1": [(10, 20, 10, 15, 1)],
                      "2": []
        }
        result = {"2": [],
                  "1": [{"start": 10, "length": 10, "topPos": [5]}]
        }
        self.assertEqual(result, getfasta.getRegionUsingPeaks(regionDict, self.minHitThresh, self.maxsize))

        # Same input with a hit threshold of 3: the region (last field 1) is expected to be dropped.
        result = {"2": [],
                  "1": []
        }
        self.assertEqual(result, getfasta.getRegionUsingPeaks(regionDict, 3, self.maxsize))

        regionDict = {"1": [(10, 20, 10, 15, 1)],
                      "2": [(11, 21, 11, 18, 1)]
        }
        result = {"2": [{"start": 11, "length": 11, "topPos": [7]}],
                  "1": [{"start": 10, "length": 10, "topPos": [5]}]
        }
        self.assertEqual(result, getfasta.getRegionUsingPeaks(regionDict, self.minHitThresh, self.maxsize))

        # The 3900-long region is expected to be dropped (self.maxsize is 3000).
        regionDict = {"1": [(10, 20, 10, 15, 1), (100, 4000, 3900, 111, 1)],
                      "2": [(11, 21, 11, 18, 1)]
        }
        result = {"2": [{"start": 11, "length": 11, "topPos": [7]}],
                  "1": [{"start": 10, "length": 10, "topPos": [5]}]
        }
        self.assertEqual(result, getfasta.getRegionUsingPeaks(regionDict, self.minHitThresh, self.maxsize))

        regionDict = {"1": [(10, 20, 10, 15, 1), (100, 4000, 3900, 111, 1), (50, 60, 10, 59, 1)],
                      "2": [(11, 21, 11, 18, 1)]
        }
        result = {"2": [{"start": 11, "length": 11, "topPos": [7]}],
                  "1": [{"start": 10, "length": 10, "topPos": [5]},
                        {"start": 50, "length": 10, "topPos": [9]}]
        }
        self.assertEqual(result, getfasta.getRegionUsingPeaks(regionDict, self.minHitThresh, self.maxsize))


    #TODO: write test.  This seems to not make sense.  We are always returning a "topPos" of range(rlen).
    # Need to check whether the issue is in commoncode.findPeak, as there is a lot of questionable
    # logic in that one.
    def testGetRegionUsingRDS(self):
        """getRegionUsingRDS builds region dictionaries using reads stored in an RDS file."""
        rds = ReadDataset.ReadDataset(testDBName, initialize=True, datasetType="DNA", verbose=False)
        try:
            rds.insertMetadata([("readsize", "100")])
            rdsEntryList = [("testRead", "chr1", 10, 100, "+", 1.0, "", "")]
            rds.insertUniqs(rdsEntryList)
            self.assertEqual({}, getfasta.getRegionUsingRDS(self.regionDict, rds, self.minHitThresh, self.maxsize))

            regionDict = {"1": [],
                          "2": []
            }
            result = {"2": [],
                      "1": []
            }
            self.assertEqual(result, getfasta.getRegionUsingRDS(regionDict, rds, self.minHitThresh, self.maxsize))

            # Ack with a capital ACK.
            # Pins the current behaviour questioned in the TODO above: topPos is every offset in range(length).
            regionDict = {"1": [(1, 600, 5)],
                          "2": []
            }
            result = {"1": [{"start": 1, "length": 5, "topPos": [0, 1, 2, 3, 4]}],
                      "2": []
            }
            self.assertEqual(result, getfasta.getRegionUsingRDS(regionDict, rds, self.minHitThresh, self.maxsize))
        finally:
            # Drop the dataset even when an assertion fails; presumably this releases
            # its handle on the database file before tearDown removes it.
            del rds


    def testWriteFastaFile(self):
        """writeFastaFile writes one header line and one sequence line per region."""
        chr1Header = ">chr1:11956-12057\n"
        chr1Sequence = "tcatagtcccctggccccattaatggattctgggatagacatgaggaccaagccaggTGGGATGAGTGAGTGTGGCTTCTGGAGGAAGTGGGGACACAGGA\n"
        chr1SecondHeader = ">chr1:14952-15053\n"
        chr1SecondSequence = "AGTGAATGAGGGAAAGGGCAGGGCCCGGGACTGGGGAATCTGTAGGGTCAATGGAGGAGTTCAGAGAAGGTGCAACATTTCTGACCCCCTACAAGGTGCTT\n"
        chr2Header = ">chr2:17980-18081\n"
        chr2Sequence = "ATCATTTCAAGGATGCTTTGAGGGTAAAAAGAATGATCAATTGTGAAGCAGTGAATTGTGCTGCCAGGCACAATTCATTGGGTAATAGAAAGCTTCATTTA\n"

        # No regions at all: the output file must contain no lines.
        ncregions = {}
        getfasta.writeFastaFile(ncregions, "hsapiens", self.outfilename)
        self.assertEqual([], self._readOutputLines())

        # Chromosomes without regions: still no lines.
        ncregions = {"1": [],
                     "2": []
        }
        getfasta.writeFastaFile(ncregions, "hsapiens", self.outfilename)
        self.assertEqual([], self._readOutputLines())

        ncregions = {"1": [{"start": 12000, "length": 50, "topPos": [6]}],
                     "2": []
        }
        getfasta.writeFastaFile(ncregions, "hsapiens", self.outfilename)
        self.assertEqual([chr1Header, chr1Sequence], self._readOutputLines())

        ncregions = {"1": [{"start": 12000, "length": 50, "topPos": [6]}],
                     "2": [{"start": 18000, "length": 50, "topPos": [30]}]
        }
        getfasta.writeFastaFile(ncregions, "hsapiens", self.outfilename)
        self.assertEqual([chr1Header, chr1Sequence, chr2Header, chr2Sequence], self._readOutputLines())

        # An additional topPos entry (20) is expected to leave the output unchanged.
        ncregions = {"1": [{"start": 12000, "length": 50, "topPos": [6, 20]}],
                     "2": [{"start": 18000, "length": 50, "topPos": [30]}]
        }
        getfasta.writeFastaFile(ncregions, "hsapiens", self.outfilename)
        self.assertEqual([chr1Header, chr1Sequence, chr2Header, chr2Sequence], self._readOutputLines())

        # Two regions on the same chromosome are written in list order.
        ncregions = {"1": [{"start": 12000, "length": 50, "topPos": [6]},
                           {"start": 15000, "length": 50, "topPos": [2]}
                          ],
                     "2": [{"start": 18000, "length": 50, "topPos": [30]}]
        }
        getfasta.writeFastaFile(ncregions, "hsapiens", self.outfilename)
        expectedLines = [chr1Header, chr1Sequence,
                         chr1SecondHeader, chr1SecondSequence,
                         chr2Header, chr2Sequence]
        self.assertEqual(expectedLines, self._readOutputLines())
220
221
def suite():
    """Build a test suite containing every test method of TestGetFasta.

    Returns:
        unittest.TestSuite: a suite wrapping the tests loaded from TestGetFasta.
    """
    # TestLoader.loadTestsFromTestCase replaces unittest.makeSuite, which is
    # deprecated and removed in Python 3.13; the loader exists in Python 2 too.
    testSuite = unittest.TestSuite()
    testSuite.addTest(unittest.TestLoader().loadTestsFromTestCase(TestGetFasta))

    return testSuite
227
228
if __name__ == "__main__":
    # Run every test in this module when the file is executed directly.
    # To run a single test instead, uncomment and adapt the argv override below.
    #import sys;sys.argv = ['', 'Test.testName']
    unittest.main()