157246a94e3eae277e78df2813f5670aabb89621
[htsworkflow.git] / htsworkflow / pipelines / test / test_sequences.py
1 #!/usr/bin/env python
2 import os
3 import unittest
4
5 from htsworkflow.pipelines import sequences
6
7
class SequenceFileTests(unittest.TestCase):
    """
    Make sure the sequence archive class works.

    Each test hands hand-written archive paths and file names to the
    helpers in the ``sequences`` module and checks the attributes of
    whatever they return.
    """
    def test_get_flowcell_cycle(self):
        """
        Compare get_flowcell_cycle results against FlowcellPath values.

        Covers paths with and without a trailing slash, and with and
        without a trailing project directory.
        """
        tests = [
            ('/root/42BW9AAXX/C1-152',
             sequences.FlowcellPath('42BW9AAXX', 1, 152, None)),
            ('/root/42BW9AAXX/C1-152/',
             sequences.FlowcellPath('42BW9AAXX', 1, 152, None)),
            ('/root/42BW9AAXX/C1-152/Project_12345',
             sequences.FlowcellPath('42BW9AAXX', 1, 152, 'Project_12345')),
            ('/root/42BW9AAXX/C1-152/Project_12345/',
             sequences.FlowcellPath('42BW9AAXX', 1, 152, 'Project_12345')),
        ]

        for pathname, expected in tests:
            path = sequences.get_flowcell_cycle(pathname)
            self.assertEqual(path, expected)

    def test_flowcell_cycle(self):
        """
        Make sure code to parse directory hierarchy works
        """
        path = '/root/42BW9AAXX/C1-152'
        flowcell, start, stop, project = sequences.get_flowcell_cycle(path)

        self.assertEqual(flowcell, '42BW9AAXX')
        self.assertEqual(start, 1)
        self.assertEqual(stop, 152)
        self.assertEqual(project, None)

        # A last component that is not a cycle directory must be rejected.
        path = '/root/42BW9AAXX/other'
        self.assertRaises(ValueError, sequences.get_flowcell_cycle, path)

    def test_flowcell_project_cycle(self):
        """
        Make sure code to parse directory hierarchy works
        when a project directory follows the cycle directory
        """
        path = '/root/42BW9AAXX/C1-152/Project_12345_Index1'
        flowcell, start, stop, project = sequences.get_flowcell_cycle(path)

        self.assertEqual(flowcell, '42BW9AAXX')
        self.assertEqual(start, 1)
        self.assertEqual(stop, 152)
        self.assertEqual(project, 'Project_12345_Index1')

        # A last component that is not a cycle directory must be rejected.
        path = '/root/42BW9AAXX/other'
        self.assertRaises(ValueError, sequences.get_flowcell_cycle, path)

    def test_srf(self):
        """
        Check the attributes parse_srf reports for an srf file name
        """
        path = '/root/42BW9AAXX/C1-38'
        name = 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_4.srf'
        pathname = os.path.join(path, name)
        f = sequences.parse_srf(path, name)

        self.assertEqual(f.filetype, 'srf')
        self.assertEqual(f.path, pathname)
        self.assertEqual(f.flowcell, '42BW9AAXX')
        self.assertEqual(f.lane, 4)
        self.assertEqual(f.read, None)
        self.assertEqual(f.pf, None)
        self.assertEqual(f.cycle, 38)

    def test_qseq(self):
        """
        Check the attributes parse_qseq reports for qseq tarball names
        """
        path = '/root/42BW9AAXX/C1-36'
        name = 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l4_r1.tar.bz2'
        pathname = os.path.join(path, name)
        f = sequences.parse_qseq(path, name)

        self.assertEqual(f.filetype, 'qseq')
        self.assertEqual(f.path, pathname)
        self.assertEqual(f.flowcell, '42BW9AAXX')
        self.assertEqual(f.lane, 4)
        self.assertEqual(f.read, 1)
        self.assertEqual(f.pf, None)
        self.assertEqual(f.cycle, 36)

        # Second archive uses a differently shaped flowcell id; the
        # flowcell attribute is not checked for this one.
        path = '/root/ilmn200901/C1-202'
        name = 'woldlab_090125_HWI-EAS_0000_ilmn200901_l1_r1.tar.bz2'
        pathname = os.path.join(path, name)
        f = sequences.parse_qseq(path, name)

        self.assertEqual(f.filetype, 'qseq')
        self.assertEqual(f.path, pathname)
        self.assertEqual(f.lane, 1)
        self.assertEqual(f.read, 1)
        self.assertEqual(f.pf, None)
        self.assertEqual(f.cycle, 202)

    def test_fastq(self):
        """
        Check parse_fastq on "_pass" and "_nopass" fastq file names
        """
        path = '/root/42BW9AAXX/C1-38'
        name = 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l4_r1_pass.fastq.bz2'
        pathname = os.path.join(path, name)
        f = sequences.parse_fastq(path, name)

        self.assertEqual(f.filetype, 'fastq')
        self.assertEqual(f.path, pathname)
        self.assertEqual(f.flowcell, '42BW9AAXX')
        self.assertEqual(f.lane, 4)
        self.assertEqual(f.read, 1)
        self.assertEqual(f.pf, True)
        self.assertEqual(f.cycle, 38)

        name = 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l4_r2_nopass.fastq.bz2'
        pathname = os.path.join(path, name)
        f = sequences.parse_fastq(path, name)

        self.assertEqual(f.filetype, 'fastq')
        self.assertEqual(f.path, pathname)
        self.assertEqual(f.flowcell, '42BW9AAXX')
        self.assertEqual(f.lane, 4)
        self.assertEqual(f.read, 2)
        self.assertEqual(f.pf, False)
        self.assertEqual(f.cycle, 38)

    def test_project_fastq(self):
        """
        Check parse_fastq on fastq files stored under a project directory
        """
        path = '/root/42BW9AAXX/C1-38/Project_12345'
        name = '11111_NoIndex_L001_R1_001.fastq.gz'
        pathname = os.path.join(path, name)
        f = sequences.parse_fastq(path, name)

        self.assertEqual(f.filetype, 'fastq')
        self.assertEqual(f.path, pathname)
        self.assertEqual(f.flowcell, '42BW9AAXX')
        self.assertEqual(f.lane, 1)
        self.assertEqual(f.read, 1)
        self.assertEqual(f.pf, True)
        self.assertEqual(f.project, '11111')
        self.assertEqual(f.index, 'NoIndex')
        self.assertEqual(f.cycle, 38)

        name = '11112_AAATTT_L001_R2_003.fastq.gz'
        pathname = os.path.join(path, name)
        f = sequences.parse_fastq(path, name)

        self.assertEqual(f.filetype, 'fastq')
        self.assertEqual(f.path, pathname)
        self.assertEqual(f.flowcell, '42BW9AAXX')
        self.assertEqual(f.lane, 1)
        self.assertEqual(f.read, 2)
        self.assertEqual(f.pf, True)
        self.assertEqual(f.project, '11112')
        self.assertEqual(f.index, 'AAATTT')
        self.assertEqual(f.cycle, 38)

    def test_eland(self):
        """
        Check parse_eland on eland names with and without a read number
        """
        path = '/root/42BW9AAXX/C1-38'
        name = 's_4_eland_extended.txt.bz2'
        pathname = os.path.join(path, name)
        f = sequences.parse_eland(path, name)

        self.assertEqual(f.filetype, 'eland')
        self.assertEqual(f.path, pathname)
        self.assertEqual(f.flowcell, '42BW9AAXX')
        self.assertEqual(f.lane, 4)
        self.assertEqual(f.read, None)
        self.assertEqual(f.pf, None)
        self.assertEqual(f.cycle, 38)

        path = '/root/42BW9AAXX/C1-152'
        name = 's_4_1_eland_extended.txt.bz2'
        pathname = os.path.join(path, name)
        f = sequences.parse_eland(path, name)

        self.assertEqual(f.filetype, 'eland')
        self.assertEqual(f.path, pathname)
        self.assertEqual(f.flowcell, '42BW9AAXX')
        self.assertEqual(f.lane, 4)
        self.assertEqual(f.read, 1)
        self.assertEqual(f.pf, None)
        self.assertEqual(f.cycle, 152)

    def test_sequence_file_equality(self):
        """
        Two objects parsed from the same path and name must compare equal
        """
        path = '/root/42BW9AAXX/C1-38'
        name = 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l4_r1.tar.bz2'

        f1_qseq = sequences.parse_qseq(path, name)
        f2_qseq = sequences.parse_qseq(path, name)

        self.assertEqual(f1_qseq, f2_qseq)

    def test_sql(self):
        """
        Make sure that the quick and dirty sql interface in sequences works
        """
        import sqlite3
        db = sqlite3.connect(":memory:")
        # Close the connection even when an assertion below fails.
        try:
            c = db.cursor()
            sequences.create_sequence_table(c)

            data = [
                ('/root/42BW9AAXX/C1-152',
                 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l1_r1.tar.bz2'),
                ('/root/42BW9AAXX/C1-152',
                 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l1_r2.tar.bz2'),
                ('/root/42BW9AAXX/C1-152',
                 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l2_r1.tar.bz2'),
                ('/root/42BW9AAXX/C1-152',
                 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l2_r21.tar.bz2'),
            ]

            for path, name in data:
                seq = sequences.parse_qseq(path, name)
                seq.save(c)

            # Every saved record should show up as one row.
            count = c.execute("select count(*) from sequences")
            row = count.fetchone()
            self.assertEqual(row[0], len(data))
        finally:
            db.close()
216
217
def suite():
    """
    Return a TestSuite holding every ``test*`` method of SequenceFileTests.

    Uses the default TestLoader instead of ``unittest.makeSuite``, which
    was deprecated in Python 3.11 and removed in 3.13; the loader's default
    method prefix is ``test``, so the same tests are collected.
    """
    return unittest.defaultTestLoader.loadTestsFromTestCase(SequenceFileTests)
220
# Running this file as a script hands the module-level suite() factory to
# unittest's command-line runner.
if __name__ == '__main__':
    unittest.main(defaultTest='suite')