Core functions for saving and finding fastq files generated by a HiSeq.
[htsworkflow.git] / htsworkflow / pipelines / test / test_sequences.py
1 #!/usr/bin/env python
2 import os
3 import unittest
4
5 from htsworkflow.pipelines import sequences
6
7 class SequenceFileTests(unittest.TestCase):
8     """
9     Make sure the sequence archive class works
10     """
11     def test_flowcell_cycle(self):
12         """
13         Make sure code to parse directory heirarchy works
14         """
15         path = '/root/42BW9AAXX/C1-152'
16         flowcell, start, stop, project = sequences.get_flowcell_cycle(path)
17
18         self.failUnlessEqual(flowcell, '42BW9AAXX')
19         self.failUnlessEqual(start, 1)
20         self.failUnlessEqual(stop, 152)
21         self.failUnlessEqual(project, None)
22
23         path = '/root/42BW9AAXX/other'
24         self.failUnlessRaises(ValueError, sequences.get_flowcell_cycle, path)
25
26     def test_flowcell_project_cycle(self):
27         """
28         Make sure code to parse directory heirarchy works
29         """
30         path = '/root/42BW9AAXX/C1-152/Project_12345_Index1'
31         flowcell, start, stop, project = sequences.get_flowcell_cycle(path)
32
33         self.failUnlessEqual(flowcell, '42BW9AAXX')
34         self.failUnlessEqual(start, 1)
35         self.failUnlessEqual(stop, 152)
36         self.failUnlessEqual(project, 'Project_12345_Index1')
37
38         path = '/root/42BW9AAXX/other'
39         self.failUnlessRaises(ValueError, sequences.get_flowcell_cycle, path)
40
41
42     def test_srf(self):
43         path = '/root/42BW9AAXX/C1-38'
44         name = 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_4.srf'
45         pathname = os.path.join(path,name)
46         f = sequences.parse_srf(path, name)
47
48         self.failUnlessEqual(f.filetype, 'srf')
49         self.failUnlessEqual(f.path, pathname)
50         self.failUnlessEqual(f.flowcell, '42BW9AAXX')
51         self.failUnlessEqual(f.lane, 4)
52         self.failUnlessEqual(f.read, None)
53         self.failUnlessEqual(f.pf, None)
54         self.failUnlessEqual(f.cycle, 38)
55
56     def test_qseq(self):
57         path = '/root/42BW9AAXX/C1-36'
58         name = 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l4_r1.tar.bz2'
59         pathname = os.path.join(path,name)
60         f = sequences.parse_qseq(path, name)
61
62         self.failUnlessEqual(f.filetype, 'qseq')
63         self.failUnlessEqual(f.path, pathname)
64         self.failUnlessEqual(f.flowcell, '42BW9AAXX')
65         self.failUnlessEqual(f.lane, 4)
66         self.failUnlessEqual(f.read, 1)
67         self.failUnlessEqual(f.pf, None)
68         self.failUnlessEqual(f.cycle, 36)
69
70
71         path = '/root/ilmn200901/C1-202'
72         name = 'woldlab_090125_HWI-EAS_0000_ilmn200901_l1_r1.tar.bz2'
73         pathname = os.path.join(path, name)
74         f = sequences.parse_qseq(path, name)
75
76         self.failUnlessEqual(f.filetype, 'qseq')
77         self.failUnlessEqual(f.path, pathname)
78         self.failUnlessEqual(f.lane, 1)
79         self.failUnlessEqual(f.read, 1)
80         self.failUnlessEqual(f.pf, None)
81         self.failUnlessEqual(f.cycle, 202)
82
83     def test_fastq(self):
84         path = '/root/42BW9AAXX/C1-38'
85         name = 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l4_r1_pass.fastq.bz2'
86         pathname = os.path.join(path,name)
87         f = sequences.parse_fastq(path, name)
88
89         self.failUnlessEqual(f.filetype, 'fastq')
90         self.failUnlessEqual(f.path, pathname)
91         self.failUnlessEqual(f.flowcell, '42BW9AAXX')
92         self.failUnlessEqual(f.lane, 4)
93         self.failUnlessEqual(f.read, 1)
94         self.failUnlessEqual(f.pf, True)
95         self.failUnlessEqual(f.cycle, 38)
96
97         name = 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l4_r2_nopass.fastq.bz2'
98         pathname = os.path.join(path,name)
99         f = sequences.parse_fastq(path, name)
100
101         self.failUnlessEqual(f.filetype, 'fastq')
102         self.failUnlessEqual(f.path, pathname)
103         self.failUnlessEqual(f.flowcell, '42BW9AAXX')
104         self.failUnlessEqual(f.lane, 4)
105         self.failUnlessEqual(f.read, 2)
106         self.failUnlessEqual(f.pf, False)
107         self.failUnlessEqual(f.cycle, 38)
108
109     def test_project_fastq(self):
110         path = '/root/42BW9AAXX/C1-38/Project_12345'
111         name = '11111_NoIndex_L001_R1_001.fastq.gz'
112         pathname = os.path.join(path,name)
113         f = sequences.parse_fastq(path, name)
114
115         self.failUnlessEqual(f.filetype, 'fastq')
116         self.failUnlessEqual(f.path, pathname)
117         self.failUnlessEqual(f.flowcell, '42BW9AAXX')
118         self.failUnlessEqual(f.lane, 1)
119         self.failUnlessEqual(f.read, 1)
120         self.failUnlessEqual(f.pf, True)
121         self.failUnlessEqual(f.project, '11111')
122         self.failUnlessEqual(f.index, 'NoIndex')
123         self.failUnlessEqual(f.cycle, 38)
124
125         name = '11112_AAATTT_L001_R2_003.fastq.gz'
126         pathname = os.path.join(path,name)
127         f = sequences.parse_fastq(path, name)
128
129         self.failUnlessEqual(f.filetype, 'fastq')
130         self.failUnlessEqual(f.path, pathname)
131         self.failUnlessEqual(f.flowcell, '42BW9AAXX')
132         self.failUnlessEqual(f.lane, 1)
133         self.failUnlessEqual(f.read, 2)
134         self.failUnlessEqual(f.pf, True)
135         self.failUnlessEqual(f.project, '11112')
136         self.failUnlessEqual(f.index, 'AAATTT')
137         self.failUnlessEqual(f.cycle, 38)
138
139     def test_eland(self):
140         path = '/root/42BW9AAXX/C1-38'
141         name = 's_4_eland_extended.txt.bz2'
142         pathname = os.path.join(path,name)
143         f = sequences.parse_eland(path, name)
144
145         self.failUnlessEqual(f.filetype, 'eland')
146         self.failUnlessEqual(f.path, pathname)
147         self.failUnlessEqual(f.flowcell, '42BW9AAXX')
148         self.failUnlessEqual(f.lane, 4)
149         self.failUnlessEqual(f.read, None)
150         self.failUnlessEqual(f.pf, None)
151         self.failUnlessEqual(f.cycle, 38)
152
153         path = '/root/42BW9AAXX/C1-152'
154         name = 's_4_1_eland_extended.txt.bz2'
155         pathname = os.path.join(path,name)
156         f = sequences.parse_eland(path, name)
157
158         self.failUnlessEqual(f.filetype, 'eland')
159         self.failUnlessEqual(f.path, pathname)
160         self.failUnlessEqual(f.flowcell, '42BW9AAXX')
161         self.failUnlessEqual(f.lane, 4)
162         self.failUnlessEqual(f.read, 1)
163         self.failUnlessEqual(f.pf, None)
164         self.failUnlessEqual(f.cycle, 152)
165
166     def test_sequence_file_equality(self):
167         path = '/root/42BW9AAXX/C1-38'
168         name = 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l4_r1.tar.bz2'
169
170         f1_qseq = sequences.parse_qseq(path, name)
171         f2_qseq = sequences.parse_qseq(path, name)
172
173         self.failUnlessEqual(f1_qseq, f2_qseq)
174
175     def test_sql(self):
176         """
177         Make sure that the quick and dirty sql interface in sequences works
178         """
179         import sqlite3
180         db = sqlite3.connect(":memory:")
181         c = db.cursor()
182         sequences.create_sequence_table(c)
183
184         data = [('/root/42BW9AAXX/C1-152',
185                 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l1_r1.tar.bz2'),
186                 ('/root/42BW9AAXX/C1-152',
187                 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l1_r2.tar.bz2'),
188                 ('/root/42BW9AAXX/C1-152',
189                 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l2_r1.tar.bz2'),
190                 ('/root/42BW9AAXX/C1-152',
191                 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l2_r21.tar.bz2'),]
192
193         for path, name in data:
194             seq = sequences.parse_qseq(path, name)
195             seq.save(c)
196
197         count = c.execute("select count(*) from sequences")
198         row = count.fetchone()
199         self.failUnlessEqual(row[0], 4)
200
201
def suite():
    """
    Return a TestSuite holding every ``test*`` method of SequenceFileTests.

    Built with the default test loader; ``unittest.makeSuite`` was
    deprecated in Python 3.11 and removed in 3.13. The loader's default
    method prefix is ``'test'``, matching the prefix previously passed
    to ``makeSuite``.
    """
    return unittest.defaultTestLoader.loadTestsFromTestCase(SequenceFileTests)
204
if __name__ == "__main__":
    # When executed directly as a script, hand control to unittest and have
    # it run the tests returned by the module-level suite() function.
    unittest.main(defaultTest="suite")