Extend htsworkflow.pipelines.sequences to also try to figure out the cycle count.
[htsworkflow.git] / htsworkflow / pipelines / test / test_sequences.py
1 #!/usr/bin/env python
2 import os
3 import unittest
4
5 from htsworkflow.pipelines import sequences
6
7 class SequenceFileTests(unittest.TestCase):
8     """
9     Make sure the sequence archive class works
10     """
11     def test_flowcell_cycle(self):
12         """
13         Make sure code to parse directory heirarchy works
14         """
15         path = '/root/42BW9AAXX/C1-152'
16         flowcell, start, stop = sequences.get_flowcell_cycle(path)
17
18         self.failUnlessEqual(flowcell, '42BW9AAXX')
19         self.failUnlessEqual(start, 1)
20         self.failUnlessEqual(stop, 152)
21
22         path = '/root/42BW9AAXX/other'
23         self.failUnlessRaises(ValueError, sequences.get_flowcell_cycle, path)
24
25
26     def test_srf(self):
27         path = '/root/42BW9AAXX/C1-38'
28         name = 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_4.srf'
29         pathname = os.path.join(path,name)
30         f = sequences.parse_srf(path, name)
31
32         self.failUnlessEqual(f.filetype, 'srf')
33         self.failUnlessEqual(f.path, pathname)
34         self.failUnlessEqual(f.flowcell, '42BW9AAXX')
35         self.failUnlessEqual(f.lane, 4)
36         self.failUnlessEqual(f.read, None)
37         self.failUnlessEqual(f.pf, None)
38         self.failUnlessEqual(f.cycle, 38)
39
40     def test_qseq(self):
41         path = '/root/42BW9AAXX/C1-36'
42         name = 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l4_r1.tar.bz2'
43         pathname = os.path.join(path,name)
44         f = sequences.parse_qseq(path, name)
45
46         self.failUnlessEqual(f.filetype, 'qseq')
47         self.failUnlessEqual(f.path, pathname)
48         self.failUnlessEqual(f.flowcell, '42BW9AAXX')
49         self.failUnlessEqual(f.lane, 4)
50         self.failUnlessEqual(f.read, 1)
51         self.failUnlessEqual(f.pf, None)
52         self.failUnlessEqual(f.cycle, 36)
53
54
55         path = '/root/ilmn200901/C1-202'
56         name = 'woldlab_090125_HWI-EAS_0000_ilmn200901_l1_r1.tar.bz2'
57         pathname = os.path.join(path, name)
58         f = sequences.parse_qseq(path, name)
59
60         self.failUnlessEqual(f.filetype, 'qseq')
61         self.failUnlessEqual(f.path, pathname)
62         self.failUnlessEqual(f.lane, 1)
63         self.failUnlessEqual(f.read, 1)
64         self.failUnlessEqual(f.pf, None)
65         self.failUnlessEqual(f.cycle, 202)
66
67     def test_fastq(self):
68         path = '/root/42BW9AAXX/C1-38'
69         name = 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l4_r1_pass.fastq.bz2'
70         pathname = os.path.join(path,name)
71         f = sequences.parse_fastq(path, name)
72
73         self.failUnlessEqual(f.filetype, 'fastq')
74         self.failUnlessEqual(f.path, pathname)
75         self.failUnlessEqual(f.flowcell, '42BW9AAXX')
76         self.failUnlessEqual(f.lane, 4)
77         self.failUnlessEqual(f.read, 1)
78         self.failUnlessEqual(f.pf, True)
79         self.failUnlessEqual(f.cycle, 38)
80
81         name = 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l4_r2_nopass.fastq.bz2'
82         pathname = os.path.join(path,name)
83         f = sequences.parse_fastq(path, name)
84
85         self.failUnlessEqual(f.filetype, 'fastq')
86         self.failUnlessEqual(f.path, pathname)
87         self.failUnlessEqual(f.flowcell, '42BW9AAXX')
88         self.failUnlessEqual(f.lane, 4)
89         self.failUnlessEqual(f.read, 2)
90         self.failUnlessEqual(f.pf, False)
91         self.failUnlessEqual(f.cycle, 38)
92
93     def test_eland(self):
94         path = '/root/42BW9AAXX/C1-38'
95         name = 's_4_eland_extended.txt.bz2'
96         pathname = os.path.join(path,name)
97         f = sequences.parse_eland(path, name)
98
99         self.failUnlessEqual(f.filetype, 'eland')
100         self.failUnlessEqual(f.path, pathname)
101         self.failUnlessEqual(f.flowcell, '42BW9AAXX')
102         self.failUnlessEqual(f.lane, 4)
103         self.failUnlessEqual(f.read, None)
104         self.failUnlessEqual(f.pf, None)
105         self.failUnlessEqual(f.cycle, 38)
106
107         path = '/root/42BW9AAXX/C1-152'
108         name = 's_4_1_eland_extended.txt.bz2'
109         pathname = os.path.join(path,name)
110         f = sequences.parse_eland(path, name)
111
112         self.failUnlessEqual(f.filetype, 'eland')
113         self.failUnlessEqual(f.path, pathname)
114         self.failUnlessEqual(f.flowcell, '42BW9AAXX')
115         self.failUnlessEqual(f.lane, 4)
116         self.failUnlessEqual(f.read, 1)
117         self.failUnlessEqual(f.pf, None)
118         self.failUnlessEqual(f.cycle, 152)
119
120     def test_sequence_file_equality(self):
121         path = '/root/42BW9AAXX/C1-38'
122         name = 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l4_r1.tar.bz2'
123
124         f1_qseq = sequences.parse_qseq(path, name)
125         f2_qseq = sequences.parse_qseq(path, name)
126
127         self.failUnlessEqual(f1_qseq, f2_qseq)
128
129     def test_sql(self):
130         """
131         Make sure that the quick and dirty sql interface in sequences works
132         """
133         import sqlite3
134         db = sqlite3.connect(":memory:")
135         c = db.cursor()
136         sequences.create_sequence_table(c)
137         
138         data = [('/root/42BW9AAXX/C1-152',
139                 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l1_r1.tar.bz2'),
140                 ('/root/42BW9AAXX/C1-152',
141                 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l1_r2.tar.bz2'),
142                 ('/root/42BW9AAXX/C1-152',
143                 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l2_r1.tar.bz2'),
144                 ('/root/42BW9AAXX/C1-152',
145                 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l2_r21.tar.bz2'),]
146
147         for path, name in data:
148             seq = sequences.parse_qseq(path, name)
149             seq.save(c)
150
151         count = c.execute("select count(*) from sequences")
152         row = count.fetchone()
153         self.failUnlessEqual(row[0], 4)
154  
155
def suite():
    """
    Return a suite holding every test method of SequenceFileTests
    """
    # unittest.makeSuite was deprecated in Python 3.11 and removed in 3.13.
    # The default loader collects methods with the same 'test' prefix that
    # was previously passed explicitly.
    return unittest.defaultTestLoader.loadTestsFromTestCase(SequenceFileTests)
158
if __name__ == "__main__":
    # Executed as a script: let unittest resolve the module-level suite()
    # by name and run whatever it returns.
    unittest.main(module="__main__", defaultTest="suite")