import os
import unittest

from htsworkflow.pipelines import sequences
class SequenceFileTests(unittest.TestCase):
    """
    Make sure the sequence archive class works

    Each test feeds literal directory and file-name strings to a parser in
    ``sequences`` and checks the attributes of the object that comes back.
    """

    def test_get_flowcell_cycle(self):
        """get_flowcell_cycle returns the expected FlowcellPath per input.

        Covers paths with and without a trailing slash, and with and
        without a trailing project directory.
        """
        cases = [
            ('/root/42BW9AAXX/C1-152',
             sequences.FlowcellPath('42BW9AAXX', 1, 152, None)),
            ('/root/42BW9AAXX/C1-152/',
             sequences.FlowcellPath('42BW9AAXX', 1, 152, None)),
            ('/root/42BW9AAXX/C1-152/Project_12345',
             sequences.FlowcellPath('42BW9AAXX', 1, 152, 'Project_12345')),
            ('/root/42BW9AAXX/C1-152/Project_12345/',
             sequences.FlowcellPath('42BW9AAXX', 1, 152, 'Project_12345')),
        ]

        for raw_path, expected in cases:
            parsed = sequences.get_flowcell_cycle(raw_path)
            self.assertEqual(parsed, expected)

    def test_flowcell_cycle(self):
        """
        Make sure code to parse directory hierarchy works
        """
        path = '/root/42BW9AAXX/C1-152'
        flowcell, start, stop, project = sequences.get_flowcell_cycle(path)

        self.assertEqual(flowcell, '42BW9AAXX')
        self.assertEqual(start, 1)
        self.assertEqual(stop, 152)
        self.assertEqual(project, None)

        # A directory that is not of the C<start>-<stop> form is rejected.
        path = '/root/42BW9AAXX/other'
        self.assertRaises(ValueError, sequences.get_flowcell_cycle, path)

    def test_flowcell_project_cycle(self):
        """
        Make sure code to parse directory hierarchy works

        Same as test_flowcell_cycle, but with a project directory below
        the cycle directory.
        """
        path = '/root/42BW9AAXX/C1-152/Project_12345_Index1'
        flowcell, start, stop, project = sequences.get_flowcell_cycle(path)

        self.assertEqual(flowcell, '42BW9AAXX')
        self.assertEqual(start, 1)
        self.assertEqual(stop, 152)
        self.assertEqual(project, 'Project_12345_Index1')

        path = '/root/42BW9AAXX/other'
        self.assertRaises(ValueError, sequences.get_flowcell_cycle, path)

    # NOTE(review): the reviewed listing had lost this method's header; the
    # name is inferred from the parser under test -- confirm.
    def test_srf(self):
        """parse_srf fills in type, path, flowcell, lane and cycle."""
        path = '/root/42BW9AAXX/C1-38'
        name = 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_4.srf'
        pathname = os.path.join(path, name)
        f = sequences.parse_srf(path, name)

        self.assertEqual(f.filetype, 'srf')
        self.assertEqual(f.path, pathname)
        self.assertEqual(f.flowcell, '42BW9AAXX')
        self.assertEqual(f.lane, 4)
        self.assertEqual(f.read, None)
        self.assertEqual(f.pf, None)
        self.assertEqual(f.cycle, 38)

    # NOTE(review): header restored; name inferred from the parser under
    # test -- confirm.
    def test_qseq(self):
        """parse_qseq reads lane and read from ``_l<lane>_r<read>`` names."""
        path = '/root/42BW9AAXX/C1-36'
        name = 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l4_r1.tar.bz2'
        pathname = os.path.join(path, name)
        f = sequences.parse_qseq(path, name)

        self.assertEqual(f.filetype, 'qseq')
        self.assertEqual(f.path, pathname)
        self.assertEqual(f.flowcell, '42BW9AAXX')
        self.assertEqual(f.lane, 4)
        self.assertEqual(f.read, 1)
        self.assertEqual(f.pf, None)
        self.assertEqual(f.cycle, 36)

        # Second sample: a different flowcell directory and cycle range.
        path = '/root/ilmn200901/C1-202'
        name = 'woldlab_090125_HWI-EAS_0000_ilmn200901_l1_r1.tar.bz2'
        pathname = os.path.join(path, name)
        f = sequences.parse_qseq(path, name)

        self.assertEqual(f.filetype, 'qseq')
        self.assertEqual(f.path, pathname)
        self.assertEqual(f.lane, 1)
        self.assertEqual(f.read, 1)
        self.assertEqual(f.pf, None)
        self.assertEqual(f.cycle, 202)

    # NOTE(review): header restored; name inferred from the parser under
    # test -- confirm.
    def test_fastq(self):
        """parse_fastq maps ``_pass`` / ``_nopass`` names to pf True / False."""
        path = '/root/42BW9AAXX/C1-38'
        name = 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l4_r1_pass.fastq.bz2'
        pathname = os.path.join(path, name)
        f = sequences.parse_fastq(path, name)

        self.assertEqual(f.filetype, 'fastq')
        self.assertEqual(f.path, pathname)
        self.assertEqual(f.flowcell, '42BW9AAXX')
        self.assertEqual(f.lane, 4)
        self.assertEqual(f.read, 1)
        self.assertEqual(f.pf, True)
        self.assertEqual(f.cycle, 38)

        name = 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l4_r2_nopass.fastq.bz2'
        pathname = os.path.join(path, name)
        f = sequences.parse_fastq(path, name)

        self.assertEqual(f.filetype, 'fastq')
        self.assertEqual(f.path, pathname)
        self.assertEqual(f.flowcell, '42BW9AAXX')
        self.assertEqual(f.lane, 4)
        self.assertEqual(f.read, 2)
        self.assertEqual(f.pf, False)
        self.assertEqual(f.cycle, 38)

    def test_project_fastq(self):
        """parse_fastq handles ``<id>_<index>_L<lane>_R<read>_<n>`` names.

        These files sit in a project directory below the cycle directory;
        the expected project and index values are the leading fields of
        the file name.
        """
        path = '/root/42BW9AAXX/C1-38/Project_12345'
        name = '11111_NoIndex_L001_R1_001.fastq.gz'
        pathname = os.path.join(path, name)
        f = sequences.parse_fastq(path, name)

        self.assertEqual(f.filetype, 'fastq')
        self.assertEqual(f.path, pathname)
        self.assertEqual(f.flowcell, '42BW9AAXX')
        self.assertEqual(f.lane, 1)
        self.assertEqual(f.read, 1)
        self.assertEqual(f.pf, True)
        self.assertEqual(f.project, '11111')
        self.assertEqual(f.index, 'NoIndex')
        self.assertEqual(f.cycle, 38)

        name = '11112_AAATTT_L001_R2_003.fastq.gz'
        pathname = os.path.join(path, name)
        f = sequences.parse_fastq(path, name)

        self.assertEqual(f.filetype, 'fastq')
        self.assertEqual(f.path, pathname)
        self.assertEqual(f.flowcell, '42BW9AAXX')
        self.assertEqual(f.lane, 1)
        self.assertEqual(f.read, 2)
        self.assertEqual(f.pf, True)
        self.assertEqual(f.project, '11112')
        self.assertEqual(f.index, 'AAATTT')
        self.assertEqual(f.cycle, 38)

    def test_eland(self):
        """parse_eland reads the lane, and the read when the name has one."""
        path = '/root/42BW9AAXX/C1-38'
        name = 's_4_eland_extended.txt.bz2'
        pathname = os.path.join(path, name)
        f = sequences.parse_eland(path, name)

        self.assertEqual(f.filetype, 'eland')
        self.assertEqual(f.path, pathname)
        self.assertEqual(f.flowcell, '42BW9AAXX')
        self.assertEqual(f.lane, 4)
        self.assertEqual(f.read, None)
        self.assertEqual(f.pf, None)
        self.assertEqual(f.cycle, 38)

        # Same lane, but the file name also carries a read number.
        path = '/root/42BW9AAXX/C1-152'
        name = 's_4_1_eland_extended.txt.bz2'
        pathname = os.path.join(path, name)
        f = sequences.parse_eland(path, name)

        self.assertEqual(f.filetype, 'eland')
        self.assertEqual(f.path, pathname)
        self.assertEqual(f.flowcell, '42BW9AAXX')
        self.assertEqual(f.lane, 4)
        self.assertEqual(f.read, 1)
        self.assertEqual(f.pf, None)
        self.assertEqual(f.cycle, 152)

    def test_sequence_file_equality(self):
        """Two objects parsed from the same path and name compare equal."""
        path = '/root/42BW9AAXX/C1-38'
        name = 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l4_r1.tar.bz2'

        f1_qseq = sequences.parse_qseq(path, name)
        f2_qseq = sequences.parse_qseq(path, name)

        self.assertEqual(f1_qseq, f2_qseq)

    # NOTE(review): header restored; name inferred from the docstring --
    # confirm.
    def test_sql(self):
        """
        Make sure that the quick and dirty sql interface in sequences works
        """
        # Imported here so the test does not depend on a module-level import.
        import sqlite3

        db = sqlite3.connect(":memory:")
        # Release the in-memory database even when an assertion fails.
        self.addCleanup(db.close)
        # NOTE(review): the cursor creation was missing from the reviewed
        # listing; `c` is assumed to be a cursor on `db` -- confirm.
        c = db.cursor()
        sequences.create_sequence_table(c)

        data = [
            ('/root/42BW9AAXX/C1-152',
             'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l1_r1.tar.bz2'),
            ('/root/42BW9AAXX/C1-152',
             'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l1_r2.tar.bz2'),
            ('/root/42BW9AAXX/C1-152',
             'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l2_r1.tar.bz2'),
            ('/root/42BW9AAXX/C1-152',
             'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l2_r21.tar.bz2'),
        ]

        for path, name in data:
            seq = sequences.parse_qseq(path, name)
            # NOTE(review): the line that stores `seq` was missing from the
            # reviewed listing; the row count asserted below needs one row
            # per entry, so save(cursor) is assumed -- confirm against the
            # sequences API.
            seq.save(c)

        count = c.execute("select count(*) from sequences")
        row = count.fetchone()
        self.assertEqual(row[0], 4)
# NOTE(review): the `def suite():` header was missing from the reviewed
# listing; the name is taken from the defaultTest="suite" argument passed to
# unittest.main() at the bottom of this module.
def suite():
    """Return a TestSuite holding every ``test*`` method of SequenceFileTests.

    unittest.makeSuite() is deprecated since Python 3.11 and removed in
    3.13. The default loader collects methods using the same 'test' prefix
    that the makeSuite() call passed explicitly.
    """
    return unittest.defaultTestLoader.loadTestsFromTestCase(SequenceFileTests)
# Script entry point: hand the suite() factory to the unittest runner by name.
if __name__ == '__main__':
    unittest.main(defaultTest='suite')