5 from htsworkflow.pipelines import sequences
class SequenceFileTests(unittest.TestCase):
    """Make sure the sequence archive class works.

    Covers the directory parser (``get_flowcell_cycle``), the per-filetype
    file-name parsers (``parse_srf``, ``parse_qseq``, ``parse_fastq``,
    ``parse_eland``), equality/hashing of the parsed objects, and the small
    SQL helper, all from ``htsworkflow.pipelines.sequences``.
    """

    def test_get_flowcell_cycle(self):
        """Check the FlowcellPath returned for each supported directory form.

        The cases cover a cycle directory with and without a trailing slash,
        and with and without a project sub-directory.
        """
        cases = [
            ('/root/42BW9AAXX/C1-152',
             sequences.FlowcellPath('42BW9AAXX', 1, 152, None)),
            ('/root/42BW9AAXX/C1-152/',
             sequences.FlowcellPath('42BW9AAXX', 1, 152, None)),
            ('/root/42BW9AAXX/C1-152/Project_12345',
             sequences.FlowcellPath('42BW9AAXX', 1, 152, 'Project_12345')),
            ('/root/42BW9AAXX/C1-152/Project_12345/',
             sequences.FlowcellPath('42BW9AAXX', 1, 152, 'Project_12345')),
        ]

        for directory, expected in cases:
            parsed = sequences.get_flowcell_cycle(directory)
            self.assertEqual(parsed, expected)

    def test_flowcell_cycle(self):
        """Make sure code to parse directory hierarchy works."""
        path = '/root/42BW9AAXX/C1-152'
        flowcell, start, stop, project = sequences.get_flowcell_cycle(path)

        self.assertEqual(flowcell, '42BW9AAXX')
        self.assertEqual(start, 1)
        self.assertEqual(stop, 152)
        self.assertEqual(project, None)

        # A directory that is not a cycle directory must be rejected.
        path = '/root/42BW9AAXX/other'
        self.assertRaises(ValueError, sequences.get_flowcell_cycle, path)

    def test_flowcell_project_cycle(self):
        """Make sure code to parse directory hierarchy works with a project."""
        path = '/root/42BW9AAXX/C1-152/Project_12345_Index1'
        flowcell, start, stop, project = sequences.get_flowcell_cycle(path)

        self.assertEqual(flowcell, '42BW9AAXX')
        self.assertEqual(start, 1)
        self.assertEqual(stop, 152)
        self.assertEqual(project, 'Project_12345_Index1')

        # A directory that is not a cycle directory must be rejected.
        path = '/root/42BW9AAXX/other'
        self.assertRaises(ValueError, sequences.get_flowcell_cycle, path)

    # NOTE(review): the ``def`` line of this test was not visible in the
    # reviewed source; the name is inferred from parse_srf -- confirm.
    def test_srf(self):
        """Check the attributes parse_srf extracts from an srf file name."""
        path = '/root/42BW9AAXX/C1-38'
        name = 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_4.srf'
        pathname = os.path.join(path, name)
        f = sequences.parse_srf(path, name)

        self.assertEqual(f.filetype, 'srf')
        self.assertEqual(f.path, pathname)
        self.assertEqual(f.flowcell, '42BW9AAXX')
        self.assertEqual(f.lane, 4)
        self.assertEqual(f.read, None)
        self.assertEqual(f.pf, None)
        self.assertEqual(f.cycle, 38)

    # NOTE(review): the ``def`` line of this test was not visible in the
    # reviewed source; the name is inferred from parse_qseq -- confirm.
    def test_qseq(self):
        """Check the attributes parse_qseq extracts from qseq archive names."""
        path = '/root/42BW9AAXX/C1-36'
        name = 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l4_r1.tar.bz2'
        pathname = os.path.join(path, name)
        f = sequences.parse_qseq(path, name)

        self.assertEqual(f.filetype, 'qseq')
        self.assertEqual(f.path, pathname)
        self.assertEqual(f.flowcell, '42BW9AAXX')
        self.assertEqual(f.lane, 4)
        self.assertEqual(f.read, 1)
        self.assertEqual(f.pf, None)
        self.assertEqual(f.cycle, 36)

        # Second case: a differently shaped flowcell id and instrument name.
        path = '/root/ilmn200901/C1-202'
        name = 'woldlab_090125_HWI-EAS_0000_ilmn200901_l1_r1.tar.bz2'
        pathname = os.path.join(path, name)
        f = sequences.parse_qseq(path, name)

        self.assertEqual(f.filetype, 'qseq')
        self.assertEqual(f.path, pathname)
        self.assertEqual(f.lane, 1)
        self.assertEqual(f.read, 1)
        self.assertEqual(f.pf, None)
        self.assertEqual(f.cycle, 202)

    # NOTE(review): the ``def`` line of this test was not visible in the
    # reviewed source; the name is inferred from parse_fastq -- confirm.
    def test_fastq(self):
        """Check parse_fastq on ``_pass`` and ``_nopass`` fastq file names."""
        path = '/root/42BW9AAXX/C1-38'
        name = 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l4_r1_pass.fastq.bz2'
        pathname = os.path.join(path, name)
        f = sequences.parse_fastq(path, name)

        self.assertEqual(f.filetype, 'fastq')
        self.assertEqual(f.path, pathname)
        self.assertEqual(f.flowcell, '42BW9AAXX')
        self.assertEqual(f.lane, 4)
        self.assertEqual(f.read, 1)
        self.assertEqual(f.pf, True)
        self.assertEqual(f.cycle, 38)

        name = 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l4_r2_nopass.fastq.bz2'
        pathname = os.path.join(path, name)
        f = sequences.parse_fastq(path, name)

        self.assertEqual(f.filetype, 'fastq')
        self.assertEqual(f.path, pathname)
        self.assertEqual(f.flowcell, '42BW9AAXX')
        self.assertEqual(f.lane, 4)
        self.assertEqual(f.read, 2)
        self.assertEqual(f.pf, False)
        self.assertEqual(f.cycle, 38)

    def test_project_fastq(self):
        """Check parse_fastq on split fastq files inside a project directory."""
        path = '/root/42BW9AAXX/C1-38/Project_12345'
        name = '11111_NoIndex_L001_R1_001.fastq.gz'
        pathname = os.path.join(path, name)
        f = sequences.parse_fastq(path, name)

        self.assertEqual(f.filetype, 'split_fastq')
        self.assertEqual(f.path, pathname)
        self.assertEqual(f.flowcell, '42BW9AAXX')
        self.assertEqual(f.lane, 1)
        self.assertEqual(f.read, 1)
        self.assertEqual(f.pf, True)
        self.assertEqual(f.project, '11111')
        self.assertEqual(f.index, 'NoIndex')
        self.assertEqual(f.cycle, 38)

        name = '11112_AAATTT_L001_R2_003.fastq.gz'
        pathname = os.path.join(path, name)
        f = sequences.parse_fastq(path, name)

        self.assertEqual(f.filetype, 'split_fastq')
        self.assertEqual(f.path, pathname)
        self.assertEqual(f.flowcell, '42BW9AAXX')
        self.assertEqual(f.lane, 1)
        self.assertEqual(f.read, 2)
        self.assertEqual(f.pf, True)
        self.assertEqual(f.project, '11112')
        self.assertEqual(f.index, 'AAATTT')
        self.assertEqual(f.cycle, 38)

    def test_project_fastq_hashing(self):
        """Can we tell the difference between sequence files?

        Each pair differs in exactly one file-name component (the read for
        the first pair, the trailing chunk number for the second); the parsed
        objects, their keys and their hashes must all differ.
        """
        path = '/root/42BW9AAXX/C1-38/Project_12345'
        names = [
            ('11111_NoIndex_L001_R1_001.fastq.gz',
             '11111_NoIndex_L001_R2_001.fastq.gz'),
            ('11112_NoIndex_L001_R1_001.fastq.gz',
             '11112_NoIndex_L001_R1_002.fastq.gz'),
        ]
        for a_name, b_name in names:
            a = sequences.parse_fastq(path, a_name)
            b = sequences.parse_fastq(path, b_name)
            self.assertNotEqual(a, b)
            self.assertNotEqual(a.key(), b.key())
            self.assertNotEqual(hash(a), hash(b))

    def test_eland(self):
        """Check parse_eland with and without a read number in the name."""
        path = '/root/42BW9AAXX/C1-38'
        name = 's_4_eland_extended.txt.bz2'
        pathname = os.path.join(path, name)
        f = sequences.parse_eland(path, name)

        self.assertEqual(f.filetype, 'eland')
        self.assertEqual(f.path, pathname)
        self.assertEqual(f.flowcell, '42BW9AAXX')
        self.assertEqual(f.lane, 4)
        self.assertEqual(f.read, None)
        self.assertEqual(f.pf, None)
        self.assertEqual(f.cycle, 38)

        path = '/root/42BW9AAXX/C1-152'
        name = 's_4_1_eland_extended.txt.bz2'
        pathname = os.path.join(path, name)
        f = sequences.parse_eland(path, name)

        self.assertEqual(f.filetype, 'eland')
        self.assertEqual(f.path, pathname)
        self.assertEqual(f.flowcell, '42BW9AAXX')
        self.assertEqual(f.lane, 4)
        self.assertEqual(f.read, 1)
        self.assertEqual(f.pf, None)
        self.assertEqual(f.cycle, 152)

    def test_sequence_file_equality(self):
        """Parsing the same path and name twice must give equal objects."""
        path = '/root/42BW9AAXX/C1-38'
        name = 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l4_r1.tar.bz2'

        f1_qseq = sequences.parse_qseq(path, name)
        f2_qseq = sequences.parse_qseq(path, name)

        self.assertEqual(f1_qseq, f2_qseq)

    # NOTE(review): the ``def`` line of this test was not visible in the
    # reviewed source; the name is inferred from its docstring -- confirm.
    def test_sql(self):
        """Make sure that the quick and dirty sql interface in sequences works."""
        # Imported locally so the test does not rely on a module-level import.
        import sqlite3

        db = sqlite3.connect(":memory:")
        # Release the connection even when an assertion below fails.
        self.addCleanup(db.close)
        # NOTE(review): the statement binding ``c`` was not visible in the
        # reviewed source; a cursor on ``db`` is assumed -- confirm.
        c = db.cursor()
        sequences.create_sequence_table(c)

        data = [
            ('/root/42BW9AAXX/C1-152',
             'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l1_r1.tar.bz2'),
            ('/root/42BW9AAXX/C1-152',
             'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l1_r2.tar.bz2'),
            ('/root/42BW9AAXX/C1-152',
             'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l2_r1.tar.bz2'),
            ('/root/42BW9AAXX/C1-152',
             'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l2_r21.tar.bz2'),
        ]

        for path, name in data:
            seq = sequences.parse_qseq(path, name)
            # NOTE(review): the call that stores ``seq`` was not visible in
            # the reviewed source; ``save(cursor)`` is assumed -- confirm
            # against the sequences module.
            seq.save(c)

        # One row is expected per parsed file.
        count = c.execute("select count(*) from sequences")
        row = count.fetchone()
        self.assertEqual(row[0], 4)
def suite():
    """Return a TestSuite holding every ``test*`` method of SequenceFileTests.

    Uses the default test loader instead of ``unittest.makeSuite``, which is
    deprecated and no longer available in recent Python releases. The default
    loader selects methods with the same ``test`` prefix.
    """
    return unittest.defaultTestLoader.loadTestsFromTestCase(SequenceFileTests)
if __name__ == "__main__":
    # Run as a script: hand control to unittest's command-line runner and
    # have it run the test named by the module-level "suite" callable.
    unittest.main(defaultTest="suite")