5 from htsworkflow.pipelines import sequences
7 class SequenceFileTests(unittest.TestCase):
9 Make sure the sequence archive class works
11 def test_flowcell_cycle(self):
13 Make sure code to parse directory heirarchy works
15 path = '/root/42BW9AAXX/C1-152'
16 flowcell, start, stop, project = sequences.get_flowcell_cycle(path)
18 self.failUnlessEqual(flowcell, '42BW9AAXX')
19 self.failUnlessEqual(start, 1)
20 self.failUnlessEqual(stop, 152)
21 self.failUnlessEqual(project, None)
23 path = '/root/42BW9AAXX/other'
24 self.failUnlessRaises(ValueError, sequences.get_flowcell_cycle, path)
26 def test_flowcell_project_cycle(self):
28 Make sure code to parse directory heirarchy works
30 path = '/root/42BW9AAXX/C1-152/Project_12345_Index1'
31 flowcell, start, stop, project = sequences.get_flowcell_cycle(path)
33 self.failUnlessEqual(flowcell, '42BW9AAXX')
34 self.failUnlessEqual(start, 1)
35 self.failUnlessEqual(stop, 152)
36 self.failUnlessEqual(project, 'Project_12345_Index1')
38 path = '/root/42BW9AAXX/other'
39 self.failUnlessRaises(ValueError, sequences.get_flowcell_cycle, path)
43 path = '/root/42BW9AAXX/C1-38'
44 name = 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_4.srf'
45 pathname = os.path.join(path,name)
46 f = sequences.parse_srf(path, name)
48 self.failUnlessEqual(f.filetype, 'srf')
49 self.failUnlessEqual(f.path, pathname)
50 self.failUnlessEqual(f.flowcell, '42BW9AAXX')
51 self.failUnlessEqual(f.lane, 4)
52 self.failUnlessEqual(f.read, None)
53 self.failUnlessEqual(f.pf, None)
54 self.failUnlessEqual(f.cycle, 38)
57 path = '/root/42BW9AAXX/C1-36'
58 name = 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l4_r1.tar.bz2'
59 pathname = os.path.join(path,name)
60 f = sequences.parse_qseq(path, name)
62 self.failUnlessEqual(f.filetype, 'qseq')
63 self.failUnlessEqual(f.path, pathname)
64 self.failUnlessEqual(f.flowcell, '42BW9AAXX')
65 self.failUnlessEqual(f.lane, 4)
66 self.failUnlessEqual(f.read, 1)
67 self.failUnlessEqual(f.pf, None)
68 self.failUnlessEqual(f.cycle, 36)
71 path = '/root/ilmn200901/C1-202'
72 name = 'woldlab_090125_HWI-EAS_0000_ilmn200901_l1_r1.tar.bz2'
73 pathname = os.path.join(path, name)
74 f = sequences.parse_qseq(path, name)
76 self.failUnlessEqual(f.filetype, 'qseq')
77 self.failUnlessEqual(f.path, pathname)
78 self.failUnlessEqual(f.lane, 1)
79 self.failUnlessEqual(f.read, 1)
80 self.failUnlessEqual(f.pf, None)
81 self.failUnlessEqual(f.cycle, 202)
84 path = '/root/42BW9AAXX/C1-38'
85 name = 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l4_r1_pass.fastq.bz2'
86 pathname = os.path.join(path,name)
87 f = sequences.parse_fastq(path, name)
89 self.failUnlessEqual(f.filetype, 'fastq')
90 self.failUnlessEqual(f.path, pathname)
91 self.failUnlessEqual(f.flowcell, '42BW9AAXX')
92 self.failUnlessEqual(f.lane, 4)
93 self.failUnlessEqual(f.read, 1)
94 self.failUnlessEqual(f.pf, True)
95 self.failUnlessEqual(f.cycle, 38)
97 name = 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l4_r2_nopass.fastq.bz2'
98 pathname = os.path.join(path,name)
99 f = sequences.parse_fastq(path, name)
101 self.failUnlessEqual(f.filetype, 'fastq')
102 self.failUnlessEqual(f.path, pathname)
103 self.failUnlessEqual(f.flowcell, '42BW9AAXX')
104 self.failUnlessEqual(f.lane, 4)
105 self.failUnlessEqual(f.read, 2)
106 self.failUnlessEqual(f.pf, False)
107 self.failUnlessEqual(f.cycle, 38)
109 def test_project_fastq(self):
110 path = '/root/42BW9AAXX/C1-38/Project_12345'
111 name = '11111_NoIndex_L001_R1_001.fastq.gz'
112 pathname = os.path.join(path,name)
113 f = sequences.parse_fastq(path, name)
115 self.failUnlessEqual(f.filetype, 'fastq')
116 self.failUnlessEqual(f.path, pathname)
117 self.failUnlessEqual(f.flowcell, '42BW9AAXX')
118 self.failUnlessEqual(f.lane, 1)
119 self.failUnlessEqual(f.read, 1)
120 self.failUnlessEqual(f.pf, True)
121 self.failUnlessEqual(f.project, '11111')
122 self.failUnlessEqual(f.index, 'NoIndex')
123 self.failUnlessEqual(f.cycle, 38)
125 name = '11112_AAATTT_L001_R2_003.fastq.gz'
126 pathname = os.path.join(path,name)
127 f = sequences.parse_fastq(path, name)
129 self.failUnlessEqual(f.filetype, 'fastq')
130 self.failUnlessEqual(f.path, pathname)
131 self.failUnlessEqual(f.flowcell, '42BW9AAXX')
132 self.failUnlessEqual(f.lane, 1)
133 self.failUnlessEqual(f.read, 2)
134 self.failUnlessEqual(f.pf, True)
135 self.failUnlessEqual(f.project, '11112')
136 self.failUnlessEqual(f.index, 'AAATTT')
137 self.failUnlessEqual(f.cycle, 38)
139 def test_eland(self):
140 path = '/root/42BW9AAXX/C1-38'
141 name = 's_4_eland_extended.txt.bz2'
142 pathname = os.path.join(path,name)
143 f = sequences.parse_eland(path, name)
145 self.failUnlessEqual(f.filetype, 'eland')
146 self.failUnlessEqual(f.path, pathname)
147 self.failUnlessEqual(f.flowcell, '42BW9AAXX')
148 self.failUnlessEqual(f.lane, 4)
149 self.failUnlessEqual(f.read, None)
150 self.failUnlessEqual(f.pf, None)
151 self.failUnlessEqual(f.cycle, 38)
153 path = '/root/42BW9AAXX/C1-152'
154 name = 's_4_1_eland_extended.txt.bz2'
155 pathname = os.path.join(path,name)
156 f = sequences.parse_eland(path, name)
158 self.failUnlessEqual(f.filetype, 'eland')
159 self.failUnlessEqual(f.path, pathname)
160 self.failUnlessEqual(f.flowcell, '42BW9AAXX')
161 self.failUnlessEqual(f.lane, 4)
162 self.failUnlessEqual(f.read, 1)
163 self.failUnlessEqual(f.pf, None)
164 self.failUnlessEqual(f.cycle, 152)
166 def test_sequence_file_equality(self):
167 path = '/root/42BW9AAXX/C1-38'
168 name = 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l4_r1.tar.bz2'
170 f1_qseq = sequences.parse_qseq(path, name)
171 f2_qseq = sequences.parse_qseq(path, name)
173 self.failUnlessEqual(f1_qseq, f2_qseq)
177 Make sure that the quick and dirty sql interface in sequences works
180 db = sqlite3.connect(":memory:")
182 sequences.create_sequence_table(c)
184 data = [('/root/42BW9AAXX/C1-152',
185 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l1_r1.tar.bz2'),
186 ('/root/42BW9AAXX/C1-152',
187 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l1_r2.tar.bz2'),
188 ('/root/42BW9AAXX/C1-152',
189 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l2_r1.tar.bz2'),
190 ('/root/42BW9AAXX/C1-152',
191 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_l2_r21.tar.bz2'),]
193 for path, name in data:
194 seq = sequences.parse_qseq(path, name)
197 count = c.execute("select count(*) from sequences")
198 row = count.fetchone()
199 self.failUnlessEqual(row[0], 4)
def suite():
    """Return a test suite holding every ``test``-prefixed SequenceFileTests method."""
    # unittest.makeSuite() was deprecated in Python 3.11 and removed in 3.13;
    # the default loader collects the same 'test'-prefixed methods.
    return unittest.defaultTestLoader.loadTestsFromTestCase(SequenceFileTests)


if __name__ == "__main__":
    # Run through suite() so the module can be executed directly as a script.
    unittest.main(defaultTest="suite")