3 from datetime import datetime, date
7 from unittest2 import TestCase
9 from htsworkflow.pipelines import firecrest
10 from htsworkflow.pipelines import bustard
11 from htsworkflow.pipelines import gerald
12 from htsworkflow.pipelines import runfolder
13 from htsworkflow.pipelines.runfolder import ElementTree
15 from htsworkflow.pipelines.test.simulate_runfolder import *
18 def make_summary_htm(gerald_dir):
19 summary_htm = """<!--RUN_TIME Mon Apr 21 11:52:25 2008 -->
20 <!--SOFTWARE_VERSION @(#) $Id: jerboa.pl,v 1.31 2007/03/05 17:52:15 km Exp $-->
24 <a name="Top"><h2><title>080416_HWI-EAS229_0024_207BTAAXX Summary</title></h2></a>
25 <h1>Summary Information For Experiment 080416_HWI-EAS229_0024_207BTAAXX on Machine HWI-EAS229</h1>
26 <h2><br></br>Chip Summary<br></br></h2>
27 <table border="1" cellpadding="5">
28 <tr><td>Machine</td><td>HWI-EAS229</td></tr>
29 <tr><td>Run Folder</td><td>080416_HWI-EAS229_0024_207BTAAXX</td></tr>
30 <tr><td>Chip ID</td><td>unknown</td></tr>
32 <h2><br></br>Lane Parameter Summary<br></br></h2>
33 <table border="1" cellpadding="5">
37 <td>Sample Target</td>
49 <td>'((CHASTITY>=0.6))'</td>
50 <td><a href="#Lane1">Lane 1</a></td>
58 <td>'((CHASTITY>=0.6))'</td>
59 <td><a href="#Lane2">Lane 2</a></td>
67 <td>'((CHASTITY>=0.6))'</td>
68 <td><a href="#Lane3">Lane 3</a></td>
76 <td>'((CHASTITY>=0.6))'</td>
77 <td><a href="#Lane4">Lane 4</a></td>
85 <td>'((CHASTITY>=0.6))'</td>
86 <td><a href="#Lane5">Lane 5</a></td>
94 <td>'((CHASTITY>=0.6))'</td>
95 <td><a href="#Lane6">Lane 6</a></td>
103 <td>'((CHASTITY>=0.6))'</td>
104 <td><a href="#Lane7">Lane 7</a></td>
112 <td>'((CHASTITY>=0.6))'</td>
113 <td><a href="#Lane8">Lane 8</a></td>
116 <h2><br></br>Lane Results Summary<br></br></h2>
117 <table border="1" cellpadding="5">
122 <td>Av 1st Cycle Int </td>
123 <td>% intensity after 20 cycles </td>
124 <td>% PF Clusters </td>
125 <td>% Align (PF) </td>
126 <td>Av Alignment Score (PF) </td>
127 <td> % Error Rate (PF) </td>
131 <td>17421 +/- 2139</td>
132 <td>7230 +/- 801</td>
133 <td>23.73 +/- 10.79</td>
134 <td>13.00 +/- 22.91</td>
135 <td>32.03 +/- 18.45</td>
136 <td>6703.57 +/- 3753.85</td>
137 <td>4.55 +/- 4.81</td>
141 <td>20311 +/- 2402</td>
142 <td>7660 +/- 678</td>
143 <td>17.03 +/- 4.40</td>
144 <td>40.74 +/- 30.33</td>
145 <td>29.54 +/- 9.03</td>
146 <td>5184.02 +/- 1631.54</td>
147 <td>3.27 +/- 3.94</td>
151 <td>20193 +/- 2399</td>
152 <td>7700 +/- 797</td>
153 <td>15.75 +/- 3.30</td>
154 <td>56.56 +/- 17.16</td>
155 <td>27.33 +/- 7.48</td>
156 <td>4803.49 +/- 1313.31</td>
157 <td>3.07 +/- 2.86</td>
161 <td>15537 +/- 2531</td>
162 <td>7620 +/- 1392</td>
163 <td>15.37 +/- 3.79</td>
164 <td>63.05 +/- 18.30</td>
165 <td>15.88 +/- 4.99</td>
166 <td>3162.13 +/- 962.59</td>
167 <td>3.11 +/- 2.22</td>
171 <td>32047 +/- 3356</td>
172 <td>8093 +/- 831</td>
173 <td>23.79 +/- 6.18</td>
174 <td>53.36 +/- 18.06</td>
175 <td>48.04 +/- 13.77</td>
176 <td>9866.23 +/- 2877.30</td>
177 <td>2.26 +/- 1.16</td>
181 <td>32946 +/- 4753</td>
182 <td>8227 +/- 736</td>
183 <td>24.07 +/- 4.69</td>
184 <td>54.65 +/- 12.57</td>
185 <td>50.98 +/- 10.54</td>
186 <td>10468.86 +/- 2228.53</td>
187 <td>2.21 +/- 2.33</td>
191 <td>39504 +/- 4171</td>
192 <td>8401 +/- 785</td>
193 <td>22.55 +/- 4.56</td>
194 <td>45.22 +/- 10.34</td>
195 <td>48.41 +/- 9.67</td>
196 <td>9829.40 +/- 1993.20</td>
197 <td>2.26 +/- 1.11</td>
201 <td>37998 +/- 3792</td>
202 <td>8443 +/- 1211</td>
203 <td>39.03 +/- 7.52</td>
204 <td>42.16 +/- 12.35</td>
205 <td>40.98 +/- 14.89</td>
206 <td>8128.87 +/- 3055.34</td>
207 <td>3.57 +/- 2.77</td>
213 pathname = os.path.join(gerald_dir, 'Summary.htm')
214 f = open(pathname, 'w')
218 def make_eland_results(gerald_dir):
219 eland_result = """>HWI-EAS229_24_207BTAAXX:1:7:599:759 ACATAGNCACAGACATAAACATAGACATAGAC U0 1 1 3 chrUextra.fa 28189829 R D.
220 >HWI-EAS229_24_207BTAAXX:1:7:205:842 AAACAANNCTCCCAAACACGTAAACTGGAAAA U1 0 1 0 chr2L.fa 8796855 R DD 24T
221 >HWI-EAS229_24_207BTAAXX:1:7:776:582 AGCTCANCCGATCGAAAACCTCNCCAAGCAAT NM 0 0 0
222 >HWI-EAS229_24_207BTAAXX:1:7:205:842 AAACAANNCTCCCAAACACGTAAACTGGAAAA U1 0 1 0 Lambda.fa 8796855 R DD 24T
225 pathname = os.path.join(gerald_dir,
226 's_%d_eland_result.txt' % (i,))
227 f = open(pathname, 'w')
228 f.write(eland_result)
231 class RunfolderTests(TestCase):
233 Test components of the runfolder processing code
234 which includes firecrest, bustard, and gerald
237 # make a fake runfolder directory
238 self.temp_dir = tempfile.mkdtemp(prefix='tmp_runfolder_')
240 self.runfolder_dir = os.path.join(self.temp_dir,
241 '080102_HWI-EAS229_0010_207BTAAXX')
242 os.mkdir(self.runfolder_dir)
244 self.data_dir = os.path.join(self.runfolder_dir, 'Data')
245 os.mkdir(self.data_dir)
247 self.firecrest_dir = os.path.join(self.data_dir,
248 'C1-33_Firecrest1.8.28_12-04-2008_diane'
250 os.mkdir(self.firecrest_dir)
251 self.matrix_dir = os.path.join(self.firecrest_dir, 'Matrix')
252 os.mkdir(self.matrix_dir)
253 matrix_filename = os.path.join(self.matrix_dir, 's_matrix')
254 make_matrix(matrix_filename)
256 self.bustard_dir = os.path.join(self.firecrest_dir,
257 'Bustard1.8.28_12-04-2008_diane')
258 os.mkdir(self.bustard_dir)
259 make_phasing_params(self.bustard_dir)
261 self.gerald_dir = os.path.join(self.bustard_dir,
262 'GERALD_12-04-2008_diane')
263 os.mkdir(self.gerald_dir)
264 make_gerald_config_026(self.gerald_dir)
265 make_summary_htm(self.gerald_dir)
266 make_eland_results(self.gerald_dir)
269 shutil.rmtree(self.temp_dir)
271 def test_firecrest(self):
273 Construct a firecrest object
275 f = firecrest.firecrest(self.firecrest_dir)
276 self.failUnlessEqual(f.software, 'Firecrest')
277 self.failUnlessEqual(f.version, '1.8.28')
278 self.failUnlessEqual(f.start, 1)
279 self.failUnlessEqual(f.stop, 33)
280 self.failUnlessEqual(f.user, 'diane')
281 self.failUnlessEqual(f.date, date(2008,4,12))
283 xml = f.get_elements()
284 # just make sure that element tree can serialize the tree
285 xml_str = ElementTree.tostring(xml)
287 f2 = firecrest.Firecrest(xml=xml)
288 self.failUnlessEqual(f.software, f2.software)
289 self.failUnlessEqual(f.version, f2.version)
290 self.failUnlessEqual(f.start, f2.start)
291 self.failUnlessEqual(f.stop, f2.stop)
292 self.failUnlessEqual(f.user, f2.user)
293 self.failUnlessEqual(f.date, f2.date)
295 def test_bustard(self):
297 construct a bustard object
299 b = bustard.bustard(self.bustard_dir)
300 self.failUnlessEqual(b.software, 'Bustard')
301 self.failUnlessEqual(b.version, '1.8.28')
302 self.failUnlessEqual(b.date, date(2008,4,12))
303 self.failUnlessEqual(b.user, 'diane')
304 self.failUnlessEqual(len(b.phasing), 8)
305 self.failUnlessAlmostEqual(b.phasing[8].phasing, 0.0099)
307 xml = b.get_elements()
308 b2 = bustard.Bustard(xml=xml)
309 self.failUnlessEqual(b.software, b2.software)
310 self.failUnlessEqual(b.version, b2.version)
311 self.failUnlessEqual(b.date, b2.date )
312 self.failUnlessEqual(b.user, b2.user)
313 self.failUnlessEqual(len(b.phasing), len(b2.phasing))
314 for key in b.phasing.keys():
315 self.failUnlessEqual(b.phasing[key].lane,
316 b2.phasing[key].lane)
317 self.failUnlessEqual(b.phasing[key].phasing,
318 b2.phasing[key].phasing)
319 self.failUnlessEqual(b.phasing[key].prephasing,
320 b2.phasing[key].prephasing)
322 def test_gerald(self):
323 # need to update gerald and make tests for it
324 g = gerald.gerald(self.gerald_dir)
326 self.failUnlessEqual(g.software, 'GERALD')
327 self.failUnlessEqual(g.version, '1.68.2.2')
328 self.failUnlessEqual(g.date, datetime(2008,4,19,19,8,30))
329 self.failUnlessEqual(len(g.lanes), len(g.lanes.keys()))
330 self.failUnlessEqual(len(g.lanes), len(g.lanes.items()))
333 # list of genomes, matches what was defined up in
334 # make_gerald_config.
335 # the first None is to offset the genomes list to be 1..9
336 # instead of pythons default 0..8
337 genomes = [None, '/g/dm3', '/g/equcab1', '/g/equcab1', '/g/canfam2',
338 '/g/hg18', '/g/hg18', '/g/hg18', '/g/hg18', ]
340 # test lane specific parameters from gerald config file
342 cur_lane = g.lanes[i]
343 self.failUnlessEqual(cur_lane.analysis, 'eland')
344 self.failUnlessEqual(cur_lane.eland_genome, genomes[i])
345 self.failUnlessEqual(cur_lane.read_length, '32')
346 self.failUnlessEqual(cur_lane.use_bases, 'Y'*32)
348 # test data extracted from summary file
350 (17421, 2139), (20311, 2402), (20193, 2399), (15537, 2531),
351 (32047, 3356), (32946, 4753), (39504, 4171), (37998, 3792)]
353 self.failUnlessEqual(len(g.summary), 1)
355 summary_lane = g.summary[0][i]
356 self.failUnlessEqual(summary_lane.cluster, clusters[i])
357 self.failUnlessEqual(summary_lane.lane, i)
359 xml = g.get_elements()
360 # just make sure that element tree can serialize the tree
361 xml_str = ElementTree.tostring(xml)
362 g2 = gerald.Gerald(xml=xml)
364 # do it all again after extracting from the xml file
365 self.failUnlessEqual(g.version, g2.version)
366 self.failUnlessEqual(g.date, g2.date)
367 self.failUnlessEqual(len(g.lanes.keys()), len(g2.lanes.keys()))
368 self.failUnlessEqual(len(g.lanes.items()), len(g2.lanes.items()))
370 # test lane specific parameters from gerald config file
373 g2_lane = g2.lanes[i]
374 self.failUnlessEqual(g_lane.analysis, g2_lane.analysis)
375 self.failUnlessEqual(g_lane.eland_genome, g2_lane.eland_genome)
376 self.failUnlessEqual(g_lane.read_length, g2_lane.read_length)
377 self.failUnlessEqual(g_lane.use_bases, g2_lane.use_bases)
379 self.failUnlessEqual(len(g.summary), 1)
380 # test (some) summary elements
382 g_summary = g.summary[0][i]
383 g2_summary = g2.summary[0][i]
384 self.failUnlessEqual(g_summary.cluster, g2_summary.cluster)
385 self.failUnlessEqual(g_summary.lane, g2_summary.lane)
387 g_eland = g.eland_results
388 g2_eland = g2.eland_results
390 g_results = g_eland[key]
391 g2_results = g2_eland[key]
392 self.failUnlessEqual(g_results.reads,
394 self.failUnlessEqual(len(g_results.mapped_reads),
395 len(g2_results.mapped_reads))
396 for k in g_results.mapped_reads.keys():
397 self.failUnlessEqual(g_results.mapped_reads[k],
398 g2_results.mapped_reads[k])
400 self.failUnlessEqual(len(g_results.match_codes),
401 len(g2_results.match_codes))
402 for k in g_results.match_codes.keys():
403 self.failUnlessEqual(g_results.match_codes[k],
404 g2_results.match_codes[k])
407 def test_eland(self):
408 dm3_map = { 'chrUextra.fa' : 'dm3/chrUextra.fa',
409 'chr2L.fa': 'dm3/chr2L.fa',
410 'Lambda.fa': 'Lambda.fa'}
411 genome_maps = { 1:dm3_map, 2:dm3_map, 3:dm3_map, 4:dm3_map,
412 5:dm3_map, 6:dm3_map, 7:dm3_map, 8:dm3_map }
413 eland = gerald.eland(self.gerald_dir, genome_maps=genome_maps)
417 self.failUnlessEqual(lane.reads, 4)
418 self.failUnlessEqual(lane.sample_name, "s")
419 self.failUnlessEqual(lane.lane_id, key.lane)
420 self.failUnlessEqual(len(lane.mapped_reads), 3)
421 self.failUnlessEqual(lane.mapped_reads['Lambda.fa'], 1)
422 self.failUnlessEqual(lane.mapped_reads['dm3/chr2L.fa'], 1)
423 self.failUnlessEqual(lane.match_codes['U1'], 2)
424 self.failUnlessEqual(lane.match_codes['NM'], 1)
426 xml = eland.get_elements()
427 # just make sure that element tree can serialize the tree
428 xml_str = ElementTree.tostring(xml)
429 e2 = gerald.ELAND(xml=xml)
434 self.failUnlessEqual(l1.reads, l2.reads)
435 self.failUnlessEqual(l1.sample_name, l2.sample_name)
436 self.failUnlessEqual(l1.lane_id, l2.lane_id)
437 self.failUnlessEqual(len(l1.mapped_reads), len(l2.mapped_reads))
438 self.failUnlessEqual(len(l1.mapped_reads), 3)
439 for k in l1.mapped_reads.keys():
440 self.failUnlessEqual(l1.mapped_reads[k],
443 self.failUnlessEqual(len(l1.match_codes), 9)
444 self.failUnlessEqual(len(l1.match_codes), len(l2.match_codes))
445 for k in l1.match_codes.keys():
446 self.failUnlessEqual(l1.match_codes[k],
449 def test_runfolder(self):
450 runs = runfolder.get_runs(self.runfolder_dir)
452 # do we get the flowcell id from the filename?
453 self.failUnlessEqual(len(runs), 1)
454 self.failUnlessEqual(runs[0].name, 'run_207BTAAXX_2008-04-19.xml')
456 # do we get the flowcell id from the FlowcellId.xml file
457 make_flowcell_id(self.runfolder_dir, '207BTAAXY')
458 runs = runfolder.get_runs(self.runfolder_dir)
459 self.failUnlessEqual(len(runs), 1)
460 self.failUnlessEqual(runs[0].name, 'run_207BTAAXY_2008-04-19.xml')
463 xml = r1.get_elements()
464 xml_str = ElementTree.tostring(xml)
466 r2 = runfolder.PipelineRun(xml=xml)
467 self.failUnlessEqual(r1.name, r2.name)
468 self.failIfEqual(r2.image_analysis, None)
469 self.failIfEqual(r2.bustard, None)
470 self.failIfEqual(r2.gerald, None)
474 from unittest2 import TestSuite, defaultTestLoader
476 suite.addTests(defaultTestLoader.loadTestsFromTestCase(RunfolderTests))
480 if __name__ == "__main__":
481 from unittest2 import main
482 main(defaultTest="suite")