2 from __future__ import absolute_import
4 from datetime import datetime, date
8 from unittest import TestCase
10 from htsworkflow.pipelines import firecrest
11 from htsworkflow.pipelines import bustard
12 from htsworkflow.pipelines import gerald
13 from htsworkflow.pipelines import runfolder
14 from htsworkflow.pipelines import ElementTree
16 from .simulate_runfolder import *
19 def make_summary_htm(gerald_dir):
20 summary_htm = """<!--RUN_TIME Mon Apr 21 11:52:25 2008 -->
21 <!--SOFTWARE_VERSION @(#) $Id: jerboa.pl,v 1.31 2007/03/05 17:52:15 km Exp $-->
25 <a name="Top"><h2><title>080416_HWI-EAS229_0024_207BTAAXX Summary</title></h2></a>
26 <h1>Summary Information For Experiment 080416_HWI-EAS229_0024_207BTAAXX on Machine HWI-EAS229</h1>
27 <h2><br></br>Chip Summary<br></br></h2>
28 <table border="1" cellpadding="5">
29 <tr><td>Machine</td><td>HWI-EAS229</td></tr>
30 <tr><td>Run Folder</td><td>080416_HWI-EAS229_0024_207BTAAXX</td></tr>
31 <tr><td>Chip ID</td><td>unknown</td></tr>
33 <h2><br></br>Lane Parameter Summary<br></br></h2>
34 <table border="1" cellpadding="5">
38 <td>Sample Target</td>
50 <td>'((CHASTITY>=0.6))'</td>
51 <td><a href="#Lane1">Lane 1</a></td>
59 <td>'((CHASTITY>=0.6))'</td>
60 <td><a href="#Lane2">Lane 2</a></td>
68 <td>'((CHASTITY>=0.6))'</td>
69 <td><a href="#Lane3">Lane 3</a></td>
77 <td>'((CHASTITY>=0.6))'</td>
78 <td><a href="#Lane4">Lane 4</a></td>
86 <td>'((CHASTITY>=0.6))'</td>
87 <td><a href="#Lane5">Lane 5</a></td>
95 <td>'((CHASTITY>=0.6))'</td>
96 <td><a href="#Lane6">Lane 6</a></td>
104 <td>'((CHASTITY>=0.6))'</td>
105 <td><a href="#Lane7">Lane 7</a></td>
113 <td>'((CHASTITY>=0.6))'</td>
114 <td><a href="#Lane8">Lane 8</a></td>
117 <h2><br></br>Lane Results Summary<br></br></h2>
118 <table border="1" cellpadding="5">
123 <td>Av 1st Cycle Int </td>
124 <td>% intensity after 20 cycles </td>
125 <td>% PF Clusters </td>
126 <td>% Align (PF) </td>
127 <td>Av Alignment Score (PF) </td>
128 <td> % Error Rate (PF) </td>
132 <td>17421 +/- 2139</td>
133 <td>7230 +/- 801</td>
134 <td>23.73 +/- 10.79</td>
135 <td>13.00 +/- 22.91</td>
136 <td>32.03 +/- 18.45</td>
137 <td>6703.57 +/- 3753.85</td>
138 <td>4.55 +/- 4.81</td>
142 <td>20311 +/- 2402</td>
143 <td>7660 +/- 678</td>
144 <td>17.03 +/- 4.40</td>
145 <td>40.74 +/- 30.33</td>
146 <td>29.54 +/- 9.03</td>
147 <td>5184.02 +/- 1631.54</td>
148 <td>3.27 +/- 3.94</td>
152 <td>20193 +/- 2399</td>
153 <td>7700 +/- 797</td>
154 <td>15.75 +/- 3.30</td>
155 <td>56.56 +/- 17.16</td>
156 <td>27.33 +/- 7.48</td>
157 <td>4803.49 +/- 1313.31</td>
158 <td>3.07 +/- 2.86</td>
162 <td>15537 +/- 2531</td>
163 <td>7620 +/- 1392</td>
164 <td>15.37 +/- 3.79</td>
165 <td>63.05 +/- 18.30</td>
166 <td>15.88 +/- 4.99</td>
167 <td>3162.13 +/- 962.59</td>
168 <td>3.11 +/- 2.22</td>
172 <td>32047 +/- 3356</td>
173 <td>8093 +/- 831</td>
174 <td>23.79 +/- 6.18</td>
175 <td>53.36 +/- 18.06</td>
176 <td>48.04 +/- 13.77</td>
177 <td>9866.23 +/- 2877.30</td>
178 <td>2.26 +/- 1.16</td>
182 <td>32946 +/- 4753</td>
183 <td>8227 +/- 736</td>
184 <td>24.07 +/- 4.69</td>
185 <td>54.65 +/- 12.57</td>
186 <td>50.98 +/- 10.54</td>
187 <td>10468.86 +/- 2228.53</td>
188 <td>2.21 +/- 2.33</td>
192 <td>39504 +/- 4171</td>
193 <td>8401 +/- 785</td>
194 <td>22.55 +/- 4.56</td>
195 <td>45.22 +/- 10.34</td>
196 <td>48.41 +/- 9.67</td>
197 <td>9829.40 +/- 1993.20</td>
198 <td>2.26 +/- 1.11</td>
202 <td>37998 +/- 3792</td>
203 <td>8443 +/- 1211</td>
204 <td>39.03 +/- 7.52</td>
205 <td>42.16 +/- 12.35</td>
206 <td>40.98 +/- 14.89</td>
207 <td>8128.87 +/- 3055.34</td>
208 <td>3.57 +/- 2.77</td>
214 pathname = os.path.join(gerald_dir, 'Summary.htm')
215 f = open(pathname, 'w')
219 def make_eland_results(gerald_dir):
220 eland_result = """>HWI-EAS229_24_207BTAAXX:1:7:599:759 ACATAGNCACAGACATAAACATAGACATAGAC U0 1 1 3 chrUextra.fa 28189829 R D.
221 >HWI-EAS229_24_207BTAAXX:1:7:205:842 AAACAANNCTCCCAAACACGTAAACTGGAAAA U1 0 1 0 chr2L.fa 8796855 R DD 24T
222 >HWI-EAS229_24_207BTAAXX:1:7:776:582 AGCTCANCCGATCGAAAACCTCNCCAAGCAAT NM 0 0 0
223 >HWI-EAS229_24_207BTAAXX:1:7:205:842 AAACAANNCTCCCAAACACGTAAACTGGAAAA U1 0 1 0 Lambda.fa 8796855 R DD 24T
226 pathname = os.path.join(gerald_dir,
227 's_%d_eland_result.txt' % (i,))
228 f = open(pathname, 'w')
229 f.write(eland_result)
232 class RunfolderTests(TestCase):
234 Test components of the runfolder processing code
235 which includes firecrest, bustard, and gerald
238 # make a fake runfolder directory
239 self.temp_dir = tempfile.mkdtemp(prefix='tmp_runfolder_')
241 self.runfolder_dir = os.path.join(self.temp_dir,
242 '080102_HWI-EAS229_0010_207BTAAXX')
243 os.mkdir(self.runfolder_dir)
245 self.data_dir = os.path.join(self.runfolder_dir, 'Data')
246 os.mkdir(self.data_dir)
248 self.firecrest_dir = os.path.join(self.data_dir,
249 'C1-33_Firecrest1.8.28_12-04-2008_diane'
251 os.mkdir(self.firecrest_dir)
252 self.matrix_dir = os.path.join(self.firecrest_dir, 'Matrix')
253 os.mkdir(self.matrix_dir)
254 matrix_filename = os.path.join(self.matrix_dir, 's_matrix')
255 make_matrix(matrix_filename)
257 self.bustard_dir = os.path.join(self.firecrest_dir,
258 'Bustard1.8.28_12-04-2008_diane')
259 os.mkdir(self.bustard_dir)
260 make_phasing_params(self.bustard_dir)
262 self.gerald_dir = os.path.join(self.bustard_dir,
263 'GERALD_12-04-2008_diane')
264 os.mkdir(self.gerald_dir)
265 make_gerald_config_026(self.gerald_dir)
266 make_summary_htm(self.gerald_dir)
267 make_eland_results(self.gerald_dir)
270 shutil.rmtree(self.temp_dir)
272 def test_firecrest(self):
274 Construct a firecrest object
276 f = firecrest.firecrest(self.firecrest_dir)
277 self.failUnlessEqual(f.software, 'Firecrest')
278 self.failUnlessEqual(f.version, '1.8.28')
279 self.failUnlessEqual(f.start, 1)
280 self.failUnlessEqual(f.stop, 33)
281 self.failUnlessEqual(f.user, 'diane')
282 self.failUnlessEqual(f.date, date(2008,4,12))
284 xml = f.get_elements()
285 # just make sure that element tree can serialize the tree
286 xml_str = ElementTree.tostring(xml)
288 f2 = firecrest.Firecrest(xml=xml)
289 self.failUnlessEqual(f.software, f2.software)
290 self.failUnlessEqual(f.version, f2.version)
291 self.failUnlessEqual(f.start, f2.start)
292 self.failUnlessEqual(f.stop, f2.stop)
293 self.failUnlessEqual(f.user, f2.user)
294 self.failUnlessEqual(f.date, f2.date)
296 def test_bustard(self):
298 construct a bustard object
300 b = bustard.bustard(self.bustard_dir)
301 self.failUnlessEqual(b.software, 'Bustard')
302 self.failUnlessEqual(b.version, '1.8.28')
303 self.failUnlessEqual(b.date, date(2008,4,12))
304 self.failUnlessEqual(b.user, 'diane')
305 self.failUnlessEqual(len(b.phasing), 8)
306 self.failUnlessAlmostEqual(b.phasing[8].phasing, 0.0099)
308 xml = b.get_elements()
309 b2 = bustard.Bustard(xml=xml)
310 self.failUnlessEqual(b.software, b2.software)
311 self.failUnlessEqual(b.version, b2.version)
312 self.failUnlessEqual(b.date, b2.date )
313 self.failUnlessEqual(b.user, b2.user)
314 self.failUnlessEqual(len(b.phasing), len(b2.phasing))
315 for key in b.phasing.keys():
316 self.failUnlessEqual(b.phasing[key].lane,
317 b2.phasing[key].lane)
318 self.failUnlessEqual(b.phasing[key].phasing,
319 b2.phasing[key].phasing)
320 self.failUnlessEqual(b.phasing[key].prephasing,
321 b2.phasing[key].prephasing)
323 def test_gerald(self):
324 # need to update gerald and make tests for it
325 g = gerald.gerald(self.gerald_dir)
327 self.failUnlessEqual(g.software, 'GERALD')
328 self.failUnlessEqual(g.version, '1.68.2.2')
329 self.failUnlessEqual(g.date, datetime(2008,4,19,19,8,30))
330 self.failUnlessEqual(len(g.lanes), len(g.lanes.keys()))
331 self.failUnlessEqual(len(g.lanes), len(g.lanes.items()))
334 # list of genomes, matches what was defined up in
335 # make_gerald_config.
336 # the first None is to offset the genomes list to be 1..9
337 # instead of pythons default 0..8
338 genomes = [None, '/g/dm3', '/g/equcab1', '/g/equcab1', '/g/canfam2',
339 '/g/hg18', '/g/hg18', '/g/hg18', '/g/hg18', ]
341 # test lane specific parameters from gerald config file
343 cur_lane = g.lanes[i]
344 self.failUnlessEqual(cur_lane.analysis, 'eland')
345 self.failUnlessEqual(cur_lane.eland_genome, genomes[i])
346 self.failUnlessEqual(cur_lane.read_length, '32')
347 self.failUnlessEqual(cur_lane.use_bases, 'Y'*32)
349 # test data extracted from summary file
351 (17421, 2139), (20311, 2402), (20193, 2399), (15537, 2531),
352 (32047, 3356), (32946, 4753), (39504, 4171), (37998, 3792)]
354 self.failUnlessEqual(len(g.summary), 1)
356 summary_lane = g.summary[0][i]
357 self.failUnlessEqual(summary_lane.cluster, clusters[i])
358 self.failUnlessEqual(summary_lane.lane, i)
360 xml = g.get_elements()
361 # just make sure that element tree can serialize the tree
362 xml_str = ElementTree.tostring(xml)
363 g2 = gerald.Gerald(xml=xml)
365 # do it all again after extracting from the xml file
366 self.failUnlessEqual(g.version, g2.version)
367 self.failUnlessEqual(g.date, g2.date)
368 self.failUnlessEqual(len(g.lanes.keys()), len(g2.lanes.keys()))
369 self.failUnlessEqual(len(g.lanes.items()), len(g2.lanes.items()))
371 # test lane specific parameters from gerald config file
374 g2_lane = g2.lanes[i]
375 self.failUnlessEqual(g_lane.analysis, g2_lane.analysis)
376 self.failUnlessEqual(g_lane.eland_genome, g2_lane.eland_genome)
377 self.failUnlessEqual(g_lane.read_length, g2_lane.read_length)
378 self.failUnlessEqual(g_lane.use_bases, g2_lane.use_bases)
380 self.failUnlessEqual(len(g.summary), 1)
381 # test (some) summary elements
383 g_summary = g.summary[0][i]
384 g2_summary = g2.summary[0][i]
385 self.failUnlessEqual(g_summary.cluster, g2_summary.cluster)
386 self.failUnlessEqual(g_summary.lane, g2_summary.lane)
388 g_eland = g.eland_results
389 g2_eland = g2.eland_results
391 g_results = g_eland[key]
392 g2_results = g2_eland[key]
393 self.failUnlessEqual(g_results.reads,
395 self.failUnlessEqual(len(g_results.mapped_reads),
396 len(g2_results.mapped_reads))
397 for k in g_results.mapped_reads.keys():
398 self.failUnlessEqual(g_results.mapped_reads[k],
399 g2_results.mapped_reads[k])
401 self.failUnlessEqual(len(g_results.match_codes),
402 len(g2_results.match_codes))
403 for k in g_results.match_codes.keys():
404 self.failUnlessEqual(g_results.match_codes[k],
405 g2_results.match_codes[k])
408 def test_eland(self):
409 dm3_map = { 'chrUextra.fa' : 'dm3/chrUextra.fa',
410 'chr2L.fa': 'dm3/chr2L.fa',
411 'Lambda.fa': 'Lambda.fa'}
412 genome_maps = { 1:dm3_map, 2:dm3_map, 3:dm3_map, 4:dm3_map,
413 5:dm3_map, 6:dm3_map, 7:dm3_map, 8:dm3_map }
414 eland = gerald.eland(self.gerald_dir, genome_maps=genome_maps)
418 self.failUnlessEqual(lane.reads, 4)
419 self.failUnlessEqual(lane.sample_name, "s")
420 self.failUnlessEqual(lane.lane_id, key.lane)
421 self.failUnlessEqual(len(lane.mapped_reads), 3)
422 self.failUnlessEqual(lane.mapped_reads['Lambda.fa'], 1)
423 self.failUnlessEqual(lane.mapped_reads['dm3/chr2L.fa'], 1)
424 self.failUnlessEqual(lane.match_codes['U1'], 2)
425 self.failUnlessEqual(lane.match_codes['NM'], 1)
427 xml = eland.get_elements()
428 # just make sure that element tree can serialize the tree
429 xml_str = ElementTree.tostring(xml)
430 e2 = gerald.ELAND(xml=xml)
435 self.failUnlessEqual(l1.reads, l2.reads)
436 self.failUnlessEqual(l1.sample_name, l2.sample_name)
437 self.failUnlessEqual(l1.lane_id, l2.lane_id)
438 self.failUnlessEqual(len(l1.mapped_reads), len(l2.mapped_reads))
439 self.failUnlessEqual(len(l1.mapped_reads), 3)
440 for k in l1.mapped_reads.keys():
441 self.failUnlessEqual(l1.mapped_reads[k],
444 self.failUnlessEqual(len(l1.match_codes), 9)
445 self.failUnlessEqual(len(l1.match_codes), len(l2.match_codes))
446 for k in l1.match_codes.keys():
447 self.failUnlessEqual(l1.match_codes[k],
450 def test_runfolder(self):
451 runs = runfolder.get_runs(self.runfolder_dir)
453 # do we get the flowcell id from the filename?
454 self.failUnlessEqual(len(runs), 1)
455 self.failUnlessEqual(runs[0].serialization_filename, 'run_207BTAAXX_2008-04-19.xml')
457 # do we get the flowcell id from the FlowcellId.xml file
458 make_flowcell_id(self.runfolder_dir, '207BTAAXY')
459 runs = runfolder.get_runs(self.runfolder_dir)
460 self.failUnlessEqual(len(runs), 1)
461 self.failUnlessEqual(runs[0].serialization_filename, 'run_207BTAAXY_2008-04-19.xml')
464 xml = r1.get_elements()
465 xml_str = ElementTree.tostring(xml)
467 r2 = runfolder.PipelineRun(xml=xml)
468 self.failUnlessEqual(r1.serialization_filename, r2.serialization_filename)
469 self.failIfEqual(r2.image_analysis, None)
470 self.failIfEqual(r2.bustard, None)
471 self.failIfEqual(r2.gerald, None)
475 from unittest import TestSuite, defaultTestLoader
477 suite.addTests(defaultTestLoader.loadTestsFromTestCase(RunfolderTests))
481 if __name__ == "__main__":
482 from unittest import main
483 main(defaultTest="suite")