3 from datetime import datetime, date
9 from htsworkflow.pipelines import firecrest
10 from htsworkflow.pipelines import bustard
11 from htsworkflow.pipelines import gerald
12 from htsworkflow.pipelines import runfolder
13 from htsworkflow.pipelines.runfolder import ElementTree
15 from htsworkflow.pipelines.test.simulate_runfolder import *
18 def make_summary_htm(gerald_dir):
19 summary_htm = """<!--RUN_TIME Mon Apr 21 11:52:25 2008 -->
20 <!--SOFTWARE_VERSION @(#) $Id: jerboa.pl,v 1.31 2007/03/05 17:52:15 km Exp $-->
24 <a name="Top"><h2><title>080416_HWI-EAS229_0024_207BTAAXX Summary</title></h2></a>
25 <h1>Summary Information For Experiment 080416_HWI-EAS229_0024_207BTAAXX on Machine HWI-EAS229</h1>
26 <h2><br></br>Chip Summary<br></br></h2>
27 <table border="1" cellpadding="5">
28 <tr><td>Machine</td><td>HWI-EAS229</td></tr>
29 <tr><td>Run Folder</td><td>080416_HWI-EAS229_0024_207BTAAXX</td></tr>
30 <tr><td>Chip ID</td><td>unknown</td></tr>
32 <h2><br></br>Lane Parameter Summary<br></br></h2>
33 <table border="1" cellpadding="5">
37 <td>Sample Target</td>
49 <td>'((CHASTITY>=0.6))'</td>
50 <td><a href="#Lane1">Lane 1</a></td>
58 <td>'((CHASTITY>=0.6))'</td>
59 <td><a href="#Lane2">Lane 2</a></td>
67 <td>'((CHASTITY>=0.6))'</td>
68 <td><a href="#Lane3">Lane 3</a></td>
76 <td>'((CHASTITY>=0.6))'</td>
77 <td><a href="#Lane4">Lane 4</a></td>
85 <td>'((CHASTITY>=0.6))'</td>
86 <td><a href="#Lane5">Lane 5</a></td>
94 <td>'((CHASTITY>=0.6))'</td>
95 <td><a href="#Lane6">Lane 6</a></td>
103 <td>'((CHASTITY>=0.6))'</td>
104 <td><a href="#Lane7">Lane 7</a></td>
112 <td>'((CHASTITY>=0.6))'</td>
113 <td><a href="#Lane8">Lane 8</a></td>
116 <h2><br></br>Lane Results Summary<br></br></h2>
117 <table border="1" cellpadding="5">
122 <td>Av 1st Cycle Int </td>
123 <td>% intensity after 20 cycles </td>
124 <td>% PF Clusters </td>
125 <td>% Align (PF) </td>
126 <td>Av Alignment Score (PF) </td>
127 <td> % Error Rate (PF) </td>
131 <td>17421 +/- 2139</td>
132 <td>7230 +/- 801</td>
133 <td>23.73 +/- 10.79</td>
134 <td>13.00 +/- 22.91</td>
135 <td>32.03 +/- 18.45</td>
136 <td>6703.57 +/- 3753.85</td>
137 <td>4.55 +/- 4.81</td>
141 <td>20311 +/- 2402</td>
142 <td>7660 +/- 678</td>
143 <td>17.03 +/- 4.40</td>
144 <td>40.74 +/- 30.33</td>
145 <td>29.54 +/- 9.03</td>
146 <td>5184.02 +/- 1631.54</td>
147 <td>3.27 +/- 3.94</td>
151 <td>20193 +/- 2399</td>
152 <td>7700 +/- 797</td>
153 <td>15.75 +/- 3.30</td>
154 <td>56.56 +/- 17.16</td>
155 <td>27.33 +/- 7.48</td>
156 <td>4803.49 +/- 1313.31</td>
157 <td>3.07 +/- 2.86</td>
161 <td>15537 +/- 2531</td>
162 <td>7620 +/- 1392</td>
163 <td>15.37 +/- 3.79</td>
164 <td>63.05 +/- 18.30</td>
165 <td>15.88 +/- 4.99</td>
166 <td>3162.13 +/- 962.59</td>
167 <td>3.11 +/- 2.22</td>
171 <td>32047 +/- 3356</td>
172 <td>8093 +/- 831</td>
173 <td>23.79 +/- 6.18</td>
174 <td>53.36 +/- 18.06</td>
175 <td>48.04 +/- 13.77</td>
176 <td>9866.23 +/- 2877.30</td>
177 <td>2.26 +/- 1.16</td>
181 <td>32946 +/- 4753</td>
182 <td>8227 +/- 736</td>
183 <td>24.07 +/- 4.69</td>
184 <td>54.65 +/- 12.57</td>
185 <td>50.98 +/- 10.54</td>
186 <td>10468.86 +/- 2228.53</td>
187 <td>2.21 +/- 2.33</td>
191 <td>39504 +/- 4171</td>
192 <td>8401 +/- 785</td>
193 <td>22.55 +/- 4.56</td>
194 <td>45.22 +/- 10.34</td>
195 <td>48.41 +/- 9.67</td>
196 <td>9829.40 +/- 1993.20</td>
197 <td>2.26 +/- 1.11</td>
201 <td>37998 +/- 3792</td>
202 <td>8443 +/- 1211</td>
203 <td>39.03 +/- 7.52</td>
204 <td>42.16 +/- 12.35</td>
205 <td>40.98 +/- 14.89</td>
206 <td>8128.87 +/- 3055.34</td>
207 <td>3.57 +/- 2.77</td>
213 pathname = os.path.join(gerald_dir, 'Summary.htm')
214 f = open(pathname, 'w')
218 def make_eland_results(gerald_dir):
219 eland_result = """>HWI-EAS229_24_207BTAAXX:1:7:599:759 ACATAGNCACAGACATAAACATAGACATAGAC U0 1 1 3 chrUextra.fa 28189829 R D.
220 >HWI-EAS229_24_207BTAAXX:1:7:205:842 AAACAANNCTCCCAAACACGTAAACTGGAAAA U1 0 1 0 chr2L.fa 8796855 R DD 24T
221 >HWI-EAS229_24_207BTAAXX:1:7:776:582 AGCTCANCCGATCGAAAACCTCNCCAAGCAAT NM 0 0 0
222 >HWI-EAS229_24_207BTAAXX:1:7:205:842 AAACAANNCTCCCAAACACGTAAACTGGAAAA U1 0 1 0 Lambda.fa 8796855 R DD 24T
225 pathname = os.path.join(gerald_dir,
226 's_%d_eland_result.txt' % (i,))
227 f = open(pathname, 'w')
228 f.write(eland_result)
231 class RunfolderTests(unittest.TestCase):
233 Test components of the runfolder processing code
234 which includes firecrest, bustard, and gerald
237 # make a fake runfolder directory
238 self.temp_dir = tempfile.mkdtemp(prefix='tmp_runfolder_')
240 self.runfolder_dir = os.path.join(self.temp_dir,
241 '080102_HWI-EAS229_0010_207BTAAXX')
242 os.mkdir(self.runfolder_dir)
244 self.data_dir = os.path.join(self.runfolder_dir, 'Data')
245 os.mkdir(self.data_dir)
247 self.firecrest_dir = os.path.join(self.data_dir,
248 'C1-33_Firecrest1.8.28_12-04-2008_diane'
250 os.mkdir(self.firecrest_dir)
251 self.matrix_dir = os.path.join(self.firecrest_dir, 'Matrix')
252 os.mkdir(self.matrix_dir)
253 make_matrix(self.matrix_dir)
255 self.bustard_dir = os.path.join(self.firecrest_dir,
256 'Bustard1.8.28_12-04-2008_diane')
257 os.mkdir(self.bustard_dir)
258 make_phasing_params(self.bustard_dir)
260 self.gerald_dir = os.path.join(self.bustard_dir,
261 'GERALD_12-04-2008_diane')
262 os.mkdir(self.gerald_dir)
263 make_gerald_config(self.gerald_dir)
264 make_summary_htm(self.gerald_dir)
265 make_eland_results(self.gerald_dir)
268 shutil.rmtree(self.temp_dir)
270 def test_firecrest(self):
272 Construct a firecrest object
274 f = firecrest.firecrest(self.firecrest_dir)
275 self.failUnlessEqual(f.version, '1.8.28')
276 self.failUnlessEqual(f.start, 1)
277 self.failUnlessEqual(f.stop, 33)
278 self.failUnlessEqual(f.user, 'diane')
279 self.failUnlessEqual(f.date, date(2008,4,12))
281 xml = f.get_elements()
282 # just make sure that element tree can serialize the tree
283 xml_str = ElementTree.tostring(xml)
285 f2 = firecrest.Firecrest(xml=xml)
286 self.failUnlessEqual(f.version, f2.version)
287 self.failUnlessEqual(f.start, f2.start)
288 self.failUnlessEqual(f.stop, f2.stop)
289 self.failUnlessEqual(f.user, f2.user)
290 self.failUnlessEqual(f.date, f2.date)
292 def test_bustard(self):
294 construct a bustard object
296 b = bustard.bustard(self.bustard_dir)
297 self.failUnlessEqual(b.version, '1.8.28')
298 self.failUnlessEqual(b.date, date(2008,4,12))
299 self.failUnlessEqual(b.user, 'diane')
300 self.failUnlessEqual(len(b.phasing), 8)
301 self.failUnlessAlmostEqual(b.phasing[8].phasing, 0.0099)
303 xml = b.get_elements()
304 b2 = bustard.Bustard(xml=xml)
305 self.failUnlessEqual(b.version, b2.version)
306 self.failUnlessEqual(b.date, b2.date )
307 self.failUnlessEqual(b.user, b2.user)
308 self.failUnlessEqual(len(b.phasing), len(b2.phasing))
309 for key in b.phasing.keys():
310 self.failUnlessEqual(b.phasing[key].lane,
311 b2.phasing[key].lane)
312 self.failUnlessEqual(b.phasing[key].phasing,
313 b2.phasing[key].phasing)
314 self.failUnlessEqual(b.phasing[key].prephasing,
315 b2.phasing[key].prephasing)
317 def test_gerald(self):
318 # need to update gerald and make tests for it
319 g = gerald.gerald(self.gerald_dir)
321 self.failUnlessEqual(g.version,
322 '@(#) Id: GERALD.pl,v 1.68.2.2 2007/06/13 11:08:49 km Exp')
323 self.failUnlessEqual(g.date, datetime(2008,4,19,19,8,30))
324 self.failUnlessEqual(len(g.lanes), len(g.lanes.keys()))
325 self.failUnlessEqual(len(g.lanes), len(g.lanes.items()))
328 # list of genomes, matches what was defined up in
329 # make_gerald_config.
330 # the first None is to offset the genomes list to be 1..9
331 # instead of pythons default 0..8
332 genomes = [None, '/g/dm3', '/g/equcab1', '/g/equcab1', '/g/canfam2',
333 '/g/hg18', '/g/hg18', '/g/hg18', '/g/hg18', ]
335 # test lane specific parameters from gerald config file
337 cur_lane = g.lanes[i]
338 self.failUnlessEqual(cur_lane.analysis, 'eland')
339 self.failUnlessEqual(cur_lane.eland_genome, genomes[i])
340 self.failUnlessEqual(cur_lane.read_length, '32')
341 self.failUnlessEqual(cur_lane.use_bases, 'Y'*32)
343 # test data extracted from summary file
345 (17421, 2139), (20311, 2402), (20193, 2399), (15537, 2531),
346 (32047, 3356), (32946, 4753), (39504, 4171), (37998, 3792)]
348 self.failUnlessEqual(len(g.summary), 1)
350 summary_lane = g.summary[0][i]
351 self.failUnlessEqual(summary_lane.cluster, clusters[i])
352 self.failUnlessEqual(summary_lane.lane, i)
354 xml = g.get_elements()
355 # just make sure that element tree can serialize the tree
356 xml_str = ElementTree.tostring(xml)
357 g2 = gerald.Gerald(xml=xml)
359 # do it all again after extracting from the xml file
360 self.failUnlessEqual(g.version, g2.version)
361 self.failUnlessEqual(g.date, g2.date)
362 self.failUnlessEqual(len(g.lanes.keys()), len(g2.lanes.keys()))
363 self.failUnlessEqual(len(g.lanes.items()), len(g2.lanes.items()))
365 # test lane specific parameters from gerald config file
368 g2_lane = g2.lanes[i]
369 self.failUnlessEqual(g_lane.analysis, g2_lane.analysis)
370 self.failUnlessEqual(g_lane.eland_genome, g2_lane.eland_genome)
371 self.failUnlessEqual(g_lane.read_length, g2_lane.read_length)
372 self.failUnlessEqual(g_lane.use_bases, g2_lane.use_bases)
374 self.failUnlessEqual(len(g.summary), 1)
375 # test (some) summary elements
377 g_summary = g.summary[0][i]
378 g2_summary = g2.summary[0][i]
379 self.failUnlessEqual(g_summary.cluster, g2_summary.cluster)
380 self.failUnlessEqual(g_summary.lane, g2_summary.lane)
382 g_eland = g.eland_results
383 g2_eland = g2.eland_results
384 for lane in g_eland.results[0].keys():
385 g_results = g_eland.results[0][lane]
386 g2_results = g2_eland.results[0][lane]
387 self.failUnlessEqual(g_results.reads,
389 self.failUnlessEqual(len(g_results.mapped_reads),
390 len(g2_results.mapped_reads))
391 for k in g_results.mapped_reads.keys():
392 self.failUnlessEqual(g_results.mapped_reads[k],
393 g2_results.mapped_reads[k])
395 self.failUnlessEqual(len(g_results.match_codes),
396 len(g2_results.match_codes))
397 for k in g_results.match_codes.keys():
398 self.failUnlessEqual(g_results.match_codes[k],
399 g2_results.match_codes[k])
402 def test_eland(self):
403 dm3_map = { 'chrUextra.fa' : 'dm3/chrUextra.fa',
404 'chr2L.fa': 'dm3/chr2L.fa',
405 'Lambda.fa': 'Lambda.fa'}
406 genome_maps = { 1:dm3_map, 2:dm3_map, 3:dm3_map, 4:dm3_map,
407 5:dm3_map, 6:dm3_map, 7:dm3_map, 8:dm3_map }
408 eland = gerald.eland(self.gerald_dir, genome_maps=genome_maps)
411 lane = eland.results[0][i]
412 self.failUnlessEqual(lane.reads, 4)
413 self.failUnlessEqual(lane.sample_name, "s")
414 self.failUnlessEqual(lane.lane_id, i)
415 self.failUnlessEqual(len(lane.mapped_reads), 3)
416 self.failUnlessEqual(lane.mapped_reads['Lambda.fa'], 1)
417 self.failUnlessEqual(lane.mapped_reads['dm3/chr2L.fa'], 1)
418 self.failUnlessEqual(lane.match_codes['U1'], 2)
419 self.failUnlessEqual(lane.match_codes['NM'], 1)
421 xml = eland.get_elements()
422 # just make sure that element tree can serialize the tree
423 xml_str = ElementTree.tostring(xml)
424 e2 = gerald.ELAND(xml=xml)
427 l1 = eland.results[0][i]
428 l2 = e2.results[0][i]
429 self.failUnlessEqual(l1.reads, l2.reads)
430 self.failUnlessEqual(l1.sample_name, l2.sample_name)
431 self.failUnlessEqual(l1.lane_id, l2.lane_id)
432 self.failUnlessEqual(len(l1.mapped_reads), len(l2.mapped_reads))
433 self.failUnlessEqual(len(l1.mapped_reads), 3)
434 for k in l1.mapped_reads.keys():
435 self.failUnlessEqual(l1.mapped_reads[k],
438 self.failUnlessEqual(len(l1.match_codes), 9)
439 self.failUnlessEqual(len(l1.match_codes), len(l2.match_codes))
440 for k in l1.match_codes.keys():
441 self.failUnlessEqual(l1.match_codes[k],
444 def test_runfolder(self):
445 runs = runfolder.get_runs(self.runfolder_dir)
447 # do we get the flowcell id from the filename?
448 self.failUnlessEqual(len(runs), 1)
449 self.failUnlessEqual(runs[0].name, 'run_207BTAAXX_2008-04-19.xml')
451 # do we get the flowcell id from the FlowcellId.xml file
452 make_flowcell_id(self.runfolder_dir, '207BTAAXY')
453 runs = runfolder.get_runs(self.runfolder_dir)
454 self.failUnlessEqual(len(runs), 1)
455 self.failUnlessEqual(runs[0].name, 'run_207BTAAXY_2008-04-19.xml')
458 xml = r1.get_elements()
459 xml_str = ElementTree.tostring(xml)
461 r2 = runfolder.PipelineRun(xml=xml)
462 self.failUnlessEqual(r1.name, r2.name)
463 self.failIfEqual(r2.image_analysis, None)
464 self.failIfEqual(r2.bustard, None)
465 self.failIfEqual(r2.gerald, None)
469 return unittest.makeSuite(RunfolderTests,'test')
471 if __name__ == "__main__":
472 unittest.main(defaultTest="suite")