Change unittest2 back into unittest.
[htsworkflow.git] / htsworkflow / pipelines / test / test_runfolder026.py
1 #!/usr/bin/env python
2
3 from datetime import datetime, date
4 import os
5 import tempfile
6 import shutil
7 from unittest import TestCase
8
9 from htsworkflow.pipelines import firecrest
10 from htsworkflow.pipelines import bustard
11 from htsworkflow.pipelines import gerald
12 from htsworkflow.pipelines import runfolder
13 from htsworkflow.pipelines import ElementTree
14
15 from htsworkflow.pipelines.test.simulate_runfolder import *
16
17
18 def make_summary_htm(gerald_dir):
19     summary_htm = """<!--RUN_TIME Mon Apr 21 11:52:25 2008 -->
20 <!--SOFTWARE_VERSION @(#) $Id: jerboa.pl,v 1.31 2007/03/05 17:52:15 km Exp $-->
21 <html>
22 <body>
23
24 <a name="Top"><h2><title>080416_HWI-EAS229_0024_207BTAAXX Summary</title></h2></a>
25 <h1>Summary Information For Experiment 080416_HWI-EAS229_0024_207BTAAXX on Machine HWI-EAS229</h1>
26 <h2><br></br>Chip Summary<br></br></h2>
27 <table border="1" cellpadding="5">
28 <tr><td>Machine</td><td>HWI-EAS229</td></tr>
29 <tr><td>Run Folder</td><td>080416_HWI-EAS229_0024_207BTAAXX</td></tr>
30 <tr><td>Chip ID</td><td>unknown</td></tr>
31 </table>
32 <h2><br></br>Lane Parameter Summary<br></br></h2>
33 <table border="1" cellpadding="5">
34 <tr>
35 <td>Lane</td>
36 <td>Sample ID</td>
37 <td>Sample Target</td>
38 <td>Sample Type</td>
39 <td>Length</td>
40 <td>Filter</td>
41 <td>Tiles</td>
42 </tr>
43 <tr>
44 <td>1</td>
45 <td>unknown</td>
46 <td>dm3</td>
47 <td>ELAND</td>
48 <td>32</td>
49 <td>'((CHASTITY>=0.6))'</td>
50 <td><a href="#Lane1">Lane 1</a></td>
51 </tr>
52 <tr>
53 <td>2</td>
54 <td>unknown</td>
55 <td>equcab1</td>
56 <td>ELAND</td>
57 <td>32</td>
58 <td>'((CHASTITY>=0.6))'</td>
59 <td><a href="#Lane2">Lane 2</a></td>
60 </tr>
61 <tr>
62 <td>3</td>
63 <td>unknown</td>
64 <td>equcab1</td>
65 <td>ELAND</td>
66 <td>32</td>
67 <td>'((CHASTITY>=0.6))'</td>
68 <td><a href="#Lane3">Lane 3</a></td>
69 </tr>
70 <tr>
71 <td>4</td>
72 <td>unknown</td>
73 <td>canfam2</td>
74 <td>ELAND</td>
75 <td>32</td>
76 <td>'((CHASTITY>=0.6))'</td>
77 <td><a href="#Lane4">Lane 4</a></td>
78 </tr>
79 <tr>
80 <td>5</td>
81 <td>unknown</td>
82 <td>hg18</td>
83 <td>ELAND</td>
84 <td>32</td>
85 <td>'((CHASTITY>=0.6))'</td>
86 <td><a href="#Lane5">Lane 5</a></td>
87 </tr>
88 <tr>
89 <td>6</td>
90 <td>unknown</td>
91 <td>hg18</td>
92 <td>ELAND</td>
93 <td>32</td>
94 <td>'((CHASTITY>=0.6))'</td>
95 <td><a href="#Lane6">Lane 6</a></td>
96 </tr>
97 <tr>
98 <td>7</td>
99 <td>unknown</td>
100 <td>hg18</td>
101 <td>ELAND</td>
102 <td>32</td>
103 <td>'((CHASTITY>=0.6))'</td>
104 <td><a href="#Lane7">Lane 7</a></td>
105 </tr>
106 <tr>
107 <td>8</td>
108 <td>unknown</td>
109 <td>hg18</td>
110 <td>ELAND</td>
111 <td>32</td>
112 <td>'((CHASTITY>=0.6))'</td>
113 <td><a href="#Lane8">Lane 8</a></td>
114 </tr>
115 </table>
116 <h2><br></br>Lane Results Summary<br></br></h2>
117 <table border="1" cellpadding="5">
118 <tr>
119
120 <td>Lane </td>
121 <td>Clusters </td>
122 <td>Av 1st Cycle Int </td>
123 <td>% intensity after 20 cycles </td>
124 <td>% PF Clusters </td>
125 <td>% Align (PF) </td>
126 <td>Av Alignment Score (PF) </td>
127 <td> % Error Rate (PF) </td>
128 </tr>
129 <tr>
130 <td>1</td>
131 <td>17421 +/- 2139</td>
132 <td>7230 +/- 801</td>
133 <td>23.73 +/- 10.79</td>
134 <td>13.00 +/- 22.91</td>
135 <td>32.03 +/- 18.45</td>
136 <td>6703.57 +/- 3753.85</td>
137 <td>4.55 +/- 4.81</td>
138 </tr>
139 <tr>
140 <td>2</td>
141 <td>20311 +/- 2402</td>
142 <td>7660 +/- 678</td>
143 <td>17.03 +/- 4.40</td>
144 <td>40.74 +/- 30.33</td>
145 <td>29.54 +/- 9.03</td>
146 <td>5184.02 +/- 1631.54</td>
147 <td>3.27 +/- 3.94</td>
148 </tr>
149 <tr>
150 <td>3</td>
151 <td>20193 +/- 2399</td>
152 <td>7700 +/- 797</td>
153 <td>15.75 +/- 3.30</td>
154 <td>56.56 +/- 17.16</td>
155 <td>27.33 +/- 7.48</td>
156 <td>4803.49 +/- 1313.31</td>
157 <td>3.07 +/- 2.86</td>
158 </tr>
159 <tr>
160 <td>4</td>
161 <td>15537 +/- 2531</td>
162 <td>7620 +/- 1392</td>
163 <td>15.37 +/- 3.79</td>
164 <td>63.05 +/- 18.30</td>
165 <td>15.88 +/- 4.99</td>
166 <td>3162.13 +/- 962.59</td>
167 <td>3.11 +/- 2.22</td>
168 </tr>
169 <tr>
170 <td>5</td>
171 <td>32047 +/- 3356</td>
172 <td>8093 +/- 831</td>
173 <td>23.79 +/- 6.18</td>
174 <td>53.36 +/- 18.06</td>
175 <td>48.04 +/- 13.77</td>
176 <td>9866.23 +/- 2877.30</td>
177 <td>2.26 +/- 1.16</td>
178 </tr>
179 <tr>
180 <td>6</td>
181 <td>32946 +/- 4753</td>
182 <td>8227 +/- 736</td>
183 <td>24.07 +/- 4.69</td>
184 <td>54.65 +/- 12.57</td>
185 <td>50.98 +/- 10.54</td>
186 <td>10468.86 +/- 2228.53</td>
187 <td>2.21 +/- 2.33</td>
188 </tr>
189 <tr>
190 <td>7</td>
191 <td>39504 +/- 4171</td>
192 <td>8401 +/- 785</td>
193 <td>22.55 +/- 4.56</td>
194 <td>45.22 +/- 10.34</td>
195 <td>48.41 +/- 9.67</td>
196 <td>9829.40 +/- 1993.20</td>
197 <td>2.26 +/- 1.11</td>
198 </tr>
199 <tr>
200 <td>8</td>
201 <td>37998 +/- 3792</td>
202 <td>8443 +/- 1211</td>
203 <td>39.03 +/- 7.52</td>
204 <td>42.16 +/- 12.35</td>
205 <td>40.98 +/- 14.89</td>
206 <td>8128.87 +/- 3055.34</td>
207 <td>3.57 +/- 2.77</td>
208 </tr>
209 </table>
210 </body>
211 </html>
212 """
213     pathname = os.path.join(gerald_dir, 'Summary.htm')
214     f = open(pathname, 'w')
215     f.write(summary_htm)
216     f.close()
217
218 def make_eland_results(gerald_dir):
219     eland_result = """>HWI-EAS229_24_207BTAAXX:1:7:599:759    ACATAGNCACAGACATAAACATAGACATAGAC U0      1       1       3       chrUextra.fa    28189829        R       D.
220 >HWI-EAS229_24_207BTAAXX:1:7:205:842    AAACAANNCTCCCAAACACGTAAACTGGAAAA  U1      0       1       0       chr2L.fa        8796855 R       DD      24T
221 >HWI-EAS229_24_207BTAAXX:1:7:776:582    AGCTCANCCGATCGAAAACCTCNCCAAGCAAT        NM      0       0       0
222 >HWI-EAS229_24_207BTAAXX:1:7:205:842    AAACAANNCTCCCAAACACGTAAACTGGAAAA        U1      0       1       0       Lambda.fa        8796855 R       DD      24T
223 """
224     for i in range(1,9):
225         pathname = os.path.join(gerald_dir,
226                                 's_%d_eland_result.txt' % (i,))
227         f = open(pathname, 'w')
228         f.write(eland_result)
229         f.close()
230
231 class RunfolderTests(TestCase):
232     """
233     Test components of the runfolder processing code
234     which includes firecrest, bustard, and gerald
235     """
236     def setUp(self):
237         # make a fake runfolder directory
238         self.temp_dir = tempfile.mkdtemp(prefix='tmp_runfolder_')
239
240         self.runfolder_dir = os.path.join(self.temp_dir,
241                                           '080102_HWI-EAS229_0010_207BTAAXX')
242         os.mkdir(self.runfolder_dir)
243
244         self.data_dir = os.path.join(self.runfolder_dir, 'Data')
245         os.mkdir(self.data_dir)
246
247         self.firecrest_dir = os.path.join(self.data_dir,
248                                'C1-33_Firecrest1.8.28_12-04-2008_diane'
249                              )
250         os.mkdir(self.firecrest_dir)
251         self.matrix_dir = os.path.join(self.firecrest_dir, 'Matrix')
252         os.mkdir(self.matrix_dir)
253         matrix_filename = os.path.join(self.matrix_dir, 's_matrix')
254         make_matrix(matrix_filename)
255
256         self.bustard_dir = os.path.join(self.firecrest_dir,
257                                         'Bustard1.8.28_12-04-2008_diane')
258         os.mkdir(self.bustard_dir)
259         make_phasing_params(self.bustard_dir)
260
261         self.gerald_dir = os.path.join(self.bustard_dir,
262                                        'GERALD_12-04-2008_diane')
263         os.mkdir(self.gerald_dir)
264         make_gerald_config_026(self.gerald_dir)
265         make_summary_htm(self.gerald_dir)
266         make_eland_results(self.gerald_dir)
267
268     def tearDown(self):
269         shutil.rmtree(self.temp_dir)
270
271     def test_firecrest(self):
272         """
273         Construct a firecrest object
274         """
275         f = firecrest.firecrest(self.firecrest_dir)
276         self.failUnlessEqual(f.software, 'Firecrest')
277         self.failUnlessEqual(f.version, '1.8.28')
278         self.failUnlessEqual(f.start, 1)
279         self.failUnlessEqual(f.stop, 33)
280         self.failUnlessEqual(f.user, 'diane')
281         self.failUnlessEqual(f.date, date(2008,4,12))
282
283         xml = f.get_elements()
284         # just make sure that element tree can serialize the tree
285         xml_str = ElementTree.tostring(xml)
286
287         f2 = firecrest.Firecrest(xml=xml)
288         self.failUnlessEqual(f.software, f2.software)
289         self.failUnlessEqual(f.version,  f2.version)
290         self.failUnlessEqual(f.start,    f2.start)
291         self.failUnlessEqual(f.stop,     f2.stop)
292         self.failUnlessEqual(f.user,     f2.user)
293         self.failUnlessEqual(f.date,     f2.date)
294
295     def test_bustard(self):
296         """
297         construct a bustard object
298         """
299         b = bustard.bustard(self.bustard_dir)
300         self.failUnlessEqual(b.software, 'Bustard')
301         self.failUnlessEqual(b.version, '1.8.28')
302         self.failUnlessEqual(b.date,    date(2008,4,12))
303         self.failUnlessEqual(b.user,    'diane')
304         self.failUnlessEqual(len(b.phasing), 8)
305         self.failUnlessAlmostEqual(b.phasing[8].phasing, 0.0099)
306
307         xml = b.get_elements()
308         b2 = bustard.Bustard(xml=xml)
309         self.failUnlessEqual(b.software, b2.software)
310         self.failUnlessEqual(b.version,  b2.version)
311         self.failUnlessEqual(b.date,     b2.date )
312         self.failUnlessEqual(b.user,     b2.user)
313         self.failUnlessEqual(len(b.phasing), len(b2.phasing))
314         for key in b.phasing.keys():
315             self.failUnlessEqual(b.phasing[key].lane,
316                                  b2.phasing[key].lane)
317             self.failUnlessEqual(b.phasing[key].phasing,
318                                  b2.phasing[key].phasing)
319             self.failUnlessEqual(b.phasing[key].prephasing,
320                                  b2.phasing[key].prephasing)
321
322     def test_gerald(self):
323         # need to update gerald and make tests for it
324         g = gerald.gerald(self.gerald_dir)
325
326         self.failUnlessEqual(g.software, 'GERALD')
327         self.failUnlessEqual(g.version, '1.68.2.2')
328         self.failUnlessEqual(g.date, datetime(2008,4,19,19,8,30))
329         self.failUnlessEqual(len(g.lanes), len(g.lanes.keys()))
330         self.failUnlessEqual(len(g.lanes), len(g.lanes.items()))
331
332
333         # list of genomes, matches what was defined up in
334         # make_gerald_config.
335         # the first None is to offset the genomes list to be 1..9
336         # instead of pythons default 0..8
337         genomes = [None, '/g/dm3', '/g/equcab1', '/g/equcab1', '/g/canfam2',
338                          '/g/hg18', '/g/hg18', '/g/hg18', '/g/hg18', ]
339
340         # test lane specific parameters from gerald config file
341         for i in range(1,9):
342             cur_lane = g.lanes[i]
343             self.failUnlessEqual(cur_lane.analysis, 'eland')
344             self.failUnlessEqual(cur_lane.eland_genome, genomes[i])
345             self.failUnlessEqual(cur_lane.read_length, '32')
346             self.failUnlessEqual(cur_lane.use_bases, 'Y'*32)
347
348         # test data extracted from summary file
349         clusters = [None,
350                     (17421, 2139), (20311, 2402), (20193, 2399), (15537, 2531),
351                     (32047, 3356), (32946, 4753), (39504, 4171), (37998, 3792)]
352
353         self.failUnlessEqual(len(g.summary), 1)
354         for i in range(1,9):
355             summary_lane = g.summary[0][i]
356             self.failUnlessEqual(summary_lane.cluster, clusters[i])
357             self.failUnlessEqual(summary_lane.lane, i)
358
359         xml = g.get_elements()
360         # just make sure that element tree can serialize the tree
361         xml_str = ElementTree.tostring(xml)
362         g2 = gerald.Gerald(xml=xml)
363
364         # do it all again after extracting from the xml file
365         self.failUnlessEqual(g.version, g2.version)
366         self.failUnlessEqual(g.date, g2.date)
367         self.failUnlessEqual(len(g.lanes.keys()), len(g2.lanes.keys()))
368         self.failUnlessEqual(len(g.lanes.items()), len(g2.lanes.items()))
369
370         # test lane specific parameters from gerald config file
371         for i in range(1,9):
372             g_lane = g.lanes[i]
373             g2_lane = g2.lanes[i]
374             self.failUnlessEqual(g_lane.analysis, g2_lane.analysis)
375             self.failUnlessEqual(g_lane.eland_genome, g2_lane.eland_genome)
376             self.failUnlessEqual(g_lane.read_length, g2_lane.read_length)
377             self.failUnlessEqual(g_lane.use_bases, g2_lane.use_bases)
378
379         self.failUnlessEqual(len(g.summary), 1)
380         # test (some) summary elements
381         for i in range(1,9):
382             g_summary = g.summary[0][i]
383             g2_summary = g2.summary[0][i]
384             self.failUnlessEqual(g_summary.cluster, g2_summary.cluster)
385             self.failUnlessEqual(g_summary.lane, g2_summary.lane)
386
387             g_eland = g.eland_results
388             g2_eland = g2.eland_results
389             for key in g_eland:
390                 g_results = g_eland[key]
391                 g2_results = g2_eland[key]
392                 self.failUnlessEqual(g_results.reads,
393                                      g2_results.reads)
394                 self.failUnlessEqual(len(g_results.mapped_reads),
395                                      len(g2_results.mapped_reads))
396                 for k in g_results.mapped_reads.keys():
397                     self.failUnlessEqual(g_results.mapped_reads[k],
398                                          g2_results.mapped_reads[k])
399
400                 self.failUnlessEqual(len(g_results.match_codes),
401                                      len(g2_results.match_codes))
402                 for k in g_results.match_codes.keys():
403                     self.failUnlessEqual(g_results.match_codes[k],
404                                          g2_results.match_codes[k])
405
406
407     def test_eland(self):
408         dm3_map = { 'chrUextra.fa' : 'dm3/chrUextra.fa',
409                     'chr2L.fa': 'dm3/chr2L.fa',
410                     'Lambda.fa': 'Lambda.fa'}
411         genome_maps = { 1:dm3_map, 2:dm3_map, 3:dm3_map, 4:dm3_map,
412                         5:dm3_map, 6:dm3_map, 7:dm3_map, 8:dm3_map }
413         eland = gerald.eland(self.gerald_dir, genome_maps=genome_maps)
414
415         for key in eland:
416             lane = eland[key]
417             self.failUnlessEqual(lane.reads, 4)
418             self.failUnlessEqual(lane.sample_name, "s")
419             self.failUnlessEqual(lane.lane_id, key.lane)
420             self.failUnlessEqual(len(lane.mapped_reads), 3)
421             self.failUnlessEqual(lane.mapped_reads['Lambda.fa'], 1)
422             self.failUnlessEqual(lane.mapped_reads['dm3/chr2L.fa'], 1)
423             self.failUnlessEqual(lane.match_codes['U1'], 2)
424             self.failUnlessEqual(lane.match_codes['NM'], 1)
425
426         xml = eland.get_elements()
427         # just make sure that element tree can serialize the tree
428         xml_str = ElementTree.tostring(xml)
429         e2 = gerald.ELAND(xml=xml)
430
431         for key in eland:
432             l1 = eland[key]
433             l2 = e2[key]
434             self.failUnlessEqual(l1.reads, l2.reads)
435             self.failUnlessEqual(l1.sample_name, l2.sample_name)
436             self.failUnlessEqual(l1.lane_id, l2.lane_id)
437             self.failUnlessEqual(len(l1.mapped_reads), len(l2.mapped_reads))
438             self.failUnlessEqual(len(l1.mapped_reads), 3)
439             for k in l1.mapped_reads.keys():
440                 self.failUnlessEqual(l1.mapped_reads[k],
441                                      l2.mapped_reads[k])
442
443             self.failUnlessEqual(len(l1.match_codes), 9)
444             self.failUnlessEqual(len(l1.match_codes), len(l2.match_codes))
445             for k in l1.match_codes.keys():
446                 self.failUnlessEqual(l1.match_codes[k],
447                                      l2.match_codes[k])
448
449     def test_runfolder(self):
450         runs = runfolder.get_runs(self.runfolder_dir)
451
452         # do we get the flowcell id from the filename?
453         self.failUnlessEqual(len(runs), 1)
454         self.failUnlessEqual(runs[0].serialization_filename, 'run_207BTAAXX_2008-04-19.xml')
455
456         # do we get the flowcell id from the FlowcellId.xml file
457         make_flowcell_id(self.runfolder_dir, '207BTAAXY')
458         runs = runfolder.get_runs(self.runfolder_dir)
459         self.failUnlessEqual(len(runs), 1)
460         self.failUnlessEqual(runs[0].serialization_filename, 'run_207BTAAXY_2008-04-19.xml')
461
462         r1 = runs[0]
463         xml = r1.get_elements()
464         xml_str = ElementTree.tostring(xml)
465
466         r2 = runfolder.PipelineRun(xml=xml)
467         self.failUnlessEqual(r1.serialization_filename, r2.serialization_filename)
468         self.failIfEqual(r2.image_analysis, None)
469         self.failIfEqual(r2.bustard, None)
470         self.failIfEqual(r2.gerald, None)
471
472
473 def suite():
474     from unittest import TestSuite, defaultTestLoader
475     suite = TestSuite()
476     suite.addTests(defaultTestLoader.loadTestsFromTestCase(RunfolderTests))
477     return suite
478
479
480 if __name__ == "__main__":
481     from unittest import main
482     main(defaultTest="suite")