take advantage of absolute_import to simplify import statements
[htsworkflow.git] / htsworkflow / pipelines / test / test_runfolder026.py
1 #!/usr/bin/env python
2 from __future__ import absolute_import
3
4 from datetime import datetime, date
5 import os
6 import tempfile
7 import shutil
8 from unittest import TestCase
9
10 from htsworkflow.pipelines import firecrest
11 from htsworkflow.pipelines import bustard
12 from htsworkflow.pipelines import gerald
13 from htsworkflow.pipelines import runfolder
14 from htsworkflow.pipelines import ElementTree
15
16 from .simulate_runfolder import *
17
18
19 def make_summary_htm(gerald_dir):
20     summary_htm = """<!--RUN_TIME Mon Apr 21 11:52:25 2008 -->
21 <!--SOFTWARE_VERSION @(#) $Id: jerboa.pl,v 1.31 2007/03/05 17:52:15 km Exp $-->
22 <html>
23 <body>
24
25 <a name="Top"><h2><title>080416_HWI-EAS229_0024_207BTAAXX Summary</title></h2></a>
26 <h1>Summary Information For Experiment 080416_HWI-EAS229_0024_207BTAAXX on Machine HWI-EAS229</h1>
27 <h2><br></br>Chip Summary<br></br></h2>
28 <table border="1" cellpadding="5">
29 <tr><td>Machine</td><td>HWI-EAS229</td></tr>
30 <tr><td>Run Folder</td><td>080416_HWI-EAS229_0024_207BTAAXX</td></tr>
31 <tr><td>Chip ID</td><td>unknown</td></tr>
32 </table>
33 <h2><br></br>Lane Parameter Summary<br></br></h2>
34 <table border="1" cellpadding="5">
35 <tr>
36 <td>Lane</td>
37 <td>Sample ID</td>
38 <td>Sample Target</td>
39 <td>Sample Type</td>
40 <td>Length</td>
41 <td>Filter</td>
42 <td>Tiles</td>
43 </tr>
44 <tr>
45 <td>1</td>
46 <td>unknown</td>
47 <td>dm3</td>
48 <td>ELAND</td>
49 <td>32</td>
50 <td>'((CHASTITY>=0.6))'</td>
51 <td><a href="#Lane1">Lane 1</a></td>
52 </tr>
53 <tr>
54 <td>2</td>
55 <td>unknown</td>
56 <td>equcab1</td>
57 <td>ELAND</td>
58 <td>32</td>
59 <td>'((CHASTITY>=0.6))'</td>
60 <td><a href="#Lane2">Lane 2</a></td>
61 </tr>
62 <tr>
63 <td>3</td>
64 <td>unknown</td>
65 <td>equcab1</td>
66 <td>ELAND</td>
67 <td>32</td>
68 <td>'((CHASTITY>=0.6))'</td>
69 <td><a href="#Lane3">Lane 3</a></td>
70 </tr>
71 <tr>
72 <td>4</td>
73 <td>unknown</td>
74 <td>canfam2</td>
75 <td>ELAND</td>
76 <td>32</td>
77 <td>'((CHASTITY>=0.6))'</td>
78 <td><a href="#Lane4">Lane 4</a></td>
79 </tr>
80 <tr>
81 <td>5</td>
82 <td>unknown</td>
83 <td>hg18</td>
84 <td>ELAND</td>
85 <td>32</td>
86 <td>'((CHASTITY>=0.6))'</td>
87 <td><a href="#Lane5">Lane 5</a></td>
88 </tr>
89 <tr>
90 <td>6</td>
91 <td>unknown</td>
92 <td>hg18</td>
93 <td>ELAND</td>
94 <td>32</td>
95 <td>'((CHASTITY>=0.6))'</td>
96 <td><a href="#Lane6">Lane 6</a></td>
97 </tr>
98 <tr>
99 <td>7</td>
100 <td>unknown</td>
101 <td>hg18</td>
102 <td>ELAND</td>
103 <td>32</td>
104 <td>'((CHASTITY>=0.6))'</td>
105 <td><a href="#Lane7">Lane 7</a></td>
106 </tr>
107 <tr>
108 <td>8</td>
109 <td>unknown</td>
110 <td>hg18</td>
111 <td>ELAND</td>
112 <td>32</td>
113 <td>'((CHASTITY>=0.6))'</td>
114 <td><a href="#Lane8">Lane 8</a></td>
115 </tr>
116 </table>
117 <h2><br></br>Lane Results Summary<br></br></h2>
118 <table border="1" cellpadding="5">
119 <tr>
120
121 <td>Lane </td>
122 <td>Clusters </td>
123 <td>Av 1st Cycle Int </td>
124 <td>% intensity after 20 cycles </td>
125 <td>% PF Clusters </td>
126 <td>% Align (PF) </td>
127 <td>Av Alignment Score (PF) </td>
128 <td> % Error Rate (PF) </td>
129 </tr>
130 <tr>
131 <td>1</td>
132 <td>17421 +/- 2139</td>
133 <td>7230 +/- 801</td>
134 <td>23.73 +/- 10.79</td>
135 <td>13.00 +/- 22.91</td>
136 <td>32.03 +/- 18.45</td>
137 <td>6703.57 +/- 3753.85</td>
138 <td>4.55 +/- 4.81</td>
139 </tr>
140 <tr>
141 <td>2</td>
142 <td>20311 +/- 2402</td>
143 <td>7660 +/- 678</td>
144 <td>17.03 +/- 4.40</td>
145 <td>40.74 +/- 30.33</td>
146 <td>29.54 +/- 9.03</td>
147 <td>5184.02 +/- 1631.54</td>
148 <td>3.27 +/- 3.94</td>
149 </tr>
150 <tr>
151 <td>3</td>
152 <td>20193 +/- 2399</td>
153 <td>7700 +/- 797</td>
154 <td>15.75 +/- 3.30</td>
155 <td>56.56 +/- 17.16</td>
156 <td>27.33 +/- 7.48</td>
157 <td>4803.49 +/- 1313.31</td>
158 <td>3.07 +/- 2.86</td>
159 </tr>
160 <tr>
161 <td>4</td>
162 <td>15537 +/- 2531</td>
163 <td>7620 +/- 1392</td>
164 <td>15.37 +/- 3.79</td>
165 <td>63.05 +/- 18.30</td>
166 <td>15.88 +/- 4.99</td>
167 <td>3162.13 +/- 962.59</td>
168 <td>3.11 +/- 2.22</td>
169 </tr>
170 <tr>
171 <td>5</td>
172 <td>32047 +/- 3356</td>
173 <td>8093 +/- 831</td>
174 <td>23.79 +/- 6.18</td>
175 <td>53.36 +/- 18.06</td>
176 <td>48.04 +/- 13.77</td>
177 <td>9866.23 +/- 2877.30</td>
178 <td>2.26 +/- 1.16</td>
179 </tr>
180 <tr>
181 <td>6</td>
182 <td>32946 +/- 4753</td>
183 <td>8227 +/- 736</td>
184 <td>24.07 +/- 4.69</td>
185 <td>54.65 +/- 12.57</td>
186 <td>50.98 +/- 10.54</td>
187 <td>10468.86 +/- 2228.53</td>
188 <td>2.21 +/- 2.33</td>
189 </tr>
190 <tr>
191 <td>7</td>
192 <td>39504 +/- 4171</td>
193 <td>8401 +/- 785</td>
194 <td>22.55 +/- 4.56</td>
195 <td>45.22 +/- 10.34</td>
196 <td>48.41 +/- 9.67</td>
197 <td>9829.40 +/- 1993.20</td>
198 <td>2.26 +/- 1.11</td>
199 </tr>
200 <tr>
201 <td>8</td>
202 <td>37998 +/- 3792</td>
203 <td>8443 +/- 1211</td>
204 <td>39.03 +/- 7.52</td>
205 <td>42.16 +/- 12.35</td>
206 <td>40.98 +/- 14.89</td>
207 <td>8128.87 +/- 3055.34</td>
208 <td>3.57 +/- 2.77</td>
209 </tr>
210 </table>
211 </body>
212 </html>
213 """
214     pathname = os.path.join(gerald_dir, 'Summary.htm')
215     f = open(pathname, 'w')
216     f.write(summary_htm)
217     f.close()
218
219 def make_eland_results(gerald_dir):
220     eland_result = """>HWI-EAS229_24_207BTAAXX:1:7:599:759    ACATAGNCACAGACATAAACATAGACATAGAC U0      1       1       3       chrUextra.fa    28189829        R       D.
221 >HWI-EAS229_24_207BTAAXX:1:7:205:842    AAACAANNCTCCCAAACACGTAAACTGGAAAA  U1      0       1       0       chr2L.fa        8796855 R       DD      24T
222 >HWI-EAS229_24_207BTAAXX:1:7:776:582    AGCTCANCCGATCGAAAACCTCNCCAAGCAAT        NM      0       0       0
223 >HWI-EAS229_24_207BTAAXX:1:7:205:842    AAACAANNCTCCCAAACACGTAAACTGGAAAA        U1      0       1       0       Lambda.fa        8796855 R       DD      24T
224 """
225     for i in range(1,9):
226         pathname = os.path.join(gerald_dir,
227                                 's_%d_eland_result.txt' % (i,))
228         f = open(pathname, 'w')
229         f.write(eland_result)
230         f.close()
231
232 class RunfolderTests(TestCase):
233     """
234     Test components of the runfolder processing code
235     which includes firecrest, bustard, and gerald
236     """
237     def setUp(self):
238         # make a fake runfolder directory
239         self.temp_dir = tempfile.mkdtemp(prefix='tmp_runfolder_')
240
241         self.runfolder_dir = os.path.join(self.temp_dir,
242                                           '080102_HWI-EAS229_0010_207BTAAXX')
243         os.mkdir(self.runfolder_dir)
244
245         self.data_dir = os.path.join(self.runfolder_dir, 'Data')
246         os.mkdir(self.data_dir)
247
248         self.firecrest_dir = os.path.join(self.data_dir,
249                                'C1-33_Firecrest1.8.28_12-04-2008_diane'
250                              )
251         os.mkdir(self.firecrest_dir)
252         self.matrix_dir = os.path.join(self.firecrest_dir, 'Matrix')
253         os.mkdir(self.matrix_dir)
254         matrix_filename = os.path.join(self.matrix_dir, 's_matrix')
255         make_matrix(matrix_filename)
256
257         self.bustard_dir = os.path.join(self.firecrest_dir,
258                                         'Bustard1.8.28_12-04-2008_diane')
259         os.mkdir(self.bustard_dir)
260         make_phasing_params(self.bustard_dir)
261
262         self.gerald_dir = os.path.join(self.bustard_dir,
263                                        'GERALD_12-04-2008_diane')
264         os.mkdir(self.gerald_dir)
265         make_gerald_config_026(self.gerald_dir)
266         make_summary_htm(self.gerald_dir)
267         make_eland_results(self.gerald_dir)
268
269     def tearDown(self):
270         shutil.rmtree(self.temp_dir)
271
272     def test_firecrest(self):
273         """
274         Construct a firecrest object
275         """
276         f = firecrest.firecrest(self.firecrest_dir)
277         self.failUnlessEqual(f.software, 'Firecrest')
278         self.failUnlessEqual(f.version, '1.8.28')
279         self.failUnlessEqual(f.start, 1)
280         self.failUnlessEqual(f.stop, 33)
281         self.failUnlessEqual(f.user, 'diane')
282         self.failUnlessEqual(f.date, date(2008,4,12))
283
284         xml = f.get_elements()
285         # just make sure that element tree can serialize the tree
286         xml_str = ElementTree.tostring(xml)
287
288         f2 = firecrest.Firecrest(xml=xml)
289         self.failUnlessEqual(f.software, f2.software)
290         self.failUnlessEqual(f.version,  f2.version)
291         self.failUnlessEqual(f.start,    f2.start)
292         self.failUnlessEqual(f.stop,     f2.stop)
293         self.failUnlessEqual(f.user,     f2.user)
294         self.failUnlessEqual(f.date,     f2.date)
295
296     def test_bustard(self):
297         """
298         construct a bustard object
299         """
300         b = bustard.bustard(self.bustard_dir)
301         self.failUnlessEqual(b.software, 'Bustard')
302         self.failUnlessEqual(b.version, '1.8.28')
303         self.failUnlessEqual(b.date,    date(2008,4,12))
304         self.failUnlessEqual(b.user,    'diane')
305         self.failUnlessEqual(len(b.phasing), 8)
306         self.failUnlessAlmostEqual(b.phasing[8].phasing, 0.0099)
307
308         xml = b.get_elements()
309         b2 = bustard.Bustard(xml=xml)
310         self.failUnlessEqual(b.software, b2.software)
311         self.failUnlessEqual(b.version,  b2.version)
312         self.failUnlessEqual(b.date,     b2.date )
313         self.failUnlessEqual(b.user,     b2.user)
314         self.failUnlessEqual(len(b.phasing), len(b2.phasing))
315         for key in b.phasing.keys():
316             self.failUnlessEqual(b.phasing[key].lane,
317                                  b2.phasing[key].lane)
318             self.failUnlessEqual(b.phasing[key].phasing,
319                                  b2.phasing[key].phasing)
320             self.failUnlessEqual(b.phasing[key].prephasing,
321                                  b2.phasing[key].prephasing)
322
323     def test_gerald(self):
324         # need to update gerald and make tests for it
325         g = gerald.gerald(self.gerald_dir)
326
327         self.failUnlessEqual(g.software, 'GERALD')
328         self.failUnlessEqual(g.version, '1.68.2.2')
329         self.failUnlessEqual(g.date, datetime(2008,4,19,19,8,30))
330         self.failUnlessEqual(len(g.lanes), len(g.lanes.keys()))
331         self.failUnlessEqual(len(g.lanes), len(g.lanes.items()))
332
333
334         # list of genomes, matches what was defined up in
335         # make_gerald_config.
336         # the first None is to offset the genomes list to be 1..9
337         # instead of pythons default 0..8
338         genomes = [None, '/g/dm3', '/g/equcab1', '/g/equcab1', '/g/canfam2',
339                          '/g/hg18', '/g/hg18', '/g/hg18', '/g/hg18', ]
340
341         # test lane specific parameters from gerald config file
342         for i in range(1,9):
343             cur_lane = g.lanes[i]
344             self.failUnlessEqual(cur_lane.analysis, 'eland')
345             self.failUnlessEqual(cur_lane.eland_genome, genomes[i])
346             self.failUnlessEqual(cur_lane.read_length, '32')
347             self.failUnlessEqual(cur_lane.use_bases, 'Y'*32)
348
349         # test data extracted from summary file
350         clusters = [None,
351                     (17421, 2139), (20311, 2402), (20193, 2399), (15537, 2531),
352                     (32047, 3356), (32946, 4753), (39504, 4171), (37998, 3792)]
353
354         self.failUnlessEqual(len(g.summary), 1)
355         for i in range(1,9):
356             summary_lane = g.summary[0][i]
357             self.failUnlessEqual(summary_lane.cluster, clusters[i])
358             self.failUnlessEqual(summary_lane.lane, i)
359
360         xml = g.get_elements()
361         # just make sure that element tree can serialize the tree
362         xml_str = ElementTree.tostring(xml)
363         g2 = gerald.Gerald(xml=xml)
364
365         # do it all again after extracting from the xml file
366         self.failUnlessEqual(g.version, g2.version)
367         self.failUnlessEqual(g.date, g2.date)
368         self.failUnlessEqual(len(g.lanes.keys()), len(g2.lanes.keys()))
369         self.failUnlessEqual(len(g.lanes.items()), len(g2.lanes.items()))
370
371         # test lane specific parameters from gerald config file
372         for i in range(1,9):
373             g_lane = g.lanes[i]
374             g2_lane = g2.lanes[i]
375             self.failUnlessEqual(g_lane.analysis, g2_lane.analysis)
376             self.failUnlessEqual(g_lane.eland_genome, g2_lane.eland_genome)
377             self.failUnlessEqual(g_lane.read_length, g2_lane.read_length)
378             self.failUnlessEqual(g_lane.use_bases, g2_lane.use_bases)
379
380         self.failUnlessEqual(len(g.summary), 1)
381         # test (some) summary elements
382         for i in range(1,9):
383             g_summary = g.summary[0][i]
384             g2_summary = g2.summary[0][i]
385             self.failUnlessEqual(g_summary.cluster, g2_summary.cluster)
386             self.failUnlessEqual(g_summary.lane, g2_summary.lane)
387
388             g_eland = g.eland_results
389             g2_eland = g2.eland_results
390             for key in g_eland:
391                 g_results = g_eland[key]
392                 g2_results = g2_eland[key]
393                 self.failUnlessEqual(g_results.reads,
394                                      g2_results.reads)
395                 self.failUnlessEqual(len(g_results.mapped_reads),
396                                      len(g2_results.mapped_reads))
397                 for k in g_results.mapped_reads.keys():
398                     self.failUnlessEqual(g_results.mapped_reads[k],
399                                          g2_results.mapped_reads[k])
400
401                 self.failUnlessEqual(len(g_results.match_codes),
402                                      len(g2_results.match_codes))
403                 for k in g_results.match_codes.keys():
404                     self.failUnlessEqual(g_results.match_codes[k],
405                                          g2_results.match_codes[k])
406
407
408     def test_eland(self):
409         dm3_map = { 'chrUextra.fa' : 'dm3/chrUextra.fa',
410                     'chr2L.fa': 'dm3/chr2L.fa',
411                     'Lambda.fa': 'Lambda.fa'}
412         genome_maps = { 1:dm3_map, 2:dm3_map, 3:dm3_map, 4:dm3_map,
413                         5:dm3_map, 6:dm3_map, 7:dm3_map, 8:dm3_map }
414         eland = gerald.eland(self.gerald_dir, genome_maps=genome_maps)
415
416         for key in eland:
417             lane = eland[key]
418             self.failUnlessEqual(lane.reads, 4)
419             self.failUnlessEqual(lane.sample_name, "s")
420             self.failUnlessEqual(lane.lane_id, key.lane)
421             self.failUnlessEqual(len(lane.mapped_reads), 3)
422             self.failUnlessEqual(lane.mapped_reads['Lambda.fa'], 1)
423             self.failUnlessEqual(lane.mapped_reads['dm3/chr2L.fa'], 1)
424             self.failUnlessEqual(lane.match_codes['U1'], 2)
425             self.failUnlessEqual(lane.match_codes['NM'], 1)
426
427         xml = eland.get_elements()
428         # just make sure that element tree can serialize the tree
429         xml_str = ElementTree.tostring(xml)
430         e2 = gerald.ELAND(xml=xml)
431
432         for key in eland:
433             l1 = eland[key]
434             l2 = e2[key]
435             self.failUnlessEqual(l1.reads, l2.reads)
436             self.failUnlessEqual(l1.sample_name, l2.sample_name)
437             self.failUnlessEqual(l1.lane_id, l2.lane_id)
438             self.failUnlessEqual(len(l1.mapped_reads), len(l2.mapped_reads))
439             self.failUnlessEqual(len(l1.mapped_reads), 3)
440             for k in l1.mapped_reads.keys():
441                 self.failUnlessEqual(l1.mapped_reads[k],
442                                      l2.mapped_reads[k])
443
444             self.failUnlessEqual(len(l1.match_codes), 9)
445             self.failUnlessEqual(len(l1.match_codes), len(l2.match_codes))
446             for k in l1.match_codes.keys():
447                 self.failUnlessEqual(l1.match_codes[k],
448                                      l2.match_codes[k])
449
450     def test_runfolder(self):
451         runs = runfolder.get_runs(self.runfolder_dir)
452
453         # do we get the flowcell id from the filename?
454         self.failUnlessEqual(len(runs), 1)
455         self.failUnlessEqual(runs[0].serialization_filename, 'run_207BTAAXX_2008-04-19.xml')
456
457         # do we get the flowcell id from the FlowcellId.xml file
458         make_flowcell_id(self.runfolder_dir, '207BTAAXY')
459         runs = runfolder.get_runs(self.runfolder_dir)
460         self.failUnlessEqual(len(runs), 1)
461         self.failUnlessEqual(runs[0].serialization_filename, 'run_207BTAAXY_2008-04-19.xml')
462
463         r1 = runs[0]
464         xml = r1.get_elements()
465         xml_str = ElementTree.tostring(xml)
466
467         r2 = runfolder.PipelineRun(xml=xml)
468         self.failUnlessEqual(r1.serialization_filename, r2.serialization_filename)
469         self.failIfEqual(r2.image_analysis, None)
470         self.failIfEqual(r2.bustard, None)
471         self.failIfEqual(r2.gerald, None)
472
473
474 def suite():
475     from unittest import TestSuite, defaultTestLoader
476     suite = TestSuite()
477     suite.addTests(defaultTestLoader.loadTestsFromTestCase(RunfolderTests))
478     return suite
479
480
481 if __name__ == "__main__":
482     from unittest import main
483     main(defaultTest="suite")