Change unittest2 back into unittest.
[htsworkflow.git] / htsworkflow / pipelines / test / test_runfolder030.py
1 #!/usr/bin/env python
2
3 from datetime import datetime, date
4 import os
5 import tempfile
6 import shutil
7 from unittest import TestCase
8
9 from htsworkflow.pipelines import firecrest
10 from htsworkflow.pipelines import bustard
11 from htsworkflow.pipelines import gerald
12 from htsworkflow.pipelines import runfolder
13 from htsworkflow.pipelines import ElementTree
14
15 from htsworkflow.pipelines.test.simulate_runfolder import *
16
17
def make_summary_htm(gerald_dir):
    """Write a canned GERALD ``Summary.htm`` fixture into *gerald_dir*.

    The fixture is a fixed HTML report for run
    080627_HWI-EAS229_0036_3055HAXX containing the chip summary, the
    lane parameter summary, the lane results summary, the expanded lane
    summary and one per-tile table for each of lanes 1-8 (the lane 5
    table has a header row only).

    :param gerald_dir: existing directory that receives ``Summary.htm``.
    """
    summary_htm = """<!--RUN_TIME Wed Jul  2 06:47:44 2008 -->
<!--SOFTWARE_VERSION @(#) $Id: jerboa.pl,v 1.94 2007/12/04 09:59:07 rshaw Exp $-->
<html>
<body>

<a name="Top"><h2><title>080627_HWI-EAS229_0036_3055HAXX Summary</title></h2></a>
<h1>Summary Information For Experiment 080627_HWI-EAS229_0036_3055HAXX on Machine HWI-EAS229</h1>
<h2><br></br>Chip Summary<br></br></h2>
<table border="1" cellpadding="5">
<tr><td>Machine</td><td>HWI-EAS229</td></tr>
<tr><td>Run Folder</td><td>080627_HWI-EAS229_0036_3055HAXX</td></tr>
<tr><td>Chip ID</td><td>unknown</td></tr>
</table>
<h2><br></br>Chip Results Summary<br></br></h2>
<table border="1" cellpadding="5">
<tr>
<td>Clusters</td>
<td>Clusters (PF)</td>
<td>Yield (kbases)</td>
</tr>
<tr><td>80933224</td>
<td>43577803</td>
<td>1133022</td>
</tr>
</table>
<h2><br></br>Lane Parameter Summary<br></br></h2>
<table border="1" cellpadding="5">
<tr>
<td>Lane</td>
<td>Sample ID</td>
<td>Sample Target</td>
<td>Sample Type</td>
<td>Length</td>
<td>Filter</td>
<td>Num Tiles</td>
<td>Tiles</td>
</tr>
<tr>
<td>1</td>
<td>unknown</td>
<td>mm9</td>
<td>ELAND</td>
<td>26</td>
<td>'((CHASTITY>=0.6))'</td>
<td>100</td>
<td><a href="#Lane1">Lane 1</a></td>
</tr>
<tr>
<td>2</td>
<td>unknown</td>
<td>mm9</td>
<td>ELAND</td>
<td>26</td>
<td>'((CHASTITY>=0.6))'</td>
<td>100</td>
<td><a href="#Lane2">Lane 2</a></td>
</tr>
<tr>
<td>3</td>
<td>unknown</td>
<td>mm9</td>
<td>ELAND</td>
<td>26</td>
<td>'((CHASTITY>=0.6))'</td>
<td>100</td>
<td><a href="#Lane3">Lane 3</a></td>
</tr>
<tr>
<td>4</td>
<td>unknown</td>
<td>elegans170</td>
<td>ELAND</td>
<td>26</td>
<td>'((CHASTITY>=0.6))'</td>
<td>100</td>
<td><a href="#Lane4">Lane 4</a></td>
</tr>
<tr>
<td>5</td>
<td>unknown</td>
<td>elegans170</td>
<td>ELAND</td>
<td>26</td>
<td>'((CHASTITY>=0.6))'</td>
<td>100</td>
<td><a href="#Lane5">Lane 5</a></td>
</tr>
<tr>
<td>6</td>
<td>unknown</td>
<td>elegans170</td>
<td>ELAND</td>
<td>26</td>
<td>'((CHASTITY>=0.6))'</td>
<td>100</td>
<td><a href="#Lane6">Lane 6</a></td>
</tr>
<tr>
<td>7</td>
<td>unknown</td>
<td>elegans170</td>
<td>ELAND</td>
<td>26</td>
<td>'((CHASTITY>=0.6))'</td>
<td>100</td>
<td><a href="#Lane7">Lane 7</a></td>
</tr>
<tr>
<td>8</td>
<td>unknown</td>
<td>elegans170</td>
<td>ELAND</td>
<td>26</td>
<td>'((CHASTITY>=0.6))'</td>
<td>100</td>
<td><a href="#Lane8">Lane 8</a></td>
</tr>
</table>
<h2><br></br>Lane Results Summary<br></br></h2>
<table border="1" cellpadding="5">
<tr>
<td colspan="2">Lane Info</td>
<td colspan="8">Tile Mean +/- SD for Lane</td>
</tr>
<tr>
<td>Lane </td>
<td>Lane Yield (kbases) </td>
<td>Clusters (raw)</td>
<td>Clusters (PF) </td>
<td>1st Cycle Int (PF) </td>
<td>% intensity after 20 cycles (PF) </td>
<td>% PF Clusters </td>
<td>% Align (PF) </td>
<td>Alignment Score (PF) </td>
<td> % Error Rate (PF) </td>
</tr>
<tr>
<td>1</td>
<td>158046</td>
<td>96483 +/- 9074</td>
<td>60787 +/- 4240</td>
<td>329 +/- 35</td>
<td>101.88 +/- 6.03</td>
<td>63.21 +/- 3.29</td>
<td>70.33 +/- 0.24</td>
<td>9054.08 +/- 59.16</td>
<td>0.46 +/- 0.18</td>
</tr>
<tr>
<td>2</td>
<td>156564</td>
<td>133738 +/- 7938</td>
<td>60217 +/- 1926</td>
<td>444 +/- 39</td>
<td>92.62 +/- 7.58</td>
<td>45.20 +/- 3.31</td>
<td>51.98 +/- 0.74</td>
<td>6692.04 +/- 92.49</td>
<td>0.46 +/- 0.09</td>
</tr>
<tr>
<td>3</td>
<td>185818</td>
<td>152142 +/- 10002</td>
<td>71468 +/- 2827</td>
<td>366 +/- 36</td>
<td>91.53 +/- 8.66</td>
<td>47.19 +/- 3.80</td>
<td>82.24 +/- 0.44</td>
<td>10598.68 +/- 64.13</td>
<td>0.41 +/- 0.04</td>
</tr>
<tr>
<td>4</td>
<td>34953</td>
<td>15784 +/- 2162</td>
<td>13443 +/- 1728</td>
<td>328 +/- 40</td>
<td>97.53 +/- 9.87</td>
<td>85.29 +/- 1.91</td>
<td>80.02 +/- 0.53</td>
<td>10368.82 +/- 71.08</td>
<td>0.15 +/- 0.05</td>
</tr>
<tr>
<td>5</td>
<td>167936</td>
<td>119735 +/- 8465</td>
<td>64590 +/- 2529</td>
<td>417 +/- 37</td>
<td>88.69 +/- 14.79</td>
<td>54.10 +/- 2.59</td>
<td>76.95 +/- 0.32</td>
<td>9936.47 +/- 65.75</td>
<td>0.28 +/- 0.02</td>
</tr>
<tr>
<td>6</td>
<td>173463</td>
<td>152177 +/- 8146</td>
<td>66716 +/- 2493</td>
<td>372 +/- 39</td>
<td>87.06 +/- 9.86</td>
<td>43.98 +/- 3.12</td>
<td>78.80 +/- 0.43</td>
<td>10162.28 +/- 49.65</td>
<td>0.38 +/- 0.03</td>
</tr>
<tr>
<td>7</td>
<td>149287</td>
<td>84649 +/- 7325</td>
<td>57418 +/- 3617</td>
<td>295 +/- 28</td>
<td>89.40 +/- 8.23</td>
<td>67.97 +/- 1.82</td>
<td>33.38 +/- 0.25</td>
<td>4247.92 +/- 32.37</td>
<td>1.00 +/- 0.03</td>
</tr>
<tr>
<td>8</td>
<td>106953</td>
<td>54622 +/- 4812</td>
<td>41136 +/- 3309</td>
<td>284 +/- 37</td>
<td>90.21 +/- 9.10</td>
<td>75.39 +/- 2.27</td>
<td>48.33 +/- 0.29</td>
<td>6169.21 +/- 169.50</td>
<td>0.86 +/- 1.22</td>
</tr>
<tr><td colspan="13">Tile mean across chip</td></tr>
<tr>
<td>Av.</td>
<td></td>
<td>101166</td>
<td>54472</td>
<td>354</td>
<td>92.36</td>
<td>60.29</td>
<td>65.25</td>
<td>8403.69</td>
<td>0.50</td>
</tr>
</table>
<h2><br></br>Expanded Lane Summary<br></br></h2>
<table border="1" cellpadding="5">
<tr>

<tr><td colspan="2">Lane Info</td>
<td colspan="2">Phasing Info</td>
<td colspan="2">Raw Data (tile mean)</td>
<td colspan="7">Filtered Data (tile mean)</td></tr>
<td>Lane </td>
<td>Clusters (tile mean) (raw)</td>
<td>% Phasing </td>
<td>% Prephasing </td>
<td>% Error Rate (raw) </td>
<td> Equiv Perfect Clusters (raw) </td>
<td>% retained </td>
<td>Cycle 2-4 Av Int (PF) </td>
<td>Cycle 2-10 Av % Loss (PF) </td>
<td>Cycle 10-20 Av % Loss (PF) </td>
<td>% Align (PF) </td>
<td>% Error Rate (PF) </td>
<td> Equiv Perfect Clusters (PF) </td>
</tr>
<tr>
<td>1</td>
<td>96483</td>
<td>0.7700</td>
<td>0.3100</td>
<td>1.00</td>
<td>49676</td>
<td>63.21</td>
<td>317 +/- 32</td>
<td>0.13 +/- 0.44</td>
<td>-1.14 +/- 0.34</td>
<td>70.33</td>
<td>0.46</td>
<td>41758</td>
</tr>
<tr>
<td>2</td>
<td>133738</td>
<td>0.7700</td>
<td>0.3100</td>
<td>1.22</td>
<td>40467</td>
<td>45.20</td>
<td>415 +/- 33</td>
<td>0.29 +/- 0.40</td>
<td>-0.79 +/- 0.35</td>
<td>51.98</td>
<td>0.46</td>
<td>30615</td>
</tr>
<tr>
<td>3</td>
<td>152142</td>
<td>0.7700</td>
<td>0.3100</td>
<td>1.30</td>
<td>78588</td>
<td>47.19</td>
<td>344 +/- 26</td>
<td>0.68 +/- 0.51</td>
<td>-0.77 +/- 0.42</td>
<td>82.24</td>
<td>0.41</td>
<td>57552</td>
</tr>
<tr>
<td>4</td>
<td>15784</td>
<td>0.7700</td>
<td>0.3100</td>
<td>0.29</td>
<td>11095</td>
<td>85.29</td>
<td>306 +/- 34</td>
<td>0.20 +/- 0.69</td>
<td>-1.28 +/- 0.66</td>
<td>80.02</td>
<td>0.15</td>
<td>10671</td>
</tr>
<tr>
<td>5</td>
<td>119735</td>
<td>0.7700</td>
<td>0.3100</td>
<td>0.85</td>
<td>60335</td>
<td>54.10</td>
<td>380 +/- 32</td>
<td>0.34 +/- 0.49</td>
<td>-1.55 +/- 4.69</td>
<td>76.95</td>
<td>0.28</td>
<td>49015</td>
</tr>
<tr>
<td>6</td>
<td>152177</td>
<td>0.7700</td>
<td>0.3100</td>
<td>1.21</td>
<td>70905</td>
<td>43.98</td>
<td>333 +/- 27</td>
<td>0.57 +/- 0.50</td>
<td>-0.91 +/- 0.39</td>
<td>78.80</td>
<td>0.38</td>
<td>51663</td>
</tr>
<tr>
<td>7</td>
<td>84649</td>
<td>0.7700</td>
<td>0.3100</td>
<td>1.38</td>
<td>21069</td>
<td>67.97</td>
<td>272 +/- 20</td>
<td>1.15 +/- 0.52</td>
<td>-0.84 +/- 0.58</td>
<td>33.38</td>
<td>1.00</td>
<td>18265</td>
</tr>
<tr>
<td>8</td>
<td>54622</td>
<td>0.7700</td>
<td>0.3100</td>
<td>1.17</td>
<td>21335</td>
<td>75.39</td>
<td>262 +/- 31</td>
<td>1.10 +/- 0.59</td>
<td>-1.01 +/- 0.47</td>
<td>48.33</td>
<td>0.86</td>
<td>19104</td>
</tr>
</table>
<b><br></br>IVC Plots</b>
<p> <a href='IVC.htm' target="_blank"> IVC.htm
 </a></p>
<b><br></br>All Intensity Plots</b>
<p> <a href='All.htm' target="_blank"> All.htm
 </a></p>
<b><br></br>Error graphs: </b>
<p> <a href='Error.htm' target="_blank"> Error.htm
 </a></p>
<td><a href="#Top">Back to top</a></td>
<a name="Lane1"><h2><br></br>Lane 1<br></br></h2></a>
<table border="1" cellpadding="5">
<tr>
<td>Lane </td>
<td>Tile </td>
<td>Clusters (raw)</td>
<td>Av 1st Cycle Int (PF) </td>
<td>Av % intensity after 20 cycles (PF) </td>
<td>% PF Clusters </td>
<td>% Align (PF) </td>
<td>Av Alignment Score (PF) </td>
<td>% Error Rate (PF) </td>
</tr>
<tr>
<td>1</td>
<td>0001</td>
<td>114972</td>
<td>326.48</td>
<td>94.39</td>
<td>57.44</td>
<td>70.2</td>
<td>9038.6</td>
<td>0.44</td>
</tr>
</table>
<td><a href="#Top">Back to top</a></td>
<a name="Lane2"><h2><br></br>Lane 2<br></br></h2></a>
<table border="1" cellpadding="5">
<tr>
<td>Lane </td>
<td>Tile </td>
<td>Clusters (raw)</td>
<td>Av 1st Cycle Int (PF) </td>
<td>Av % intensity after 20 cycles (PF) </td>
<td>% PF Clusters </td>
<td>% Align (PF) </td>
<td>Av Alignment Score (PF) </td>
<td>% Error Rate (PF) </td>
</tr>
<tr>
<td>2</td>
<td>0001</td>
<td>147793</td>
<td>448.12</td>
<td>83.68</td>
<td>38.57</td>
<td>53.7</td>
<td>6905.4</td>
<td>0.54</td>
</tr>
</table>
<td><a href="#Top">Back to top</a></td>
<a name="Lane3"><h2><br></br>Lane 3<br></br></h2></a>
<table border="1" cellpadding="5">
<tr>
<td>Lane </td>
<td>Tile </td>
<td>Clusters (raw)</td>
<td>Av 1st Cycle Int (PF) </td>
<td>Av % intensity after 20 cycles (PF) </td>
<td>% PF Clusters </td>
<td>% Align (PF) </td>
<td>Av Alignment Score (PF) </td>
<td>% Error Rate (PF) </td>
</tr>
<tr>
<td>3</td>
<td>0001</td>
<td>167904</td>
<td>374.05</td>
<td>86.91</td>
<td>40.36</td>
<td>81.3</td>
<td>10465.0</td>
<td>0.47</td>
</tr>
</table>
<td><a href="#Top">Back to top</a></td>
<a name="Lane4"><h2><br></br>Lane 4<br></br></h2></a>
<table border="1" cellpadding="5">
<tr>
<td>Lane </td>
<td>Tile </td>
<td>Clusters (raw)</td>
<td>Av 1st Cycle Int (PF) </td>
<td>Av % intensity after 20 cycles (PF) </td>
<td>% PF Clusters </td>
<td>% Align (PF) </td>
<td>Av Alignment Score (PF) </td>
<td>% Error Rate (PF) </td>
</tr>
<tr>
<td>4</td>
<td>0001</td>
<td>20308</td>
<td>276.85</td>
<td>92.87</td>
<td>84.26</td>
<td>80.4</td>
<td>10413.8</td>
<td>0.16</td>
</tr>
</table>
<td><a href="#Top">Back to top</a></td>
<a name="Lane5"><h2><br></br>Lane 5<br></br></h2></a>
<table border="1" cellpadding="5">
<tr>
<td>Lane </td>
<td>Tile </td>
<td>Clusters (raw)</td>
<td>Av 1st Cycle Int (PF) </td>
<td>Av % intensity after 20 cycles (PF) </td>
<td>% PF Clusters </td>
<td>% Align (PF) </td>
<td>Av Alignment Score (PF) </td>
<td>% Error Rate (PF) </td>
</tr>
</table>
<td><a href="#Top">Back to top</a></td>
<a name="Lane6"><h2><br></br>Lane 6<br></br></h2></a>
<table border="1" cellpadding="5">
<tr>
<td>Lane </td>
<td>Tile </td>
<td>Clusters (raw)</td>
<td>Av 1st Cycle Int (PF) </td>
<td>Av % intensity after 20 cycles (PF) </td>
<td>% PF Clusters </td>
<td>% Align (PF) </td>
<td>Av Alignment Score (PF) </td>
<td>% Error Rate (PF) </td>
</tr>
<tr>
<td>6</td>
<td>0001</td>
<td>166844</td>
<td>348.12</td>
<td>77.59</td>
<td>38.13</td>
<td>79.7</td>
<td>10264.4</td>
<td>0.44</td>
</tr>
</table>
<td><a href="#Top">Back to top</a></td>
<a name="Lane7"><h2><br></br>Lane 7<br></br></h2></a>
<table border="1" cellpadding="5">
<tr>
<td>Lane </td>
<td>Tile </td>
<td>Clusters (raw)</td>
<td>Av 1st Cycle Int (PF) </td>
<td>Av % intensity after 20 cycles (PF) </td>
<td>% PF Clusters </td>
<td>% Align (PF) </td>
<td>Av Alignment Score (PF) </td>
<td>% Error Rate (PF) </td>
</tr>
<tr>
<td>7</td>
<td>0001</td>
<td>98913</td>
<td>269.90</td>
<td>86.66</td>
<td>64.55</td>
<td>33.2</td>
<td>4217.5</td>
<td>1.02</td>
</tr>
</table>
<td><a href="#Top">Back to top</a></td>
<a name="Lane8"><h2><br></br>Lane 8<br></br></h2></a>
<table border="1" cellpadding="5">
<tr>
<td>Lane </td>
<td>Tile </td>
<td>Clusters (raw)</td>
<td>Av 1st Cycle Int (PF) </td>
<td>Av % intensity after 20 cycles (PF) </td>
<td>% PF Clusters </td>
<td>% Align (PF) </td>
<td>Av Alignment Score (PF) </td>
<td>% Error Rate (PF) </td>
</tr>
<tr>
<td>8</td>
<td>0001</td>
<td>64972</td>
<td>243.60</td>
<td>89.40</td>
<td>73.17</td>
<td>48.3</td>
<td>6182.8</td>
<td>0.71</td>
</tr>
</table>
<td><a href="#Top">Back to top</a></td>
</body>
</html>
"""
    pathname = os.path.join(gerald_dir, 'Summary.htm')
    # The context manager closes the handle even if the write raises.
    with open(pathname, 'w') as summary_file:
        summary_file.write(summary_htm)
622
def make_eland_results(gerald_dir):
    """Write the same canned ELAND result file for lanes 1-8.

    Creates ``s_1_eland_result.txt`` through ``s_8_eland_result.txt`` in
    *gerald_dir*.  Every file holds the same four reads: one with match
    code U0 on chrUextra.fa, one U1 on chr2L.fa, one NM (no position
    columns) and one U1 on Lambda.fa.

    :param gerald_dir: existing directory that receives the files.
    """
    eland_result = """>HWI-EAS229_24_207BTAAXX:1:7:599:759    ACATAGNCACAGACATAAACATAGACATAGAC U0      1       1       3       chrUextra.fa    28189829        R       D.
>HWI-EAS229_24_207BTAAXX:1:7:205:842    AAACAANNCTCCCAAACACGTAAACTGGAAAA  U1      0       1       0       chr2L.fa        8796855 R       DD      24T
>HWI-EAS229_24_207BTAAXX:1:7:776:582    AGCTCANCCGATCGAAAACCTCNCCAAGCAAT        NM      0       0       0
>HWI-EAS229_24_207BTAAXX:1:7:205:842    AAACAANNCTCCCAAACACGTAAACTGGAAAA        U1      0       1       0       Lambda.fa        8796855 R       DD      24T
"""
    for lane in range(1, 9):
        pathname = os.path.join(gerald_dir,
                                's_%d_eland_result.txt' % (lane,))
        # The context manager closes each handle even if the write raises.
        with open(pathname, 'w') as result_file:
            result_file.write(eland_result)
635
def make_runfolder(obj=None):
    """
    Build a fake runfolder tree inside a fresh temporary directory.

    The layout is runfolder/Data/Firecrest/{Matrix,Bustard/GERALD}; the
    matrix file, phasing parameters, GERALD config, Summary.htm and the
    eland result files are generated into it.  When obj is given, each
    directory path is stored on it as an attribute (temp_dir,
    runfolder_dir, data_dir, firecrest_dir, matrix_dir, bustard_dir,
    gerald_dir).
    """
    def _subdir(parent, name):
        # create parent/name and hand back the new path
        path = os.path.join(parent, name)
        os.mkdir(path)
        return path

    temp_dir = tempfile.mkdtemp(prefix='tmp_runfolder_')
    runfolder_dir = _subdir(temp_dir, '080102_HWI-EAS229_0010_207BTAAXX')
    data_dir = _subdir(runfolder_dir, 'Data')

    firecrest_dir = _subdir(data_dir,
                            'C1-33_Firecrest1.8.28_12-04-2008_diane')
    matrix_dir = _subdir(firecrest_dir, 'Matrix')
    make_matrix(os.path.join(matrix_dir, 's_matrix.txt'))

    bustard_dir = _subdir(firecrest_dir, 'Bustard1.8.28_12-04-2008_diane')
    make_phasing_params(bustard_dir)

    gerald_dir = _subdir(bustard_dir, 'GERALD_12-04-2008_diane')
    make_gerald_config_026(gerald_dir)
    make_summary_htm(gerald_dir)
    make_eland_results(gerald_dir)

    if obj is None:
        return

    # expose every directory on the caller's object
    created = (
        ('temp_dir', temp_dir),
        ('runfolder_dir', runfolder_dir),
        ('data_dir', data_dir),
        ('firecrest_dir', firecrest_dir),
        ('matrix_dir', matrix_dir),
        ('bustard_dir', bustard_dir),
        ('gerald_dir', gerald_dir),
    )
    for attr_name, attr_path in created:
        setattr(obj, attr_name, attr_path)
679
680
class RunfolderTests(TestCase):
    """
    Test components of the runfolder processing code
    which includes firecrest, bustard, and gerald

    Each test runs against a fake runfolder built by make_runfolder in
    setUp and removed again in tearDown.
    """
    def setUp(self):
        # attaches all the directories to the object passed in
        make_runfolder(self)

    def tearDown(self):
        # remove the whole temporary tree created in setUp
        shutil.rmtree(self.temp_dir)

    def test_firecrest(self):
        """
        Construct a firecrest object and round-trip it through xml
        """
        f = firecrest.firecrest(self.firecrest_dir)
        self.assertEqual(f.software, 'Firecrest')
        self.assertEqual(f.version, '1.8.28')
        self.assertEqual(f.start, 1)
        self.assertEqual(f.stop, 33)
        self.assertEqual(f.user, 'diane')
        self.assertEqual(f.date, date(2008, 4, 12))

        xml = f.get_elements()
        # just make sure that element tree can serialize the tree
        ElementTree.tostring(xml)

        f2 = firecrest.Firecrest(xml=xml)
        self.assertEqual(f.software, f2.software)
        self.assertEqual(f.version, f2.version)
        self.assertEqual(f.start, f2.start)
        self.assertEqual(f.stop, f2.stop)
        self.assertEqual(f.user, f2.user)
        self.assertEqual(f.date, f2.date)

    def test_bustard(self):
        """
        Construct a bustard object and round-trip it through xml
        """
        b = bustard.bustard(self.bustard_dir)
        self.assertEqual(b.software, 'Bustard')
        self.assertEqual(b.version, '1.8.28')
        self.assertEqual(b.date, date(2008, 4, 12))
        self.assertEqual(b.user, 'diane')
        self.assertEqual(len(b.phasing), 8)
        self.assertAlmostEqual(b.phasing[8].phasing, 0.0099)

        xml = b.get_elements()
        b2 = bustard.Bustard(xml=xml)
        self.assertEqual(b.software, b2.software)
        self.assertEqual(b.version, b2.version)
        self.assertEqual(b.date, b2.date)
        self.assertEqual(b.user, b2.user)
        self.assertEqual(len(b.phasing), len(b2.phasing))
        for key in b.phasing.keys():
            self.assertEqual(b.phasing[key].lane,
                             b2.phasing[key].lane)
            self.assertEqual(b.phasing[key].phasing,
                             b2.phasing[key].phasing)
            self.assertEqual(b.phasing[key].prephasing,
                             b2.phasing[key].prephasing)

    def test_gerald(self):
        """
        Construct a gerald object, check the config and summary data,
        then round-trip it through xml
        """
        # need to update gerald and make tests for it
        g = gerald.gerald(self.gerald_dir)

        self.assertEqual(g.software, 'GERALD')
        self.assertEqual(g.version, '1.68.2.2')
        self.assertEqual(g.date, datetime(2008, 4, 19, 19, 8, 30))
        self.assertEqual(len(g.lanes), len(g.lanes.keys()))
        self.assertEqual(len(g.lanes), len(g.lanes.items()))

        # list of genomes, matches what was defined up in
        # make_gerald_config.
        # the leading None offsets the list so it can be indexed by
        # lane number 1..8 instead of python's default 0..7
        genomes = [None, '/g/dm3', '/g/equcab1', '/g/equcab1', '/g/canfam2',
                         '/g/hg18', '/g/hg18', '/g/hg18', '/g/hg18', ]

        # test lane specific parameters from gerald config file
        for i in range(1, 9):
            cur_lane = g.lanes[i]
            self.assertEqual(cur_lane.analysis, 'eland')
            self.assertEqual(cur_lane.eland_genome, genomes[i])
            self.assertEqual(cur_lane.read_length, '32')
            self.assertEqual(cur_lane.use_bases, 'Y'*32)

        # test data extracted from summary file
        # (cluster mean, standard deviation) per lane, None pads index 0
        clusters = [None,
                    (96483, 9074), (133738, 7938),
                    (152142, 10002), (15784, 2162),
                    (119735, 8465), (152177, 8146),
                    (84649, 7325), (54622, 4812),]

        self.assertEqual(len(g.summary), 1)
        for i in range(1, 9):
            summary_lane = g.summary[0][i]
            self.assertEqual(summary_lane.cluster, clusters[i])
            self.assertEqual(summary_lane.lane, i)

        xml = g.get_elements()
        # just make sure that element tree can serialize the tree
        ElementTree.tostring(xml)
        g2 = gerald.Gerald(xml=xml)

        # do it all again after extracting from the xml file
        self.assertEqual(g.version, g2.version)
        self.assertEqual(g.date, g2.date)
        self.assertEqual(len(g.lanes.keys()), len(g2.lanes.keys()))
        self.assertEqual(len(g.lanes.items()), len(g2.lanes.items()))

        # test lane specific parameters from gerald config file
        for i in range(1, 9):
            g_lane = g.lanes[i]
            g2_lane = g2.lanes[i]
            self.assertEqual(g_lane.analysis, g2_lane.analysis)
            self.assertEqual(g_lane.eland_genome, g2_lane.eland_genome)
            self.assertEqual(g_lane.read_length, g2_lane.read_length)
            self.assertEqual(g_lane.use_bases, g2_lane.use_bases)

        # test (some) summary elements
        self.assertEqual(len(g.summary), 1)
        for i in range(1, 9):
            g_summary = g.summary[0][i]
            g2_summary = g2.summary[0][i]
            self.assertEqual(g_summary.cluster, g2_summary.cluster)
            self.assertEqual(g_summary.lane, g2_summary.lane)

        # the eland comparison does not depend on the lane index above,
        # so it is done once rather than once per summary lane
        g_eland = g.eland_results
        g2_eland = g2.eland_results
        for key in g_eland:
            g_results = g_eland[key]
            g2_results = g2_eland[key]
            self.assertEqual(g_results.reads,
                             g2_results.reads)
            self.assertEqual(len(g_results.mapped_reads),
                             len(g2_results.mapped_reads))
            for k in g_results.mapped_reads.keys():
                self.assertEqual(g_results.mapped_reads[k],
                                 g2_results.mapped_reads[k])

            self.assertEqual(len(g_results.match_codes),
                             len(g2_results.match_codes))
            for k in g_results.match_codes.keys():
                self.assertEqual(g_results.match_codes[k],
                                 g2_results.match_codes[k])

    def test_eland(self):
        """
        Load the eland result files with a genome map and round-trip
        the result through xml
        """
        dm3_map = { 'chrUextra.fa' : 'dm3/chrUextra.fa',
                    'chr2L.fa': 'dm3/chr2L.fa',
                    'Lambda.fa': 'Lambda.fa'}
        genome_maps = { 1:dm3_map, 2:dm3_map, 3:dm3_map, 4:dm3_map,
                        5:dm3_map, 6:dm3_map, 7:dm3_map, 8:dm3_map }
        eland = gerald.eland(self.gerald_dir, genome_maps=genome_maps)

        for key in eland:
            lane = eland[key]
            self.assertEqual(lane.reads, 4)
            self.assertEqual(lane.sample_name, "s")
            self.assertEqual(lane.lane_id, key.lane)
            self.assertEqual(len(lane.mapped_reads), 3)
            self.assertEqual(lane.mapped_reads['Lambda.fa'], 1)
            self.assertEqual(lane.mapped_reads['dm3/chr2L.fa'], 1)
            self.assertEqual(lane.match_codes['U1'], 2)
            self.assertEqual(lane.match_codes['NM'], 1)

        xml = eland.get_elements()
        # just make sure that element tree can serialize the tree
        ElementTree.tostring(xml)
        e2 = gerald.ELAND(xml=xml)

        for key in eland:
            l1 = eland[key]
            l2 = e2[key]
            self.assertEqual(l1.reads, l2.reads)
            self.assertEqual(l1.sample_name, l2.sample_name)
            self.assertEqual(l1.lane_id, l2.lane_id)
            self.assertEqual(len(l1.mapped_reads), len(l2.mapped_reads))
            self.assertEqual(len(l1.mapped_reads), 3)
            for k in l1.mapped_reads.keys():
                self.assertEqual(l1.mapped_reads[k],
                                 l2.mapped_reads[k])

            self.assertEqual(len(l1.match_codes), 9)
            self.assertEqual(len(l1.match_codes), len(l2.match_codes))
            for k in l1.match_codes.keys():
                self.assertEqual(l1.match_codes[k],
                                 l2.match_codes[k])

    def test_runfolder(self):
        """
        Discover the run in the fake runfolder and round-trip it
        through xml
        """
        runs = runfolder.get_runs(self.runfolder_dir)

        # do we get the flowcell id from the filename?
        self.assertEqual(len(runs), 1)
        self.assertEqual(runs[0].serialization_filename, 'run_207BTAAXX_2008-04-19.xml')

        # do we get the flowcell id from the FlowcellId.xml file
        make_flowcell_id(self.runfolder_dir, '207BTAAXY')
        runs = runfolder.get_runs(self.runfolder_dir)
        self.assertEqual(len(runs), 1)
        self.assertEqual(runs[0].serialization_filename, 'run_207BTAAXY_2008-04-19.xml')

        r1 = runs[0]
        xml = r1.get_elements()
        # just make sure that element tree can serialize the tree
        ElementTree.tostring(xml)

        r2 = runfolder.PipelineRun(xml=xml)
        self.assertEqual(r1.serialization_filename, r2.serialization_filename)
        self.assertIsNotNone(r2.image_analysis)
        self.assertIsNotNone(r2.bustard)
        self.assertIsNotNone(r2.gerald)
894         self.failIfEqual(r2.gerald, None)
895
896
def suite():
    """Return a TestSuite holding every test method of RunfolderTests."""
    import unittest

    collected = unittest.TestSuite()
    runfolder_cases = unittest.defaultTestLoader.loadTestsFromTestCase(
        RunfolderTests)
    collected.addTests(runfolder_cases)
    return collected
902
if __name__ == "__main__":
    # Executed as a script: run the tests that suite() collects.
    from unittest import main as run_tests
    run_tests(defaultTest="suite")