Rename pipeline to pipelines to imply that we can process more than just Illumina.
[htsworkflow.git] / htsworkflow / pipelines / test / test_runfolder030.py
1 #!/usr/bin/env python
2
3 from datetime import datetime, date
4 import os
5 import tempfile
6 import shutil
7 import unittest
8
9 from htsworkflow.pipelines import firecrest
10 from htsworkflow.pipelines import bustard
11 from htsworkflow.pipelines import gerald
12 from htsworkflow.pipelines import runfolder
13 from htsworkflow.pipelines.runfolder import ElementTree
14
15
def make_flowcell_id(runfolder_dir, flowcell_id=None):
    """
    Write a fake Config/FlowcellId.xml file below runfolder_dir.

    runfolder_dir -- directory that holds (or will hold) the Config
                     directory; Config is created when it is missing.
    flowcell_id   -- text stored in the <Text> element of the file;
                     when None the default '207BTAAXY' is used.
    """
    if flowcell_id is None:
        flowcell_id = '207BTAAXY'

    config = """<?xml version="1.0"?>
<FlowcellId>
  <Text>%s</Text>
</FlowcellId>""" % (flowcell_id,)
    config_dir = os.path.join(runfolder_dir, 'Config')

    if not os.path.exists(config_dir):
        os.mkdir(config_dir)
    pathname = os.path.join(config_dir, 'FlowcellId.xml')
    # the with block closes the file even when the write raises
    with open(pathname, 'w') as f:
        f.write(config)
32
def make_matrix(matrix_dir):
    """
    Write a fake s_matrix.txt (frequency response matrix) into matrix_dir.

    matrix_dir -- existing directory that receives the file.
    """
    contents = (
        "# Auto-generated frequency response matrix\n"
        "> A\n"
        "> C\n"
        "> G\n"
        "> T\n"
        # every data row keeps a trailing space before the newline
        "0.77 0.15 -0.04 -0.04 \n"
        "0.76 1.02 -0.05 -0.06 \n"
        "-0.10 -0.10 1.17 -0.03 \n"
        "-0.13 -0.12 0.80 1.27 \n"
    )
    s_matrix = os.path.join(matrix_dir, 's_matrix.txt')
    # the with block closes the file even when the write raises
    with open(s_matrix, 'w') as f:
        f.write(contents)
48     
def make_phasing_params(bustard_dir):
    """
    Write fake params1.xml .. params8.xml phasing files into bustard_dir.

    bustard_dir -- existing directory that receives the eight files;
                   every file gets the same phasing/prephasing values.
    """
    # identical for every lane, so build it once outside the loop
    params = """<Parameters>
  <Phasing>0.009900</Phasing>
  <Prephasing>0.003500</Prephasing>
</Parameters>
"""
    for lane in range(1, 9):
        pathname = os.path.join(bustard_dir, 'params%d.xml' % (lane,))
        # the with block closes the file even when the write raises
        with open(pathname, 'w') as f:
            f.write(params)
59
def make_gerald_config(gerald_dir):
    """
    Write a fake GERALD config.xml into gerald_dir.

    gerald_dir -- existing directory that receives the file.

    The file holds chip wide run parameters plus lane specific
    ANALYSIS, ELAND_GENOME, READ_LENGTH and USE_BASES entries for
    lanes s_1 .. s_8.
    """
    config_xml = """<RunParameters>
<ChipWideRunParameters>
  <ANALYSIS>default</ANALYSIS>
  <BAD_LANES></BAD_LANES>
  <BAD_TILES></BAD_TILES>
  <CONTAM_DIR></CONTAM_DIR>
  <CONTAM_FILE></CONTAM_FILE>
  <ELAND_GENOME>Need_to_specify_ELAND_genome_directory</ELAND_GENOME>
  <ELAND_MULTIPLE_INSTANCES>8</ELAND_MULTIPLE_INSTANCES>
  <ELAND_REPEAT></ELAND_REPEAT>
  <EMAIL_DOMAIN>domain.com</EMAIL_DOMAIN>
  <EMAIL_LIST>diane</EMAIL_LIST>
  <EMAIL_SERVER>localhost:25</EMAIL_SERVER>
  <EXPT_DIR>/home/diane/gec/080416_HWI-EAS229_0024_207BTAAXX/Data/C1-33_Firecrest1.8.28_19-04-2008_diane/Bustard1.8.28_19-04-2008_diane</EXPT_DIR>
  <EXPT_DIR_ROOT>/home/diane/gec</EXPT_DIR_ROOT>
  <FORCE>1</FORCE>
  <GENOME_DIR>/home/diane/proj/SolexaPipeline-0.2.2.6/Goat/../Gerald/../../Genomes</GENOME_DIR>
  <GENOME_FILE>Need_to_specify_genome_file_name</GENOME_FILE>
  <HAMSTER_FLAG>genome</HAMSTER_FLAG>
  <OUT_DIR>/home/diane/gec/080416_HWI-EAS229_0024_207BTAAXX/Data/C1-33_Firecrest1.8.28_19-04-2008_diane/Bustard1.8.28_19-04-2008_diane/GERALD_19-04-2008_diane</OUT_DIR>
  <POST_RUN_COMMAND></POST_RUN_COMMAND>
  <PRB_FILE_SUFFIX>_prb.txt</PRB_FILE_SUFFIX>
  <PURE_BASES>12</PURE_BASES>
  <QF_PARAMS>'((CHASTITY&gt;=0.6))'</QF_PARAMS>
  <QHG_FILE_SUFFIX>_qhg.txt</QHG_FILE_SUFFIX>
  <QUALITY_FORMAT>--symbolic</QUALITY_FORMAT>
  <READ_LENGTH>32</READ_LENGTH>
  <SEQUENCE_FORMAT>--scarf</SEQUENCE_FORMAT>
  <SEQ_FILE_SUFFIX>_seq.txt</SEQ_FILE_SUFFIX>
  <SIG_FILE_SUFFIX_DEPHASED>_sig2.txt</SIG_FILE_SUFFIX_DEPHASED>
  <SIG_FILE_SUFFIX_NOT_DEPHASED>_sig.txt</SIG_FILE_SUFFIX_NOT_DEPHASED>
  <SOFTWARE_VERSION>@(#) Id: GERALD.pl,v 1.68.2.2 2007/06/13 11:08:49 km Exp</SOFTWARE_VERSION>
  <TILE_REGEX>s_[1-8]_[0-9][0-9][0-9][0-9]</TILE_REGEX>
  <TILE_ROOT>s</TILE_ROOT>
  <TIME_STAMP>Sat Apr 19 19:08:30 2008</TIME_STAMP>
  <TOOLS_DIR>/home/diane/proj/SolexaPipeline-0.2.2.6/Goat/../Gerald</TOOLS_DIR>
  <USE_BASES>all</USE_BASES>
  <WEB_DIR_ROOT>http://host.domain.com/yourshare/</WEB_DIR_ROOT>
</ChipWideRunParameters>
<LaneSpecificRunParameters>
  <ANALYSIS>
    <s_1>eland</s_1>
    <s_2>eland</s_2>
    <s_3>eland</s_3>
    <s_4>eland</s_4>
    <s_5>eland</s_5>
    <s_6>eland</s_6>
    <s_7>eland</s_7>
    <s_8>eland</s_8>
  </ANALYSIS>
  <ELAND_GENOME>
    <s_1>/g/dm3</s_1>
    <s_2>/g/equcab1</s_2>
    <s_3>/g/equcab1</s_3>
    <s_4>/g/canfam2</s_4>
    <s_5>/g/hg18</s_5>
    <s_6>/g/hg18</s_6>
    <s_7>/g/hg18</s_7>
    <s_8>/g/hg18</s_8>
  </ELAND_GENOME>
  <READ_LENGTH>
    <s_1>32</s_1>
    <s_2>32</s_2>
    <s_3>32</s_3>
    <s_4>32</s_4>
    <s_5>32</s_5>
    <s_6>32</s_6>
    <s_7>32</s_7>
    <s_8>32</s_8>
  </READ_LENGTH>
  <USE_BASES>
    <s_1>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_1>
    <s_2>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_2>
    <s_3>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_3>
    <s_4>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_4>
    <s_5>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_5>
    <s_6>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_6>
    <s_7>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_7>
    <s_8>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_8>
  </USE_BASES>
</LaneSpecificRunParameters>
</RunParameters>
"""
    pathname = os.path.join(gerald_dir, 'config.xml')
    # the with block closes the file even when the write raises
    with open(pathname, 'w') as f:
        f.write(config_xml)
148     
def make_summary_htm(gerald_dir):
    """
    Write a fake GERALD Summary.htm report into gerald_dir.

    gerald_dir -- existing directory that receives the file.

    The page contains the chip summary tables, the lane parameter
    and lane result summaries for lanes 1 .. 8, the expanded lane
    summary and one per-lane tile table (the lane 5 table has only
    its header row).
    """
    summary_htm="""<!--RUN_TIME Wed Jul  2 06:47:44 2008 -->
<!--SOFTWARE_VERSION @(#) $Id: jerboa.pl,v 1.94 2007/12/04 09:59:07 rshaw Exp $-->
<html>
<body>

<a name="Top"><h2><title>080627_HWI-EAS229_0036_3055HAXX Summary</title></h2></a>
<h1>Summary Information For Experiment 080627_HWI-EAS229_0036_3055HAXX on Machine HWI-EAS229</h1>
<h2><br></br>Chip Summary<br></br></h2>
<table border="1" cellpadding="5">
<tr><td>Machine</td><td>HWI-EAS229</td></tr>
<tr><td>Run Folder</td><td>080627_HWI-EAS229_0036_3055HAXX</td></tr>
<tr><td>Chip ID</td><td>unknown</td></tr>
</table>
<h2><br></br>Chip Results Summary<br></br></h2>
<table border="1" cellpadding="5">
<tr>
<td>Clusters</td>
<td>Clusters (PF)</td>
<td>Yield (kbases)</td>
</tr>
<tr><td>80933224</td>
<td>43577803</td>
<td>1133022</td>
</tr>
</table>
<h2><br></br>Lane Parameter Summary<br></br></h2>
<table border="1" cellpadding="5">
<tr>
<td>Lane</td>
<td>Sample ID</td>
<td>Sample Target</td>
<td>Sample Type</td>
<td>Length</td>
<td>Filter</td>
<td>Num Tiles</td>
<td>Tiles</td>
</tr>
<tr>
<td>1</td>
<td>unknown</td>
<td>mm9</td>
<td>ELAND</td>
<td>26</td>
<td>'((CHASTITY>=0.6))'</td>
<td>100</td>
<td><a href="#Lane1">Lane 1</a></td>
</tr>
<tr>
<td>2</td>
<td>unknown</td>
<td>mm9</td>
<td>ELAND</td>
<td>26</td>
<td>'((CHASTITY>=0.6))'</td>
<td>100</td>
<td><a href="#Lane2">Lane 2</a></td>
</tr>
<tr>
<td>3</td>
<td>unknown</td>
<td>mm9</td>
<td>ELAND</td>
<td>26</td>
<td>'((CHASTITY>=0.6))'</td>
<td>100</td>
<td><a href="#Lane3">Lane 3</a></td>
</tr>
<tr>
<td>4</td>
<td>unknown</td>
<td>elegans170</td>
<td>ELAND</td>
<td>26</td>
<td>'((CHASTITY>=0.6))'</td>
<td>100</td>
<td><a href="#Lane4">Lane 4</a></td>
</tr>
<tr>
<td>5</td>
<td>unknown</td>
<td>elegans170</td>
<td>ELAND</td>
<td>26</td>
<td>'((CHASTITY>=0.6))'</td>
<td>100</td>
<td><a href="#Lane5">Lane 5</a></td>
</tr>
<tr>
<td>6</td>
<td>unknown</td>
<td>elegans170</td>
<td>ELAND</td>
<td>26</td>
<td>'((CHASTITY>=0.6))'</td>
<td>100</td>
<td><a href="#Lane6">Lane 6</a></td>
</tr>
<tr>
<td>7</td>
<td>unknown</td>
<td>elegans170</td>
<td>ELAND</td>
<td>26</td>
<td>'((CHASTITY>=0.6))'</td>
<td>100</td>
<td><a href="#Lane7">Lane 7</a></td>
</tr>
<tr>
<td>8</td>
<td>unknown</td>
<td>elegans170</td>
<td>ELAND</td>
<td>26</td>
<td>'((CHASTITY>=0.6))'</td>
<td>100</td>
<td><a href="#Lane8">Lane 8</a></td>
</tr>
</table>
<h2><br></br>Lane Results Summary<br></br></h2>
<table border="1" cellpadding="5">
<tr>
<td colspan="2">Lane Info</td>
<td colspan="8">Tile Mean +/- SD for Lane</td>
</tr>
<tr>
<td>Lane </td>
<td>Lane Yield (kbases) </td>
<td>Clusters (raw)</td>
<td>Clusters (PF) </td>
<td>1st Cycle Int (PF) </td>
<td>% intensity after 20 cycles (PF) </td>
<td>% PF Clusters </td>
<td>% Align (PF) </td>
<td>Alignment Score (PF) </td>
<td> % Error Rate (PF) </td>
</tr>
<tr>
<td>1</td>
<td>158046</td>
<td>96483 +/- 9074</td>
<td>60787 +/- 4240</td>
<td>329 +/- 35</td>
<td>101.88 +/- 6.03</td>
<td>63.21 +/- 3.29</td>
<td>70.33 +/- 0.24</td>
<td>9054.08 +/- 59.16</td>
<td>0.46 +/- 0.18</td>
</tr>
<tr>
<td>2</td>
<td>156564</td>
<td>133738 +/- 7938</td>
<td>60217 +/- 1926</td>
<td>444 +/- 39</td>
<td>92.62 +/- 7.58</td>
<td>45.20 +/- 3.31</td>
<td>51.98 +/- 0.74</td>
<td>6692.04 +/- 92.49</td>
<td>0.46 +/- 0.09</td>
</tr>
<tr>
<td>3</td>
<td>185818</td>
<td>152142 +/- 10002</td>
<td>71468 +/- 2827</td>
<td>366 +/- 36</td>
<td>91.53 +/- 8.66</td>
<td>47.19 +/- 3.80</td>
<td>82.24 +/- 0.44</td>
<td>10598.68 +/- 64.13</td>
<td>0.41 +/- 0.04</td>
</tr>
<tr>
<td>4</td>
<td>34953</td>
<td>15784 +/- 2162</td>
<td>13443 +/- 1728</td>
<td>328 +/- 40</td>
<td>97.53 +/- 9.87</td>
<td>85.29 +/- 1.91</td>
<td>80.02 +/- 0.53</td>
<td>10368.82 +/- 71.08</td>
<td>0.15 +/- 0.05</td>
</tr>
<tr>
<td>5</td>
<td>167936</td>
<td>119735 +/- 8465</td>
<td>64590 +/- 2529</td>
<td>417 +/- 37</td>
<td>88.69 +/- 14.79</td>
<td>54.10 +/- 2.59</td>
<td>76.95 +/- 0.32</td>
<td>9936.47 +/- 65.75</td>
<td>0.28 +/- 0.02</td>
</tr>
<tr>
<td>6</td>
<td>173463</td>
<td>152177 +/- 8146</td>
<td>66716 +/- 2493</td>
<td>372 +/- 39</td>
<td>87.06 +/- 9.86</td>
<td>43.98 +/- 3.12</td>
<td>78.80 +/- 0.43</td>
<td>10162.28 +/- 49.65</td>
<td>0.38 +/- 0.03</td>
</tr>
<tr>
<td>7</td>
<td>149287</td>
<td>84649 +/- 7325</td>
<td>57418 +/- 3617</td>
<td>295 +/- 28</td>
<td>89.40 +/- 8.23</td>
<td>67.97 +/- 1.82</td>
<td>33.38 +/- 0.25</td>
<td>4247.92 +/- 32.37</td>
<td>1.00 +/- 0.03</td>
</tr>
<tr>
<td>8</td>
<td>106953</td>
<td>54622 +/- 4812</td>
<td>41136 +/- 3309</td>
<td>284 +/- 37</td>
<td>90.21 +/- 9.10</td>
<td>75.39 +/- 2.27</td>
<td>48.33 +/- 0.29</td>
<td>6169.21 +/- 169.50</td>
<td>0.86 +/- 1.22</td>
</tr>
<tr><td colspan="13">Tile mean across chip</td></tr>
<tr>
<td>Av.</td>
<td></td>
<td>101166</td>
<td>54472</td>
<td>354</td>
<td>92.36</td>
<td>60.29</td>
<td>65.25</td>
<td>8403.69</td>
<td>0.50</td>
</tr>
</table>
<h2><br></br>Expanded Lane Summary<br></br></h2>
<table border="1" cellpadding="5">
<tr>

<tr><td colspan="2">Lane Info</td>
<td colspan="2">Phasing Info</td>
<td colspan="2">Raw Data (tile mean)</td>
<td colspan="7">Filtered Data (tile mean)</td></tr>
<td>Lane </td>
<td>Clusters (tile mean) (raw)</td>
<td>% Phasing </td>
<td>% Prephasing </td>
<td>% Error Rate (raw) </td>
<td> Equiv Perfect Clusters (raw) </td>
<td>% retained </td>
<td>Cycle 2-4 Av Int (PF) </td>
<td>Cycle 2-10 Av % Loss (PF) </td>
<td>Cycle 10-20 Av % Loss (PF) </td>
<td>% Align (PF) </td>
<td>% Error Rate (PF) </td>
<td> Equiv Perfect Clusters (PF) </td>
</tr>
<tr>
<td>1</td>
<td>96483</td>
<td>0.7700</td>
<td>0.3100</td>
<td>1.00</td>
<td>49676</td>
<td>63.21</td>
<td>317 +/- 32</td>
<td>0.13 +/- 0.44</td>
<td>-1.14 +/- 0.34</td>
<td>70.33</td>
<td>0.46</td>
<td>41758</td>
</tr>
<tr>
<td>2</td>
<td>133738</td>
<td>0.7700</td>
<td>0.3100</td>
<td>1.22</td>
<td>40467</td>
<td>45.20</td>
<td>415 +/- 33</td>
<td>0.29 +/- 0.40</td>
<td>-0.79 +/- 0.35</td>
<td>51.98</td>
<td>0.46</td>
<td>30615</td>
</tr>
<tr>
<td>3</td>
<td>152142</td>
<td>0.7700</td>
<td>0.3100</td>
<td>1.30</td>
<td>78588</td>
<td>47.19</td>
<td>344 +/- 26</td>
<td>0.68 +/- 0.51</td>
<td>-0.77 +/- 0.42</td>
<td>82.24</td>
<td>0.41</td>
<td>57552</td>
</tr>
<tr>
<td>4</td>
<td>15784</td>
<td>0.7700</td>
<td>0.3100</td>
<td>0.29</td>
<td>11095</td>
<td>85.29</td>
<td>306 +/- 34</td>
<td>0.20 +/- 0.69</td>
<td>-1.28 +/- 0.66</td>
<td>80.02</td>
<td>0.15</td>
<td>10671</td>
</tr>
<tr>
<td>5</td>
<td>119735</td>
<td>0.7700</td>
<td>0.3100</td>
<td>0.85</td>
<td>60335</td>
<td>54.10</td>
<td>380 +/- 32</td>
<td>0.34 +/- 0.49</td>
<td>-1.55 +/- 4.69</td>
<td>76.95</td>
<td>0.28</td>
<td>49015</td>
</tr>
<tr>
<td>6</td>
<td>152177</td>
<td>0.7700</td>
<td>0.3100</td>
<td>1.21</td>
<td>70905</td>
<td>43.98</td>
<td>333 +/- 27</td>
<td>0.57 +/- 0.50</td>
<td>-0.91 +/- 0.39</td>
<td>78.80</td>
<td>0.38</td>
<td>51663</td>
</tr>
<tr>
<td>7</td>
<td>84649</td>
<td>0.7700</td>
<td>0.3100</td>
<td>1.38</td>
<td>21069</td>
<td>67.97</td>
<td>272 +/- 20</td>
<td>1.15 +/- 0.52</td>
<td>-0.84 +/- 0.58</td>
<td>33.38</td>
<td>1.00</td>
<td>18265</td>
</tr>
<tr>
<td>8</td>
<td>54622</td>
<td>0.7700</td>
<td>0.3100</td>
<td>1.17</td>
<td>21335</td>
<td>75.39</td>
<td>262 +/- 31</td>
<td>1.10 +/- 0.59</td>
<td>-1.01 +/- 0.47</td>
<td>48.33</td>
<td>0.86</td>
<td>19104</td>
</tr>
</table>
<b><br></br>IVC Plots</b>
<p> <a href='IVC.htm' target="_blank"> IVC.htm
 </a></p>
<b><br></br>All Intensity Plots</b>
<p> <a href='All.htm' target="_blank"> All.htm
 </a></p>
<b><br></br>Error graphs: </b>
<p> <a href='Error.htm' target="_blank"> Error.htm
 </a></p>
<td><a href="#Top">Back to top</a></td>
<a name="Lane1"><h2><br></br>Lane 1<br></br></h2></a>
<table border="1" cellpadding="5">
<tr>
<td>Lane </td>
<td>Tile </td>
<td>Clusters (raw)</td>
<td>Av 1st Cycle Int (PF) </td>
<td>Av % intensity after 20 cycles (PF) </td>
<td>% PF Clusters </td>
<td>% Align (PF) </td>
<td>Av Alignment Score (PF) </td>
<td>% Error Rate (PF) </td>
</tr>
<tr>
<td>1</td>
<td>0001</td>
<td>114972</td>
<td>326.48</td>
<td>94.39</td>
<td>57.44</td>
<td>70.2</td>
<td>9038.6</td>
<td>0.44</td>
</tr>
</table>
<td><a href="#Top">Back to top</a></td>
<a name="Lane2"><h2><br></br>Lane 2<br></br></h2></a>
<table border="1" cellpadding="5">
<tr>
<td>Lane </td>
<td>Tile </td>
<td>Clusters (raw)</td>
<td>Av 1st Cycle Int (PF) </td>
<td>Av % intensity after 20 cycles (PF) </td>
<td>% PF Clusters </td>
<td>% Align (PF) </td>
<td>Av Alignment Score (PF) </td>
<td>% Error Rate (PF) </td>
</tr>
<tr>
<td>2</td>
<td>0001</td>
<td>147793</td>
<td>448.12</td>
<td>83.68</td>
<td>38.57</td>
<td>53.7</td>
<td>6905.4</td>
<td>0.54</td>
</tr>
</table>
<td><a href="#Top">Back to top</a></td>
<a name="Lane3"><h2><br></br>Lane 3<br></br></h2></a>
<table border="1" cellpadding="5">
<tr>
<td>Lane </td>
<td>Tile </td>
<td>Clusters (raw)</td>
<td>Av 1st Cycle Int (PF) </td>
<td>Av % intensity after 20 cycles (PF) </td>
<td>% PF Clusters </td>
<td>% Align (PF) </td>
<td>Av Alignment Score (PF) </td>
<td>% Error Rate (PF) </td>
</tr>
<tr>
<td>3</td>
<td>0001</td>
<td>167904</td>
<td>374.05</td>
<td>86.91</td>
<td>40.36</td>
<td>81.3</td>
<td>10465.0</td>
<td>0.47</td>
</tr>
</table>
<td><a href="#Top">Back to top</a></td>
<a name="Lane4"><h2><br></br>Lane 4<br></br></h2></a>
<table border="1" cellpadding="5">
<tr>
<td>Lane </td>
<td>Tile </td>
<td>Clusters (raw)</td>
<td>Av 1st Cycle Int (PF) </td>
<td>Av % intensity after 20 cycles (PF) </td>
<td>% PF Clusters </td>
<td>% Align (PF) </td>
<td>Av Alignment Score (PF) </td>
<td>% Error Rate (PF) </td>
</tr>
<tr>
<td>4</td>
<td>0001</td>
<td>20308</td>
<td>276.85</td>
<td>92.87</td>
<td>84.26</td>
<td>80.4</td>
<td>10413.8</td>
<td>0.16</td>
</tr>
</table>
<td><a href="#Top">Back to top</a></td>
<a name="Lane5"><h2><br></br>Lane 5<br></br></h2></a>
<table border="1" cellpadding="5">
<tr>
<td>Lane </td>
<td>Tile </td>
<td>Clusters (raw)</td>
<td>Av 1st Cycle Int (PF) </td>
<td>Av % intensity after 20 cycles (PF) </td>
<td>% PF Clusters </td>
<td>% Align (PF) </td>
<td>Av Alignment Score (PF) </td>
<td>% Error Rate (PF) </td>
</tr>
</table>
<td><a href="#Top">Back to top</a></td>
<a name="Lane6"><h2><br></br>Lane 6<br></br></h2></a>
<table border="1" cellpadding="5">
<tr>
<td>Lane </td>
<td>Tile </td>
<td>Clusters (raw)</td>
<td>Av 1st Cycle Int (PF) </td>
<td>Av % intensity after 20 cycles (PF) </td>
<td>% PF Clusters </td>
<td>% Align (PF) </td>
<td>Av Alignment Score (PF) </td>
<td>% Error Rate (PF) </td>
</tr>
<tr>
<td>6</td>
<td>0001</td>
<td>166844</td>
<td>348.12</td>
<td>77.59</td>
<td>38.13</td>
<td>79.7</td>
<td>10264.4</td>
<td>0.44</td>
</tr>
</table>
<td><a href="#Top">Back to top</a></td>
<a name="Lane7"><h2><br></br>Lane 7<br></br></h2></a>
<table border="1" cellpadding="5">
<tr>
<td>Lane </td>
<td>Tile </td>
<td>Clusters (raw)</td>
<td>Av 1st Cycle Int (PF) </td>
<td>Av % intensity after 20 cycles (PF) </td>
<td>% PF Clusters </td>
<td>% Align (PF) </td>
<td>Av Alignment Score (PF) </td>
<td>% Error Rate (PF) </td>
</tr>
<tr>
<td>7</td>
<td>0001</td>
<td>98913</td>
<td>269.90</td>
<td>86.66</td>
<td>64.55</td>
<td>33.2</td>
<td>4217.5</td>
<td>1.02</td>
</tr>
</table>
<td><a href="#Top">Back to top</a></td>
<a name="Lane8"><h2><br></br>Lane 8<br></br></h2></a>
<table border="1" cellpadding="5">
<tr>
<td>Lane </td>
<td>Tile </td>
<td>Clusters (raw)</td>
<td>Av 1st Cycle Int (PF) </td>
<td>Av % intensity after 20 cycles (PF) </td>
<td>% PF Clusters </td>
<td>% Align (PF) </td>
<td>Av Alignment Score (PF) </td>
<td>% Error Rate (PF) </td>
</tr>
<tr>
<td>8</td>
<td>0001</td>
<td>64972</td>
<td>243.60</td>
<td>89.40</td>
<td>73.17</td>
<td>48.3</td>
<td>6182.8</td>
<td>0.71</td>
</tr>
</table>
<td><a href="#Top">Back to top</a></td>
</body>
</html>
"""
    pathname = os.path.join(gerald_dir, 'Summary.htm')
    # the with block closes the file even when the write raises
    with open(pathname, 'w') as f:
        f.write(summary_htm)
753
def make_eland_results(gerald_dir):
    """
    Write fake s_1_eland_result.txt .. s_8_eland_result.txt files.

    gerald_dir -- existing directory that receives the eight files;
                  every lane gets the same four result lines.
    """
    # NOTE(review): the columns below are separated by runs of spaces
    # in this copy of the fixture -- confirm that this is the separator
    # the eland parser expects before touching the whitespace.
    eland_result = """>HWI-EAS229_24_207BTAAXX:1:7:599:759    ACATAGNCACAGACATAAACATAGACATAGAC U0      1       1       3       chrUextra.fa    28189829        R       D.
>HWI-EAS229_24_207BTAAXX:1:7:205:842    AAACAANNCTCCCAAACACGTAAACTGGAAAA  U1      0       1       0       chr2L.fa        8796855 R       DD      24T
>HWI-EAS229_24_207BTAAXX:1:7:776:582    AGCTCANCCGATCGAAAACCTCNCCAAGCAAT        NM      0       0       0
>HWI-EAS229_24_207BTAAXX:1:7:205:842    AAACAANNCTCCCAAACACGTAAACTGGAAAA        U1      0       1       0       Lambda.fa        8796855 R       DD      24T
"""
    for i in range(1,9):
        pathname = os.path.join(gerald_dir,
                                's_%d_eland_result.txt' % (i,))
        # the with block closes the file even when the write raises
        with open(pathname, 'w') as f:
            f.write(eland_result)
766
def make_runfolder(obj=None):
    """
    Build a fake runfolder tree inside a fresh temporary directory.

    The tree is <temp>/080102_HWI-EAS229_0010_207BTAAXX/Data/<firecrest>/
    with a Matrix directory and a <bustard>/<gerald> chain below the
    firecrest directory; each level is filled by its make_* helper.

    obj -- when not None, the created paths are stored on it as
           temp_dir, runfolder_dir, data_dir, firecrest_dir,
           matrix_dir, bustard_dir and gerald_dir.
    """
    join = os.path.join

    # work out every path first; nothing below touches the disk
    # except mkdtemp itself
    root = tempfile.mkdtemp(prefix='tmp_runfolder_')
    run_path = join(root, '080102_HWI-EAS229_0010_207BTAAXX')
    data_path = join(run_path, 'Data')
    firecrest_path = join(data_path,
                          'C1-33_Firecrest1.8.28_12-04-2008_diane')
    matrix_path = join(firecrest_path, 'Matrix')
    bustard_path = join(firecrest_path, 'Bustard1.8.28_12-04-2008_diane')
    gerald_path = join(bustard_path, 'GERALD_12-04-2008_diane')

    # runfolder, Data, firecrest and its matrix
    for path in (run_path, data_path, firecrest_path, matrix_path):
        os.mkdir(path)
    make_matrix(matrix_path)

    # bustard with its per lane phasing files
    os.mkdir(bustard_path)
    make_phasing_params(bustard_path)

    # gerald with config, summary and eland results
    os.mkdir(gerald_path)
    make_gerald_config(gerald_path)
    make_summary_htm(gerald_path)
    make_eland_results(gerald_path)

    if obj is None:
        return

    obj.temp_dir = root
    obj.runfolder_dir = run_path
    obj.data_dir = data_path
    obj.firecrest_dir = firecrest_path
    obj.matrix_dir = matrix_path
    obj.bustard_dir = bustard_path
    obj.gerald_dir = gerald_path
809         
810                      
class RunfolderTests(unittest.TestCase):
    """
    Test components of the runfolder processing code
    which includes firecrest, bustard, and gerald

    Every test runs against a fake runfolder built by make_runfolder
    in setUp and removed again in tearDown.  Most tests also check
    that an object rebuilt from its own get_elements() xml tree
    reports the same values as the object read from disk.
    """
    def setUp(self):
        # attaches all the directories to the object passed in
        # (temp_dir, runfolder_dir, data_dir, firecrest_dir,
        # matrix_dir, bustard_dir, gerald_dir)
        make_runfolder(self)

    def tearDown(self):
        # remove the whole temporary tree created by setUp
        shutil.rmtree(self.temp_dir)

    def test_firecrest(self):
        """
        Construct a firecrest object
        """
        f = firecrest.firecrest(self.firecrest_dir)
        # expected values match the firecrest directory name
        # C1-33_Firecrest1.8.28_12-04-2008_diane
        self.assertEqual(f.version, '1.8.28')
        self.assertEqual(f.start, 1)
        self.assertEqual(f.stop, 33)
        self.assertEqual(f.user, 'diane')
        self.assertEqual(f.date, date(2008,4,12))

        xml = f.get_elements()
        # just make sure that element tree can serialize the tree
        ElementTree.tostring(xml)

        # rebuild from xml and compare with the original
        f2 = firecrest.Firecrest(xml=xml)
        self.assertEqual(f.version, f2.version)
        self.assertEqual(f.start,   f2.start)
        self.assertEqual(f.stop,    f2.stop)
        self.assertEqual(f.user,    f2.user)
        self.assertEqual(f.date,    f2.date)

    def test_bustard(self):
        """
        construct a bustard object
        """
        b = bustard.bustard(self.bustard_dir)
        # expected values match the bustard directory name
        # Bustard1.8.28_12-04-2008_diane and the params files
        self.assertEqual(b.version, '1.8.28')
        self.assertEqual(b.date,    date(2008,4,12))
        self.assertEqual(b.user,    'diane')
        self.assertEqual(len(b.phasing), 8)
        self.assertAlmostEqual(b.phasing[8].phasing, 0.0099)

        # rebuild from xml and compare with the original
        xml = b.get_elements()
        b2 = bustard.Bustard(xml=xml)
        self.assertEqual(b.version, b2.version)
        self.assertEqual(b.date,    b2.date )
        self.assertEqual(b.user,    b2.user)
        self.assertEqual(len(b.phasing), len(b2.phasing))
        for key in b.phasing.keys():
            self.assertEqual(b.phasing[key].lane,
                             b2.phasing[key].lane)
            self.assertEqual(b.phasing[key].phasing,
                             b2.phasing[key].phasing)
            self.assertEqual(b.phasing[key].prephasing,
                             b2.phasing[key].prephasing)

    def test_gerald(self):
        """
        Construct a gerald object and round trip it through xml
        """
        # need to update gerald and make tests for it
        g = gerald.gerald(self.gerald_dir)

        # version and date are the SOFTWARE_VERSION and TIME_STAMP
        # values written by make_gerald_config
        self.assertEqual(g.version,
            '@(#) Id: GERALD.pl,v 1.68.2.2 2007/06/13 11:08:49 km Exp')
        self.assertEqual(g.date, datetime(2008,4,19,19,8,30))
        self.assertEqual(len(g.lanes), len(g.lanes.keys()))
        self.assertEqual(len(g.lanes), len(g.lanes.items()))


        # list of genomes, matches what was defined up in
        # make_gerald_config.
        # the first None is to offset the genomes list to be 1..9
        # instead of pythons default 0..8
        genomes = [None, '/g/dm3', '/g/equcab1', '/g/equcab1', '/g/canfam2',
                         '/g/hg18', '/g/hg18', '/g/hg18', '/g/hg18', ]

        # test lane specific parameters from gerald config file
        for i in range(1,9):
            cur_lane = g.lanes[str(i)]
            self.assertEqual(cur_lane.analysis, 'eland')
            self.assertEqual(cur_lane.eland_genome, genomes[i])
            self.assertEqual(cur_lane.read_length, '32')
            self.assertEqual(cur_lane.use_bases, 'Y'*32)

        # test data extracted from summary file
        # (mean, deviation) pairs from the "Clusters (raw)" column
        # written by make_summary_htm; index 0 is padding
        clusters = [None,
                    (96483, 9074), (133738, 7938),
                    (152142, 10002), (15784, 2162),
                    (119735, 8465), (152177, 8146),
                    (84649, 7325), (54622, 4812),]

        for i in range(1,9):
            summary_lane = g.summary[str(i)]
            self.assertEqual(summary_lane.cluster, clusters[i])
            self.assertEqual(summary_lane.lane, str(i))

        xml = g.get_elements()
        # just make sure that element tree can serialize the tree
        ElementTree.tostring(xml)
        g2 = gerald.Gerald(xml=xml)

        # do it all again after extracting from the xml file
        self.assertEqual(g.version, g2.version)
        self.assertEqual(g.date, g2.date)
        self.assertEqual(len(g.lanes.keys()), len(g2.lanes.keys()))
        self.assertEqual(len(g.lanes.items()), len(g2.lanes.items()))

        # test lane specific parameters from gerald config file
        for i in range(1,9):
            g_lane = g.lanes[str(i)]
            g2_lane = g2.lanes[str(i)]
            self.assertEqual(g_lane.analysis, g2_lane.analysis)
            self.assertEqual(g_lane.eland_genome, g2_lane.eland_genome)
            self.assertEqual(g_lane.read_length, g2_lane.read_length)
            self.assertEqual(g_lane.use_bases, g2_lane.use_bases)

        # test (some) summary elements
        for i in range(1,9):
            g_summary = g.summary[str(i)]
            g2_summary = g2.summary[str(i)]
            self.assertEqual(g_summary.cluster, g2_summary.cluster)
            self.assertEqual(g_summary.lane, g2_summary.lane)

        # the eland comparison walks every lane by itself and does not
        # use the summary loop index, so it runs once after that loop
        # instead of once per summary lane
        g_eland = g.eland_results
        g2_eland = g2.eland_results
        for lane in g_eland.keys():
            self.assertEqual(g_eland[lane].reads,
                             g2_eland[lane].reads)
            self.assertEqual(len(g_eland[lane].mapped_reads),
                             len(g2_eland[lane].mapped_reads))
            for k in g_eland[lane].mapped_reads.keys():
                self.assertEqual(g_eland[lane].mapped_reads[k],
                                 g2_eland[lane].mapped_reads[k])

            self.assertEqual(len(g_eland[lane].match_codes),
                             len(g2_eland[lane].match_codes))
            for k in g_eland[lane].match_codes.keys():
                self.assertEqual(g_eland[lane].match_codes[k],
                                 g2_eland[lane].match_codes[k])


    def test_eland(self):
        """
        Load the eland result files and round trip them through xml
        """
        # maps the fasta names used in the fake result files to the
        # names expected in mapped_reads; the same map is used for
        # every lane
        dm3_map = { 'chrUextra.fa' : 'dm3/chrUextra.fa',
                    'chr2L.fa': 'dm3/chr2L.fa',
                    'Lambda.fa': 'Lambda.fa'}
        genome_maps = { '1':dm3_map, '2':dm3_map, '3':dm3_map, '4':dm3_map,
                        '5':dm3_map, '6':dm3_map, '7':dm3_map, '8':dm3_map }
        eland = gerald.eland(self.gerald_dir, genome_maps=genome_maps)

        for i in range(1,9):
            lane = eland[str(i)]
            self.assertEqual(lane.reads, 4)
            self.assertEqual(lane.sample_name, "s")
            self.assertEqual(lane.lane_id, unicode(i))
            self.assertEqual(len(lane.mapped_reads), 3)
            self.assertEqual(lane.mapped_reads['Lambda.fa'], 1)
            self.assertEqual(lane.mapped_reads['dm3/chr2L.fa'], 1)
            self.assertEqual(lane.match_codes['U1'], 2)
            self.assertEqual(lane.match_codes['NM'], 1)

        xml = eland.get_elements()
        # just make sure that element tree can serialize the tree
        ElementTree.tostring(xml)
        e2 = gerald.ELAND(xml=xml)

        # the object rebuilt from xml must agree lane by lane
        for i in range(1,9):
            l1 = eland[str(i)]
            l2 = e2[str(i)]
            self.assertEqual(l1.reads, l2.reads)
            self.assertEqual(l1.sample_name, l2.sample_name)
            self.assertEqual(l1.lane_id, l2.lane_id)
            self.assertEqual(len(l1.mapped_reads), len(l2.mapped_reads))
            self.assertEqual(len(l1.mapped_reads), 3)
            for k in l1.mapped_reads.keys():
                self.assertEqual(l1.mapped_reads[k],
                                 l2.mapped_reads[k])

            self.assertEqual(len(l1.match_codes), 9)
            self.assertEqual(len(l1.match_codes), len(l2.match_codes))
            for k in l1.match_codes.keys():
                self.assertEqual(l1.match_codes[k],
                                 l2.match_codes[k])

    def test_runfolder(self):
        """
        Find the run in the fake runfolder and round trip it through xml
        """
        runs = runfolder.get_runs(self.runfolder_dir)

        # do we get the flowcell id from the filename?
        self.assertEqual(len(runs), 1)
        self.assertEqual(runs[0].name, 'run_207BTAAXX_2008-04-19.xml')

        # do we get the flowcell id from the FlowcellId.xml file
        make_flowcell_id(self.runfolder_dir, '207BTAAXY')
        runs = runfolder.get_runs(self.runfolder_dir)
        self.assertEqual(len(runs), 1)
        self.assertEqual(runs[0].name, 'run_207BTAAXY_2008-04-19.xml')

        r1 = runs[0]
        xml = r1.get_elements()
        # just make sure that element tree can serialize the tree
        ElementTree.tostring(xml)

        # the rebuilt run keeps its name and all three components
        r2 = runfolder.PipelineRun(xml=xml)
        self.assertEqual(r1.name, r2.name)
        self.assertNotEqual(r2.firecrest, None)
        self.assertNotEqual(r2.bustard, None)
        self.assertNotEqual(r2.gerald, None)
1017         
1018
def suite():
    """
    Return a test suite with every test method of RunfolderTests.
    """
    # unittest.makeSuite is deprecated; the default loader collects
    # the same methods because its method prefix is also 'test'
    return unittest.defaultTestLoader.loadTestsFromTestCase(RunfolderTests)
1021
if __name__ == '__main__':
    # executed as a script: run the tests collected by suite()
    unittest.main(defaultTest='suite')
1024