2 Create simulated solexa/illumina runfolders for testing
8 def make_firecrest_dir(data_dir, version="1.9.2", start=1, stop=37):
9 firecrest_dir = os.path.join(data_dir,
10 'C%d-%d_Firecrest%s_12-04-2008_diane' % (start, stop, version)
12 os.mkdir(firecrest_dir)
15 def make_ipar_dir(data_dir):
17 Construct an artificial ipar parameter file and directory
19 params = """<?xml version="1.0"?>
21 <Run Name="IPAR_1.01">
22 <Software Name="IPAR" Version="2.01.192.0" />
23 <Cycles First="1" Last="37" Number="37" />
25 <ImagingReads Index="1">
26 <FirstCycle>1</FirstCycle>
27 <LastCycle>37</LastCycle>
28 <RunFolder>081021_HWI-EAS229_0063_30HKUAAXX</RunFolder>
31 <FirstCycle>1</FirstCycle>
32 <LastCycle>37</LastCycle>
33 <RunFolder>081021_HWI-EAS229_0063_30HKUAAXX</RunFolder>
35 <Compression>gzip</Compression>
36 <CompressionSuffix>.p.gz</CompressionSuffix>
37 <Instrument>HWI-EAS229</Instrument>
38 <RunFolder>081021_HWI-EAS229_0063_30HKUAAXX</RunFolder>
41 <AutoOffsetFlag>1</AutoOffsetFlag>
43 <RemappingDistance>1.5</RemappingDistance>
44 <Threshold>4</Threshold>
49 <TileRange Max="100" Min="1" />
53 <TileRange Max="100" Min="1" />
57 <TileRange Max="100" Min="1" />
61 <TileRange Max="100" Min="1" />
65 <TileRange Max="100" Min="1" />
69 <TileRange Max="100" Min="1" />
73 <TileRange Max="100" Min="1" />
77 <TileRange Max="100" Min="1" />
83 f = open(os.path.join(data_dir, '.params'),'w')
86 ipar_dir = os.path.join(data_dir, 'IPAR_1.01')
87 if not os.path.exists(ipar_dir):
91 def make_flowcell_id(runfolder_dir, flowcell_id=None):
92 if flowcell_id is None:
93 flowcell_id = '207BTAAXY'
95 config = """<?xml version="1.0"?>
98 </FlowcellId>""" % (flowcell_id,)
99 config_dir = os.path.join(runfolder_dir, 'Config')
101 if not os.path.exists(config_dir):
103 pathname = os.path.join(config_dir, 'FlowcellId.xml')
104 f = open(pathname,'w')
108 def make_matrix(matrix_dir):
109 contents = """# Auto-generated frequency response matrix
114 0.77 0.15 -0.04 -0.04
115 0.76 1.02 -0.05 -0.06
116 -0.10 -0.10 1.17 -0.03
117 -0.13 -0.12 0.80 1.27
119 s_matrix = os.path.join(matrix_dir, 's_matrix.txt')
120 f = open(s_matrix, 'w')
124 def make_phasing_params(bustard_dir):
125 for lane in range(1,9):
126 pathname = os.path.join(bustard_dir, 'params%d.xml' % (lane))
127 f = open(pathname, 'w')
128 f.write("""<Parameters>
129 <Phasing>0.009900</Phasing>
130 <Prephasing>0.003500</Prephasing>
135 def make_gerald_config(gerald_dir):
136 config_xml = """<RunParameters>
137 <ChipWideRunParameters>
138 <ANALYSIS>default</ANALYSIS>
139 <BAD_LANES></BAD_LANES>
140 <BAD_TILES></BAD_TILES>
141 <CONTAM_DIR></CONTAM_DIR>
142 <CONTAM_FILE></CONTAM_FILE>
143 <ELAND_GENOME>Need_to_specify_ELAND_genome_directory</ELAND_GENOME>
144 <ELAND_MULTIPLE_INSTANCES>8</ELAND_MULTIPLE_INSTANCES>
145 <ELAND_REPEAT></ELAND_REPEAT>
146 <EMAIL_DOMAIN>domain.com</EMAIL_DOMAIN>
147 <EMAIL_LIST>diane</EMAIL_LIST>
148 <EMAIL_SERVER>localhost:25</EMAIL_SERVER>
149 <EXPT_DIR>/home/diane/gec/080416_HWI-EAS229_0024_207BTAAXX/Data/C1-33_Firecrest1.8.28_19-04-2008_diane/Bustard1.8.28_19-04-2008_diane</EXPT_DIR>
150 <EXPT_DIR_ROOT>/home/diane/gec</EXPT_DIR_ROOT>
152 <GENOME_DIR>/home/diane/proj/SolexaPipeline-0.2.2.6/Goat/../Gerald/../../Genomes</GENOME_DIR>
153 <GENOME_FILE>Need_to_specify_genome_file_name</GENOME_FILE>
154 <HAMSTER_FLAG>genome</HAMSTER_FLAG>
155 <OUT_DIR>/home/diane/gec/080416_HWI-EAS229_0024_207BTAAXX/Data/C1-33_Firecrest1.8.28_19-04-2008_diane/Bustard1.8.28_19-04-2008_diane/GERALD_19-04-2008_diane</OUT_DIR>
156 <POST_RUN_COMMAND></POST_RUN_COMMAND>
157 <PRB_FILE_SUFFIX>_prb.txt</PRB_FILE_SUFFIX>
158 <PURE_BASES>12</PURE_BASES>
159 <QF_PARAMS>'((CHASTITY>=0.6))'</QF_PARAMS>
160 <QHG_FILE_SUFFIX>_qhg.txt</QHG_FILE_SUFFIX>
161 <QUALITY_FORMAT>--symbolic</QUALITY_FORMAT>
162 <READ_LENGTH>32</READ_LENGTH>
163 <SEQUENCE_FORMAT>--scarf</SEQUENCE_FORMAT>
164 <SEQ_FILE_SUFFIX>_seq.txt</SEQ_FILE_SUFFIX>
165 <SIG_FILE_SUFFIX_DEPHASED>_sig2.txt</SIG_FILE_SUFFIX_DEPHASED>
166 <SIG_FILE_SUFFIX_NOT_DEPHASED>_sig.txt</SIG_FILE_SUFFIX_NOT_DEPHASED>
167 <SOFTWARE_VERSION>@(#) Id: GERALD.pl,v 1.68.2.2 2007/06/13 11:08:49 km Exp</SOFTWARE_VERSION>
168 <TILE_REGEX>s_[1-8]_[0-9][0-9][0-9][0-9]</TILE_REGEX>
169 <TILE_ROOT>s</TILE_ROOT>
170 <TIME_STAMP>Sat Apr 19 19:08:30 2008</TIME_STAMP>
171 <TOOLS_DIR>/home/diane/proj/SolexaPipeline-0.2.2.6/Goat/../Gerald</TOOLS_DIR>
172 <USE_BASES>all</USE_BASES>
173 <WEB_DIR_ROOT>http://host.domain.com/yourshare/</WEB_DIR_ROOT>
174 </ChipWideRunParameters>
175 <LaneSpecificRunParameters>
188 <s_2>/g/equcab1</s_2>
189 <s_3>/g/equcab1</s_3>
190 <s_4>/g/canfam2</s_4>
207 <s_1>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_1>
208 <s_2>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_2>
209 <s_3>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_3>
210 <s_4>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_4>
211 <s_5>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_5>
212 <s_6>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_6>
213 <s_7>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_7>
214 <s_8>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_8>
216 </LaneSpecificRunParameters>
219 pathname = os.path.join(gerald_dir, 'config.xml')
220 f = open(pathname,'w')
224 def make_summary100_htm(gerald_dir):
225 summary_htm="""<!--RUN_TIME Wed Jul 2 06:47:44 2008 -->
226 <!--SOFTWARE_VERSION @(#) $Id: jerboa.pl,v 1.94 2007/12/04 09:59:07 rshaw Exp $-->
230 <a name="Top"><h2><title>080627_HWI-EAS229_0036_3055HAXX Summary</title></h2></a>
231 <h1>Summary Information For Experiment 080627_HWI-EAS229_0036_3055HAXX on Machine HWI-EAS229</h1>
232 <h2><br></br>Chip Summary<br></br></h2>
233 <table border="1" cellpadding="5">
234 <tr><td>Machine</td><td>HWI-EAS229</td></tr>
235 <tr><td>Run Folder</td><td>080627_HWI-EAS229_0036_3055HAXX</td></tr>
236 <tr><td>Chip ID</td><td>unknown</td></tr>
238 <h2><br></br>Chip Results Summary<br></br></h2>
239 <table border="1" cellpadding="5">
242 <td>Clusters (PF)</td>
243 <td>Yield (kbases)</td>
245 <tr><td>80933224</td>
250 <h2><br></br>Lane Parameter Summary<br></br></h2>
251 <table border="1" cellpadding="5">
255 <td>Sample Target</td>
268 <td>'((CHASTITY>=0.6))'</td>
270 <td><a href="#Lane1">Lane 1</a></td>
278 <td>'((CHASTITY>=0.6))'</td>
280 <td><a href="#Lane2">Lane 2</a></td>
288 <td>'((CHASTITY>=0.6))'</td>
290 <td><a href="#Lane3">Lane 3</a></td>
298 <td>'((CHASTITY>=0.6))'</td>
300 <td><a href="#Lane4">Lane 4</a></td>
308 <td>'((CHASTITY>=0.6))'</td>
310 <td><a href="#Lane5">Lane 5</a></td>
318 <td>'((CHASTITY>=0.6))'</td>
320 <td><a href="#Lane6">Lane 6</a></td>
328 <td>'((CHASTITY>=0.6))'</td>
330 <td><a href="#Lane7">Lane 7</a></td>
338 <td>'((CHASTITY>=0.6))'</td>
340 <td><a href="#Lane8">Lane 8</a></td>
343 <h2><br></br>Lane Results Summary<br></br></h2>
344 <table border="1" cellpadding="5">
346 <td colspan="2">Lane Info</td>
347 <td colspan="8">Tile Mean +/- SD for Lane</td>
351 <td>Lane Yield (kbases) </td>
352 <td>Clusters (raw)</td>
353 <td>Clusters (PF) </td>
354 <td>1st Cycle Int (PF) </td>
355 <td>% intensity after 20 cycles (PF) </td>
356 <td>% PF Clusters </td>
357 <td>% Align (PF) </td>
358 <td>Alignment Score (PF) </td>
359 <td> % Error Rate (PF) </td>
364 <td>96483 +/- 9074</td>
365 <td>60787 +/- 4240</td>
367 <td>101.88 +/- 6.03</td>
368 <td>63.21 +/- 3.29</td>
369 <td>70.33 +/- 0.24</td>
370 <td>9054.08 +/- 59.16</td>
371 <td>0.46 +/- 0.18</td>
376 <td>133738 +/- 7938</td>
377 <td>60217 +/- 1926</td>
379 <td>92.62 +/- 7.58</td>
380 <td>45.20 +/- 3.31</td>
381 <td>51.98 +/- 0.74</td>
382 <td>6692.04 +/- 92.49</td>
383 <td>0.46 +/- 0.09</td>
388 <td>152142 +/- 10002</td>
389 <td>71468 +/- 2827</td>
391 <td>91.53 +/- 8.66</td>
392 <td>47.19 +/- 3.80</td>
393 <td>82.24 +/- 0.44</td>
394 <td>10598.68 +/- 64.13</td>
395 <td>0.41 +/- 0.04</td>
400 <td>15784 +/- 2162</td>
401 <td>13443 +/- 1728</td>
403 <td>97.53 +/- 9.87</td>
404 <td>85.29 +/- 1.91</td>
405 <td>80.02 +/- 0.53</td>
406 <td>10368.82 +/- 71.08</td>
407 <td>0.15 +/- 0.05</td>
412 <td>119735 +/- 8465</td>
413 <td>64590 +/- 2529</td>
415 <td>88.69 +/- 14.79</td>
416 <td>54.10 +/- 2.59</td>
417 <td>76.95 +/- 0.32</td>
418 <td>9936.47 +/- 65.75</td>
419 <td>0.28 +/- 0.02</td>
424 <td>152177 +/- 8146</td>
425 <td>66716 +/- 2493</td>
427 <td>87.06 +/- 9.86</td>
428 <td>43.98 +/- 3.12</td>
429 <td>78.80 +/- 0.43</td>
430 <td>10162.28 +/- 49.65</td>
431 <td>0.38 +/- 0.03</td>
436 <td>84649 +/- 7325</td>
437 <td>57418 +/- 3617</td>
439 <td>89.40 +/- 8.23</td>
440 <td>67.97 +/- 1.82</td>
441 <td>33.38 +/- 0.25</td>
442 <td>4247.92 +/- 32.37</td>
443 <td>1.00 +/- 0.03</td>
448 <td>54622 +/- 4812</td>
449 <td>41136 +/- 3309</td>
451 <td>90.21 +/- 9.10</td>
452 <td>75.39 +/- 2.27</td>
453 <td>48.33 +/- 0.29</td>
454 <td>6169.21 +/- 169.50</td>
455 <td>0.86 +/- 1.22</td>
457 <tr><td colspan="13">Tile mean across chip</td></tr>
471 <h2><br></br>Expanded Lane Summary<br></br></h2>
472 <table border="1" cellpadding="5">
475 <tr><td colspan="2">Lane Info</td>
476 <td colspan="2">Phasing Info</td>
477 <td colspan="2">Raw Data (tile mean)</td>
478 <td colspan="7">Filtered Data (tile mean)</td></tr>
480 <td>Clusters (tile mean) (raw)</td>
482 <td>% Prephasing </td>
483 <td>% Error Rate (raw) </td>
484 <td> Equiv Perfect Clusters (raw) </td>
486 <td>Cycle 2-4 Av Int (PF) </td>
487 <td>Cycle 2-10 Av % Loss (PF) </td>
488 <td>Cycle 10-20 Av % Loss (PF) </td>
489 <td>% Align (PF) </td>
490 <td>% Error Rate (PF) </td>
491 <td> Equiv Perfect Clusters (PF) </td>
502 <td>0.13 +/- 0.44</td>
503 <td>-1.14 +/- 0.34</td>
517 <td>0.29 +/- 0.40</td>
518 <td>-0.79 +/- 0.35</td>
532 <td>0.68 +/- 0.51</td>
533 <td>-0.77 +/- 0.42</td>
547 <td>0.20 +/- 0.69</td>
548 <td>-1.28 +/- 0.66</td>
562 <td>0.34 +/- 0.49</td>
563 <td>-1.55 +/- 4.69</td>
577 <td>0.57 +/- 0.50</td>
578 <td>-0.91 +/- 0.39</td>
592 <td>1.15 +/- 0.52</td>
593 <td>-0.84 +/- 0.58</td>
607 <td>1.10 +/- 0.59</td>
608 <td>-1.01 +/- 0.47</td>
614 <b><br></br>IVC Plots</b>
615 <p> <a href='IVC.htm' target="_blank"> IVC.htm
617 <b><br></br>All Intensity Plots</b>
618 <p> <a href='All.htm' target="_blank"> All.htm
620 <b><br></br>Error graphs: </b>
621 <p> <a href='Error.htm' target="_blank"> Error.htm
623 <td><a href="#Top">Back to top</a></td>
624 <a name="Lane1"><h2><br></br>Lane 1<br></br></h2></a>
625 <table border="1" cellpadding="5">
629 <td>Clusters (raw)</td>
630 <td>Av 1st Cycle Int (PF) </td>
631 <td>Av % intensity after 20 cycles (PF) </td>
632 <td>% PF Clusters </td>
633 <td>% Align (PF) </td>
634 <td>Av Alignment Score (PF) </td>
635 <td>% Error Rate (PF) </td>
649 <td><a href="#Top">Back to top</a></td>
650 <a name="Lane2"><h2><br></br>Lane 2<br></br></h2></a>
651 <table border="1" cellpadding="5">
655 <td>Clusters (raw)</td>
656 <td>Av 1st Cycle Int (PF) </td>
657 <td>Av % intensity after 20 cycles (PF) </td>
658 <td>% PF Clusters </td>
659 <td>% Align (PF) </td>
660 <td>Av Alignment Score (PF) </td>
661 <td>% Error Rate (PF) </td>
675 <td><a href="#Top">Back to top</a></td>
676 <a name="Lane3"><h2><br></br>Lane 3<br></br></h2></a>
677 <table border="1" cellpadding="5">
681 <td>Clusters (raw)</td>
682 <td>Av 1st Cycle Int (PF) </td>
683 <td>Av % intensity after 20 cycles (PF) </td>
684 <td>% PF Clusters </td>
685 <td>% Align (PF) </td>
686 <td>Av Alignment Score (PF) </td>
687 <td>% Error Rate (PF) </td>
701 <td><a href="#Top">Back to top</a></td>
702 <a name="Lane4"><h2><br></br>Lane 4<br></br></h2></a>
703 <table border="1" cellpadding="5">
707 <td>Clusters (raw)</td>
708 <td>Av 1st Cycle Int (PF) </td>
709 <td>Av % intensity after 20 cycles (PF) </td>
710 <td>% PF Clusters </td>
711 <td>% Align (PF) </td>
712 <td>Av Alignment Score (PF) </td>
713 <td>% Error Rate (PF) </td>
727 <td><a href="#Top">Back to top</a></td>
728 <a name="Lane5"><h2><br></br>Lane 5<br></br></h2></a>
729 <table border="1" cellpadding="5">
733 <td>Clusters (raw)</td>
734 <td>Av 1st Cycle Int (PF) </td>
735 <td>Av % intensity after 20 cycles (PF) </td>
736 <td>% PF Clusters </td>
737 <td>% Align (PF) </td>
738 <td>Av Alignment Score (PF) </td>
739 <td>% Error Rate (PF) </td>
742 <td><a href="#Top">Back to top</a></td>
743 <a name="Lane6"><h2><br></br>Lane 6<br></br></h2></a>
744 <table border="1" cellpadding="5">
748 <td>Clusters (raw)</td>
749 <td>Av 1st Cycle Int (PF) </td>
750 <td>Av % intensity after 20 cycles (PF) </td>
751 <td>% PF Clusters </td>
752 <td>% Align (PF) </td>
753 <td>Av Alignment Score (PF) </td>
754 <td>% Error Rate (PF) </td>
768 <td><a href="#Top">Back to top</a></td>
769 <a name="Lane7"><h2><br></br>Lane 7<br></br></h2></a>
770 <table border="1" cellpadding="5">
774 <td>Clusters (raw)</td>
775 <td>Av 1st Cycle Int (PF) </td>
776 <td>Av % intensity after 20 cycles (PF) </td>
777 <td>% PF Clusters </td>
778 <td>% Align (PF) </td>
779 <td>Av Alignment Score (PF) </td>
780 <td>% Error Rate (PF) </td>
794 <td><a href="#Top">Back to top</a></td>
795 <a name="Lane8"><h2><br></br>Lane 8<br></br></h2></a>
796 <table border="1" cellpadding="5">
800 <td>Clusters (raw)</td>
801 <td>Av 1st Cycle Int (PF) </td>
802 <td>Av % intensity after 20 cycles (PF) </td>
803 <td>% PF Clusters </td>
804 <td>% Align (PF) </td>
805 <td>Av Alignment Score (PF) </td>
806 <td>% Error Rate (PF) </td>
820 <td><a href="#Top">Back to top</a></td>
824 pathname = os.path.join(gerald_dir, 'Summary.htm')
825 f = open(pathname, 'w')
829 def make_summary_htm_110(gerald_dir):
830 summary_htm = """<!--RUN_TIME Tue Oct 28 09:45:50 2008 -->
831 <!--SOFTWARE_VERSION @(#) $Id: jerboa.pl,v 1.10 2008/07/23 15:18:30 mzerara Exp $-->
835 <a name="Top"><h2><title>081017_HWI-EAS229_0062_30J55AAXX Summary</title></h2></a>
836 <h1>Summary Information For Experiment 081017_HWI-EAS229_0062_30J55AAXX on Machine HWI-EAS229</h1>
837 <h2><br></br>Chip Summary<br></br></h2>
838 <table border="1" cellpadding="5">
839 <tr><td>Machine</td><td>HWI-EAS229</td></tr>
840 <tr><td>Run Folder</td><td>081017_HWI-EAS229_0062_30J55AAXX</td></tr>
841 <tr><td>Chip ID</td><td>unknown</td></tr>
843 <h2><br></br>Chip Results Summary<br></br></h2>
844 <table border="1" cellpadding="5">
847 <td>Clusters (PF)</td>
848 <td>Yield (kbases)</td>
850 <tr><td>162491175</td>
855 <h2><br></br>Lane Parameter Summary<br></br></h2>
856 <table border="1" cellpadding="5">
860 <td>Sample Target</td>
864 <td>Chast. Thresh.</td>
874 <td>'((FAILED_CHASTITY<=1))'</td>
877 <td><a href="#Lane1">Lane 1</a></td>
885 <td>'((FAILED_CHASTITY<=1))'</td>
888 <td><a href="#Lane2">Lane 2</a></td>
896 <td>'((FAILED_CHASTITY<=1))'</td>
899 <td><a href="#Lane3">Lane 3</a></td>
907 <td>'((FAILED_CHASTITY<=1))'</td>
910 <td><a href="#Lane4">Lane 4</a></td>
918 <td>'((FAILED_CHASTITY<=1))'</td>
921 <td><a href="#Lane5">Lane 5</a></td>
929 <td>'((FAILED_CHASTITY<=1))'</td>
932 <td><a href="#Lane6">Lane 6</a></td>
940 <td>'((FAILED_CHASTITY<=1))'</td>
943 <td><a href="#Lane7">Lane 7</a></td>
951 <td>'((FAILED_CHASTITY<=1))'</td>
954 <td><a href="#Lane8">Lane 8</a></td>
957 <h2><br></br>Lane Results Summary<br></br></h2>
958 <table border="1" cellpadding="5">
960 <td colspan="2">Lane Info</td>
961 <td colspan="8">Tile Mean +/- SD for Lane</td>
965 <td>Lane Yield (kbases) </td>
966 <td>Clusters (raw)</td>
967 <td>Clusters (PF) </td>
968 <td>1st Cycle Int (PF) </td>
969 <td>% intensity after 20 cycles (PF) </td>
970 <td>% PF Clusters </td>
971 <td>% Align (PF) </td>
972 <td>Alignment Score (PF) </td>
973 <td> % Error Rate (PF) </td>
978 <td>190220 +/- 15118</td>
979 <td>117659 +/- 8144</td>
981 <td>80.02 +/- 2.52</td>
982 <td>62.15 +/- 5.54</td>
983 <td>77.18 +/- 0.22</td>
984 <td>13447.28 +/- 43.35</td>
985 <td>2.78 +/- 0.13</td>
990 <td>190560 +/- 14399</td>
991 <td>124963 +/- 5687</td>
993 <td>75.73 +/- 2.46</td>
994 <td>65.83 +/- 4.12</td>
995 <td>70.06 +/- 0.39</td>
996 <td>12082.95 +/- 64.81</td>
997 <td>3.22 +/- 0.09</td>
1002 <td>187597 +/- 12369</td>
1003 <td>126737 +/- 5549</td>
1005 <td>72.61 +/- 2.67</td>
1006 <td>67.69 +/- 2.72</td>
1007 <td>74.03 +/- 0.22</td>
1008 <td>12470.18 +/- 50.02</td>
1009 <td>4.27 +/- 0.08</td>
1014 <td>204142 +/- 16877</td>
1015 <td>132876 +/- 4023</td>
1017 <td>80.43 +/- 3.10</td>
1018 <td>65.39 +/- 3.84</td>
1019 <td>72.95 +/- 0.15</td>
1020 <td>13273.80 +/- 39.75</td>
1021 <td>0.78 +/- 0.10</td>
1026 <td>247308 +/- 11600</td>
1027 <td>117036 +/- 4489</td>
1029 <td>68.60 +/- 2.40</td>
1030 <td>47.48 +/- 3.63</td>
1031 <td>66.91 +/- 0.54</td>
1032 <td>11700.08 +/- 66.33</td>
1033 <td>2.62 +/- 0.13</td>
1038 <td>204298 +/- 15640</td>
1039 <td>130543 +/- 6972</td>
1041 <td>81.35 +/- 1.96</td>
1042 <td>64.14 +/- 4.40</td>
1043 <td>77.28 +/- 0.11</td>
1044 <td>14084.01 +/- 23.09</td>
1045 <td>0.71 +/- 0.03</td>
1050 <td>202707 +/- 15404</td>
1051 <td>128196 +/- 9745</td>
1053 <td>79.95 +/- 2.08</td>
1054 <td>63.48 +/- 5.63</td>
1055 <td>75.78 +/- 0.18</td>
1056 <td>13758.74 +/- 60.86</td>
1057 <td>0.88 +/- 0.12</td>
1062 <td>198075 +/- 14702</td>
1063 <td>118208 +/- 14798</td>
1065 <td>81.80 +/- 2.53</td>
1066 <td>59.85 +/- 7.67</td>
1067 <td>74.55 +/- 0.36</td>
1068 <td>13586.07 +/- 103.97</td>
1069 <td>0.71 +/- 0.15</td>
1071 <tr><td colspan="13">Tile mean across chip</td></tr>
1085 <h2><br></br>Expanded Lane Summary<br></br></h2>
1086 <table border="1" cellpadding="5">
1089 <tr><td colspan="2">Lane Info</td>
1090 <td colspan="2">Phasing Info</td>
1091 <td colspan="2">Raw Data (tile mean)</td>
1092 <td colspan="7">Filtered Data (tile mean)</td></tr>
1094 <td>Clusters (tile mean) (raw)</td>
1096 <td>% Prephasing </td>
1097 <td>% Error Rate (raw) </td>
1098 <td> Equiv Perfect Clusters (raw) </td>
1099 <td>% retained </td>
1100 <td>Cycle 2-4 Av Int (PF) </td>
1101 <td>Cycle 2-10 Av % Loss (PF) </td>
1102 <td>Cycle 10-20 Av % Loss (PF) </td>
1103 <td>% Align (PF) </td>
1104 <td>% Error Rate (PF) </td>
1105 <td> Equiv Perfect Clusters (PF) </td>
1116 <td>0.56 +/- 0.22</td>
1117 <td>0.29 +/- 0.14</td>
1131 <td>0.78 +/- 0.15</td>
1132 <td>0.53 +/- 0.15</td>
1146 <td>0.56 +/- 0.17</td>
1147 <td>0.59 +/- 0.26</td>
1161 <td>1.28 +/- 0.21</td>
1162 <td>0.77 +/- 0.21</td>
1176 <td>1.61 +/- 0.39</td>
1177 <td>1.21 +/- 0.21</td>
1191 <td>1.30 +/- 0.11</td>
1192 <td>0.73 +/- 0.22</td>
1206 <td>1.27 +/- 0.38</td>
1207 <td>0.66 +/- 0.22</td>
1221 <td>1.19 +/- 0.16</td>
1222 <td>0.65 +/- 0.29</td>
1230 pathname = os.path.join(gerald_dir, 'Summary.htm')
1231 f = open(pathname, 'w')
1232 f.write(summary_htm)
1235 def make_summary_paired_htm(gerald_dir):
1236 summary_htm = """<!--RUN_TIME Thu Nov 13 15:11:29 2008 -->
1237 <!--SOFTWARE_VERSION @(#) $Id: jerboa.pl,v 1.94 2007/12/04 09:59:07 rshaw Exp $-->
1241 <a name="Top"><h2><title>080920_HWI-EAS229_0057_30GBJAAXX Summary</title></h2></a>
1242 <h1>Summary Information For Experiment 080920_HWI-EAS229_0057_30GBJAAXX on Machine unknown</h1>
1243 <h2><br></br>Chip Summary<br></br></h2>
1244 <table border="1" cellpadding="5">
1245 <tr><td>Machine</td><td>UNKNOWN</td></tr>
1246 <tr><td>Run Folder</td><td>080920_HWI-EAS229_0057_30GBJAAXX</td></tr>
1247 <tr><td>Chip ID</td><td>unknown</td></tr>
1249 <h2><br></br>Chip Results Summary<br></br></h2>
1250 <table border="1" cellpadding="5">
1253 <td>Clusters (PF)</td>
1254 <td>Yield (kbases)</td>
1256 <tr><td>126151880</td>
1261 <h2><br></br>Lane Parameter Summary<br></br></h2>
1262 <table border="1" cellpadding="5">
1266 <td>Sample Target</td>
1267 <td>Sample Type</td>
1279 <td>'((CHASTITY>=0.6))'</td>
1281 <td><a href="#Lane1">Lane 1</a></td>
1289 <td>'((CHASTITY>=0.6))'</td>
1291 <td><a href="#Lane2">Lane 2</a></td>
1299 <td>'((CHASTITY>=0.6))'</td>
1301 <td><a href="#Lane3">Lane 3</a></td>
1309 <td>'((CHASTITY>=0.6))'</td>
1311 <td><a href="#Lane4">Lane 4</a></td>
1319 <td>'((CHASTITY>=0.6))'</td>
1321 <td><a href="#Lane5">Lane 5</a></td>
1329 <td>'((CHASTITY>=0.6))'</td>
1331 <td><a href="#Lane6">Lane 6</a></td>
1339 <td>'((CHASTITY>=0.6))'</td>
1341 <td><a href="#Lane7">Lane 7</a></td>
1349 <td>'((CHASTITY>=0.6))'</td>
1351 <td><a href="#Lane8">Lane 8</a></td>
1354 <h2><br></br>Lane Results Summary : Read 1<br></br></h2>
1355 <table border="1" cellpadding="5">
1357 <td colspan="2">Lane Info</td>
1358 <td colspan="8">Tile Mean +/- SD for Lane</td>
1362 <td>Lane Yield (kbases) </td>
1363 <td>Clusters (raw)</td>
1364 <td>Clusters (PF) </td>
1365 <td>1st Cycle Int (PF) </td>
1366 <td>% intensity after 20 cycles (PF) </td>
1367 <td>% PF Clusters </td>
1368 <td>% Align (PF) </td>
1369 <td>Alignment Score (PF) </td>
1370 <td> % Error Rate (PF) </td>
1375 <td>103646 +/- 4515</td>
1376 <td>74887 +/- 6080</td>
1378 <td>99.34 +/- 3.52</td>
1379 <td>72.22 +/- 4.63</td>
1380 <td>89.19 +/- 0.59</td>
1381 <td>14.16 +/- 0.63</td>
1382 <td>0.94 +/- 0.17</td>
1387 <td>106678 +/- 4652</td>
1388 <td>78260 +/- 2539</td>
1390 <td>98.23 +/- 2.66</td>
1391 <td>73.43 +/- 2.52</td>
1392 <td>87.05 +/- 0.64</td>
1393 <td>16.81 +/- 0.55</td>
1394 <td>0.92 +/- 0.17</td>
1399 <td>84583 +/- 5963</td>
1400 <td>70065 +/- 4194</td>
1402 <td>99.82 +/- 3.05</td>
1403 <td>82.90 +/- 1.32</td>
1404 <td>89.49 +/- 0.20</td>
1405 <td>18.13 +/- 0.66</td>
1406 <td>0.81 +/- 0.13</td>
1411 <td>68813 +/- 4782</td>
1412 <td>56905 +/- 4145</td>
1414 <td>102.00 +/- 14.74</td>
1415 <td>82.91 +/- 5.89</td>
1416 <td>56.93 +/- 0.82</td>
1417 <td>25.85 +/- 2.30</td>
1418 <td>0.95 +/- 0.30</td>
1423 <td>104854 +/- 4664</td>
1424 <td>79879 +/- 6270</td>
1426 <td>98.26 +/- 5.85</td>
1427 <td>76.34 +/- 6.67</td>
1428 <td>57.71 +/- 0.30</td>
1429 <td>26.16 +/- 1.68</td>
1430 <td>0.97 +/- 0.19</td>
1435 <td>43555 +/- 1632</td>
1436 <td>37946 +/- 2140</td>
1438 <td>105.74 +/- 8.40</td>
1439 <td>87.14 +/- 3.87</td>
1440 <td>89.08 +/- 1.00</td>
1441 <td>33.53 +/- 2.18</td>
1442 <td>1.05 +/- 0.21</td>
1447 <td>54265 +/- 1588</td>
1448 <td>41680 +/- 5319</td>
1450 <td>111.33 +/- 8.90</td>
1451 <td>76.94 +/- 10.52</td>
1452 <td>84.50 +/- 1.41</td>
1453 <td>27.44 +/- 2.33</td>
1454 <td>1.32 +/- 0.25</td>
1459 <td>64363 +/- 2697</td>
1460 <td>39991 +/- 6785</td>
1462 <td>109.93 +/- 7.80</td>
1463 <td>62.45 +/- 12.05</td>
1464 <td>82.20 +/- 2.08</td>
1465 <td>24.63 +/- 2.53</td>
1466 <td>1.57 +/- 0.22</td>
1468 <tr><td colspan="13">Tile mean across chip</td></tr>
1482 <h2><br></br>Lane Results Summary : Read 2<br></br></h2>
1483 <table border="1" cellpadding="5">
1485 <td colspan="2">Lane Info</td>
1486 <td colspan="8">Tile Mean +/- SD for Lane</td>
1490 <td>Lane Yield (kbases) </td>
1491 <td>Clusters (raw)</td>
1492 <td>Clusters (PF) </td>
1493 <td>1st Cycle Int (PF) </td>
1494 <td>% intensity after 20 cycles (PF) </td>
1495 <td>% PF Clusters </td>
1496 <td>% Align (PF) </td>
1497 <td>Alignment Score (PF) </td>
1498 <td> % Error Rate (PF) </td>
1503 <td>103647 +/- 4516</td>
1504 <td>74887 +/- 6080</td>
1506 <td>94.42 +/- 5.68</td>
1507 <td>72.22 +/- 4.63</td>
1508 <td>81.54 +/- 2.13</td>
1509 <td>42.70 +/- 5.49</td>
1510 <td>0.89 +/- 0.27</td>
1515 <td>106679 +/- 4653</td>
1516 <td>78260 +/- 2539</td>
1518 <td>93.57 +/- 2.55</td>
1519 <td>73.43 +/- 2.52</td>
1520 <td>82.05 +/- 0.37</td>
1521 <td>43.98 +/- 3.02</td>
1522 <td>0.76 +/- 0.15</td>
1527 <td>84584 +/- 5964</td>
1528 <td>70065 +/- 4194</td>
1530 <td>94.23 +/- 2.19</td>
1531 <td>82.90 +/- 1.32</td>
1532 <td>84.94 +/- 0.28</td>
1533 <td>51.76 +/- 2.29</td>
1534 <td>0.59 +/- 0.07</td>
1539 <td>68814 +/- 4783</td>
1540 <td>56905 +/- 4145</td>
1542 <td>96.82 +/- 7.12</td>
1543 <td>82.91 +/- 5.89</td>
1544 <td>56.01 +/- 0.99</td>
1545 <td>27.86 +/- 3.48</td>
1546 <td>0.95 +/- 0.33</td>
1551 <td>104855 +/- 4665</td>
1552 <td>79879 +/- 6270</td>
1554 <td>103.56 +/- 15.45</td>
1555 <td>76.34 +/- 6.67</td>
1556 <td>56.76 +/- 0.41</td>
1557 <td>25.68 +/- 2.06</td>
1558 <td>0.98 +/- 0.17</td>
1563 <td>43556 +/- 1633</td>
1564 <td>37946 +/- 2140</td>
1566 <td>100.82 +/- 5.47</td>
1567 <td>87.14 +/- 3.87</td>
1568 <td>88.64 +/- 1.42</td>
1569 <td>34.05 +/- 2.60</td>
1570 <td>0.98 +/- 0.22</td>
1575 <td>54266 +/- 1589</td>
1576 <td>41680 +/- 5319</td>
1578 <td>103.42 +/- 3.47</td>
1579 <td>76.94 +/- 10.52</td>
1580 <td>83.90 +/- 1.32</td>
1581 <td>27.60 +/- 2.07</td>
1582 <td>1.26 +/- 0.16</td>
1587 <td>64364 +/- 2698</td>
1588 <td>39991 +/- 6785</td>
1590 <td>99.48 +/- 3.23</td>
1591 <td>62.45 +/- 12.05</td>
1592 <td>79.81 +/- 3.35</td>
1593 <td>23.06 +/- 2.50</td>
1594 <td>1.56 +/- 0.23</td>
1596 <tr><td colspan="13">Tile mean across chip</td></tr>
1610 <h2><br></br>Expanded Lane Summary : Read 1<br></br></h2>
1611 <table border="1" cellpadding="5">
1614 <tr><td colspan="2">Lane Info</td>
1615 <td colspan="2">Phasing Info</td>
1616 <td colspan="2">Raw Data (tile mean)</td>
1617 <td colspan="7">Filtered Data (tile mean)</td></tr>
1619 <td>Clusters (tile mean) (raw)</td>
1621 <td>% Prephasing </td>
1622 <td>% Error Rate (raw) </td>
1623 <td> Equiv Perfect Clusters (raw) </td>
1624 <td>% retained </td>
1625 <td>Cycle 2-4 Av Int (PF) </td>
1626 <td>Cycle 2-10 Av % Loss (PF) </td>
1627 <td>Cycle 10-20 Av % Loss (PF) </td>
1628 <td>% Align (PF) </td>
1629 <td>% Error Rate (PF) </td>
1630 <td> Equiv Perfect Clusters (PF) </td>
1641 <td>-0.53 +/- 0.37</td>
1642 <td>-0.42 +/- 0.21</td>
1656 <td>0.08 +/- 0.43</td>
1657 <td>-0.17 +/- 0.34</td>
1671 <td>0.29 +/- 0.48</td>
1672 <td>-0.02 +/- 0.17</td>
1686 <td>-0.01 +/- 0.62</td>
1687 <td>-0.37 +/- 0.30</td>
1701 <td>-0.03 +/- 0.46</td>
1702 <td>-0.49 +/- 0.27</td>
1716 <td>-0.19 +/- 0.46</td>
1717 <td>-0.34 +/- 0.40</td>
1731 <td>-0.41 +/- 0.49</td>
1732 <td>-0.55 +/- 0.23</td>
1746 <td>-0.52 +/- 0.36</td>
1747 <td>-0.29 +/- 0.19</td>
1753 <h2><br></br>Expanded Lane Summary : Read 2<br></br></h2>
1754 <table border="1" cellpadding="5">
1757 <tr><td colspan="2">Lane Info</td>
1758 <td colspan="2">Phasing Info</td>
1759 <td colspan="2">Raw Data (tile mean)</td>
1760 <td colspan="7">Filtered Data (tile mean)</td></tr>
1762 <td>Clusters (tile mean) (raw)</td>
1764 <td>% Prephasing </td>
1765 <td>% Error Rate (raw) </td>
1766 <td> Equiv Perfect Clusters (raw) </td>
1767 <td>% retained </td>
1768 <td>Cycle 2-4 Av Int (PF) </td>
1769 <td>Cycle 2-10 Av % Loss (PF) </td>
1770 <td>Cycle 10-20 Av % Loss (PF) </td>
1771 <td>% Align (PF) </td>
1772 <td>% Error Rate (PF) </td>
1773 <td> Equiv Perfect Clusters (PF) </td>
1784 <td>-0.53 +/- 0.37</td>
1785 <td>-0.42 +/- 0.21</td>
1799 <td>0.08 +/- 0.43</td>
1800 <td>-0.17 +/- 0.34</td>
1814 <td>0.29 +/- 0.48</td>
1815 <td>-0.02 +/- 0.17</td>
1829 <td>-0.01 +/- 0.62</td>
1830 <td>-0.37 +/- 0.30</td>
1844 <td>-0.03 +/- 0.46</td>
1845 <td>-0.49 +/- 0.27</td>
1859 <td>-0.19 +/- 0.46</td>
1860 <td>-0.34 +/- 0.40</td>
1874 <td>-0.41 +/- 0.49</td>
1875 <td>-0.55 +/- 0.23</td>
1889 <td>-0.52 +/- 0.36</td>
1890 <td>-0.29 +/- 0.19</td>
1898 pathname = os.path.join(gerald_dir, 'Summary.htm')
1899 f = open(pathname, 'w')
1900 f.write(summary_htm)
def make_summary_ipar130_htm(gerald_dir):
    """
    Install the canned 'Summary-ipar130.htm' fixture as Summary.htm

    The fixture is read from the 'testdata' directory that sits next to
    this module and is copied to <gerald_dir>/Summary.htm.
    """
    # dirname(path) is the same value as split(path)[0]
    module_dir = os.path.dirname(__file__)
    fixture = os.path.join(module_dir, 'testdata', 'Summary-ipar130.htm')
    target = os.path.join(gerald_dir, 'Summary.htm')
    shutil.copy(fixture, target)
1910 def make_eland_results(gerald_dir):
1911 eland_result = """>HWI-EAS229_24_207BTAAXX:1:7:599:759 ACATAGNCACAGACATAAACATAGACATAGAC U0 1 1 3 chrUextra.fa 28189829 R D.
1912 >HWI-EAS229_24_207BTAAXX:1:7:205:842 AAACAANNCTCCCAAACACGTAAACTGGAAAA U1 0 1 0 chr2L.fa 8796855 R DD 24T
1913 >HWI-EAS229_24_207BTAAXX:1:7:776:582 AGCTCANCCGATCGAAAACCTCNCCAAGCAAT NM 0 0 0
1914 >HWI-EAS229_24_207BTAAXX:1:7:205:842 AAACAANNCTCCCAAACACGTAAACTGGAAAA U1 0 1 0 Lambda.fa 8796855 R DD 24T
1916 for i in range(1,9):
1917 pathname = os.path.join(gerald_dir,
1918 's_%d_eland_result.txt' % (i,))
1919 f = open(pathname, 'w')
1920 f.write(eland_result)
1923 def make_eland_multi(gerald_dir, paired=False):
1924 eland_multi = [""">HWI-EAS229_60_30DP9AAXX:1:1:1221:788 AAGATATCTACGACGTGGTATGGCGGTGTCTGGTCGT NM
1925 >HWI-EAS229_60_30DP9AAXX:1:1:931:747 AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA 1:0:2 chr5.fa:55269838R0
1926 >HWI-EAS229_60_30DP9AAXX:1:1:1121:379 AGAAGAGACATTAAGAGTTCCTGAAATTTATATCTGG 2:1:0 chr16.fa:46189180R1,chr7.fa:122968519R0,chr8.fa:48197174F0
1927 >HWI-EAS229_60_30DP9AAXX:1:1:892:1155 ACATTCTCCTTTCCTTCTGAAGTTTTTACGATTCTTT 0:9:10 chr10.fa:114298201F1,chr12.fa:8125072F1,19500297F2,42341293R2,chr13.fa:27688155R2,95069772R1,chr15.fa:51016475F2,chr16.fa:27052155F2,chr1.fa:192426217R2,chr21.fa:23685310R2,chr2.fa:106680068F1,chr3.fa:185226695F2,chr4.fa:106626808R2,chr5.fa:14704894F1,43530779F1,126543189F2,chr6.fa:74284101F1,chr7.fa:22516603F1,chr9.fa:134886204R
1928 >HWI-EAS229_60_30DP9AAXX:1:1:931:747 AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA 1:0:0 spike.fa/sample1:55269838R0
1929 >HWI-EAS229_60_30DP9AAXX:1:1:931:747 AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA 1:0:0 spike.fa/sample2:55269838R0
1930 """, """>HWI-EAS229_60_30DP9AAXX:1:1:1221:788 AAGATATCTACGACGTGGTATGGCGGTGTCTGGTCGT NM
1931 >HWI-EAS229_60_30DP9AAXX:1:1:1221:788 NNNNNNNNNNNNNNGTGGTATGGCGGTGTCTGGTCGT QC
1932 >HWI-EAS229_60_30DP9AAXX:1:1:931:747 AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA 1:0:2 chr5.fa:55269838R0
1933 >HWI-EAS229_60_30DP9AAXX:1:1:1121:379 AGAAGAGACATTAAGAGTTCCTGAAATTTATATCTGG 2:1:0 chr16.fa:46189180R1,chr7.fa:122968519R0,chr8.fa:48197174F0,chr7.fa:22516603F1,chr9.fa:134886204R
1934 >HWI-EAS229_60_30DP9AAXX:1:1:892:1155 ACATTCTCCTTTCCTTCTGAAGTTTTTACGATTCTTT 0:9:10 chr10.fa:114298201F1,chr12.fa:8125072F1,19500297F2,42341293R2,chr13.fa:27688155R2,95069772R1,chr15.fa:51016475F2,chr16.fa:27052155F2,chr1.fa:192426217R2,chr21.fa:23685310R2,chr2.fa:106680068F1,chr3.fa:185226695F2,chr4.fa:106626808R2,chr5.fa:14704894F1,43530779F1,126543189F2,chr6.fa:74284101F1
1935 >HWI-EAS229_60_30DP9AAXX:1:1:931:747 AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA 1:0:0 spike.fa/sample1:55269838R0
1936 >HWI-EAS229_60_30DP9AAXX:1:1:931:747 AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA 1:0:0 spike.fa/sample2:55269838R0
1940 for i in range(1,9):
1941 pathname = os.path.join(gerald_dir,
1942 's_%d_%d_eland_multi.txt' % (i,e))
1943 f = open(pathname, 'w')
1944 f.write(eland_multi[e-1])
1947 for i in range(1,9):
1948 pathname = os.path.join(gerald_dir,
1949 's_%d_eland_multi.txt' % (i,))
1950 f = open(pathname, 'w')
1951 f.write(eland_multi[0])