2 Create simulated solexa/illumina runfolders for testing
8 def make_ipar_dir(data_dir):
10 Construct an artificial ipar parameter file and directory
12 params = """<?xml version="1.0"?>
14 <Run Name="IPAR_1.01">
15 <Software Name="IPAR" Version="2.01.192.0" />
16 <Cycles First="1" Last="37" Number="37" />
18 <ImagingReads Index="1">
19 <FirstCycle>1</FirstCycle>
20 <LastCycle>37</LastCycle>
21 <RunFolder>081021_HWI-EAS229_0063_30HKUAAXX</RunFolder>
24 <FirstCycle>1</FirstCycle>
25 <LastCycle>37</LastCycle>
26 <RunFolder>081021_HWI-EAS229_0063_30HKUAAXX</RunFolder>
28 <Compression>gzip</Compression>
29 <CompressionSuffix>.p.gz</CompressionSuffix>
30 <Instrument>HWI-EAS229</Instrument>
31 <RunFolder>081021_HWI-EAS229_0063_30HKUAAXX</RunFolder>
34 <AutoOffsetFlag>1</AutoOffsetFlag>
36 <RemappingDistance>1.5</RemappingDistance>
37 <Threshold>4</Threshold>
42 <TileRange Max="100" Min="1" />
46 <TileRange Max="100" Min="1" />
50 <TileRange Max="100" Min="1" />
54 <TileRange Max="100" Min="1" />
58 <TileRange Max="100" Min="1" />
62 <TileRange Max="100" Min="1" />
66 <TileRange Max="100" Min="1" />
70 <TileRange Max="100" Min="1" />
76 f = open(os.path.join(data_dir, '.params'),'w')
79 ipar_dir = os.path.join(data_dir, 'IPAR_1.01')
80 if not os.path.exists(ipar_dir):
84 def make_flowcell_id(runfolder_dir, flowcell_id=None):
85 if flowcell_id is None:
86 flowcell_id = '207BTAAXY'
88 config = """<?xml version="1.0"?>
91 </FlowcellId>""" % (flowcell_id,)
92 config_dir = os.path.join(runfolder_dir, 'Config')
94 if not os.path.exists(config_dir):
96 pathname = os.path.join(config_dir, 'FlowcellId.xml')
97 f = open(pathname,'w')
101 def make_matrix(matrix_dir):
102 contents = """# Auto-generated frequency response matrix
107 0.77 0.15 -0.04 -0.04
108 0.76 1.02 -0.05 -0.06
109 -0.10 -0.10 1.17 -0.03
110 -0.13 -0.12 0.80 1.27
112 s_matrix = os.path.join(matrix_dir, 's_matrix.txt')
113 f = open(s_matrix, 'w')
117 def make_phasing_params(bustard_dir):
118 for lane in range(1,9):
119 pathname = os.path.join(bustard_dir, 'params%d.xml' % (lane))
120 f = open(pathname, 'w')
121 f.write("""<Parameters>
122 <Phasing>0.009900</Phasing>
123 <Prephasing>0.003500</Prephasing>
128 def make_gerald_config(gerald_dir):
129 config_xml = """<RunParameters>
130 <ChipWideRunParameters>
131 <ANALYSIS>default</ANALYSIS>
132 <BAD_LANES></BAD_LANES>
133 <BAD_TILES></BAD_TILES>
134 <CONTAM_DIR></CONTAM_DIR>
135 <CONTAM_FILE></CONTAM_FILE>
136 <ELAND_GENOME>Need_to_specify_ELAND_genome_directory</ELAND_GENOME>
137 <ELAND_MULTIPLE_INSTANCES>8</ELAND_MULTIPLE_INSTANCES>
138 <ELAND_REPEAT></ELAND_REPEAT>
139 <EMAIL_DOMAIN>domain.com</EMAIL_DOMAIN>
140 <EMAIL_LIST>diane</EMAIL_LIST>
141 <EMAIL_SERVER>localhost:25</EMAIL_SERVER>
142 <EXPT_DIR>/home/diane/gec/080416_HWI-EAS229_0024_207BTAAXX/Data/C1-33_Firecrest1.8.28_19-04-2008_diane/Bustard1.8.28_19-04-2008_diane</EXPT_DIR>
143 <EXPT_DIR_ROOT>/home/diane/gec</EXPT_DIR_ROOT>
145 <GENOME_DIR>/home/diane/proj/SolexaPipeline-0.2.2.6/Goat/../Gerald/../../Genomes</GENOME_DIR>
146 <GENOME_FILE>Need_to_specify_genome_file_name</GENOME_FILE>
147 <HAMSTER_FLAG>genome</HAMSTER_FLAG>
148 <OUT_DIR>/home/diane/gec/080416_HWI-EAS229_0024_207BTAAXX/Data/C1-33_Firecrest1.8.28_19-04-2008_diane/Bustard1.8.28_19-04-2008_diane/GERALD_19-04-2008_diane</OUT_DIR>
149 <POST_RUN_COMMAND></POST_RUN_COMMAND>
150 <PRB_FILE_SUFFIX>_prb.txt</PRB_FILE_SUFFIX>
151 <PURE_BASES>12</PURE_BASES>
152 <QF_PARAMS>'((CHASTITY>=0.6))'</QF_PARAMS>
153 <QHG_FILE_SUFFIX>_qhg.txt</QHG_FILE_SUFFIX>
154 <QUALITY_FORMAT>--symbolic</QUALITY_FORMAT>
155 <READ_LENGTH>32</READ_LENGTH>
156 <SEQUENCE_FORMAT>--scarf</SEQUENCE_FORMAT>
157 <SEQ_FILE_SUFFIX>_seq.txt</SEQ_FILE_SUFFIX>
158 <SIG_FILE_SUFFIX_DEPHASED>_sig2.txt</SIG_FILE_SUFFIX_DEPHASED>
159 <SIG_FILE_SUFFIX_NOT_DEPHASED>_sig.txt</SIG_FILE_SUFFIX_NOT_DEPHASED>
160 <SOFTWARE_VERSION>@(#) Id: GERALD.pl,v 1.68.2.2 2007/06/13 11:08:49 km Exp</SOFTWARE_VERSION>
161 <TILE_REGEX>s_[1-8]_[0-9][0-9][0-9][0-9]</TILE_REGEX>
162 <TILE_ROOT>s</TILE_ROOT>
163 <TIME_STAMP>Sat Apr 19 19:08:30 2008</TIME_STAMP>
164 <TOOLS_DIR>/home/diane/proj/SolexaPipeline-0.2.2.6/Goat/../Gerald</TOOLS_DIR>
165 <USE_BASES>all</USE_BASES>
166 <WEB_DIR_ROOT>http://host.domain.com/yourshare/</WEB_DIR_ROOT>
167 </ChipWideRunParameters>
168 <LaneSpecificRunParameters>
181 <s_2>/g/equcab1</s_2>
182 <s_3>/g/equcab1</s_3>
183 <s_4>/g/canfam2</s_4>
200 <s_1>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_1>
201 <s_2>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_2>
202 <s_3>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_3>
203 <s_4>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_4>
204 <s_5>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_5>
205 <s_6>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_6>
206 <s_7>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_7>
207 <s_8>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_8>
209 </LaneSpecificRunParameters>
212 pathname = os.path.join(gerald_dir, 'config.xml')
213 f = open(pathname,'w')
217 def make_summary100_htm(gerald_dir):
218 summary_htm="""<!--RUN_TIME Wed Jul 2 06:47:44 2008 -->
219 <!--SOFTWARE_VERSION @(#) $Id: jerboa.pl,v 1.94 2007/12/04 09:59:07 rshaw Exp $-->
223 <a name="Top"><h2><title>080627_HWI-EAS229_0036_3055HAXX Summary</title></h2></a>
224 <h1>Summary Information For Experiment 080627_HWI-EAS229_0036_3055HAXX on Machine HWI-EAS229</h1>
225 <h2><br></br>Chip Summary<br></br></h2>
226 <table border="1" cellpadding="5">
227 <tr><td>Machine</td><td>HWI-EAS229</td></tr>
228 <tr><td>Run Folder</td><td>080627_HWI-EAS229_0036_3055HAXX</td></tr>
229 <tr><td>Chip ID</td><td>unknown</td></tr>
231 <h2><br></br>Chip Results Summary<br></br></h2>
232 <table border="1" cellpadding="5">
235 <td>Clusters (PF)</td>
236 <td>Yield (kbases)</td>
238 <tr><td>80933224</td>
243 <h2><br></br>Lane Parameter Summary<br></br></h2>
244 <table border="1" cellpadding="5">
248 <td>Sample Target</td>
261 <td>'((CHASTITY>=0.6))'</td>
263 <td><a href="#Lane1">Lane 1</a></td>
271 <td>'((CHASTITY>=0.6))'</td>
273 <td><a href="#Lane2">Lane 2</a></td>
281 <td>'((CHASTITY>=0.6))'</td>
283 <td><a href="#Lane3">Lane 3</a></td>
291 <td>'((CHASTITY>=0.6))'</td>
293 <td><a href="#Lane4">Lane 4</a></td>
301 <td>'((CHASTITY>=0.6))'</td>
303 <td><a href="#Lane5">Lane 5</a></td>
311 <td>'((CHASTITY>=0.6))'</td>
313 <td><a href="#Lane6">Lane 6</a></td>
321 <td>'((CHASTITY>=0.6))'</td>
323 <td><a href="#Lane7">Lane 7</a></td>
331 <td>'((CHASTITY>=0.6))'</td>
333 <td><a href="#Lane8">Lane 8</a></td>
336 <h2><br></br>Lane Results Summary<br></br></h2>
337 <table border="1" cellpadding="5">
339 <td colspan="2">Lane Info</td>
340 <td colspan="8">Tile Mean +/- SD for Lane</td>
344 <td>Lane Yield (kbases) </td>
345 <td>Clusters (raw)</td>
346 <td>Clusters (PF) </td>
347 <td>1st Cycle Int (PF) </td>
348 <td>% intensity after 20 cycles (PF) </td>
349 <td>% PF Clusters </td>
350 <td>% Align (PF) </td>
351 <td>Alignment Score (PF) </td>
352 <td> % Error Rate (PF) </td>
357 <td>96483 +/- 9074</td>
358 <td>60787 +/- 4240</td>
360 <td>101.88 +/- 6.03</td>
361 <td>63.21 +/- 3.29</td>
362 <td>70.33 +/- 0.24</td>
363 <td>9054.08 +/- 59.16</td>
364 <td>0.46 +/- 0.18</td>
369 <td>133738 +/- 7938</td>
370 <td>60217 +/- 1926</td>
372 <td>92.62 +/- 7.58</td>
373 <td>45.20 +/- 3.31</td>
374 <td>51.98 +/- 0.74</td>
375 <td>6692.04 +/- 92.49</td>
376 <td>0.46 +/- 0.09</td>
381 <td>152142 +/- 10002</td>
382 <td>71468 +/- 2827</td>
384 <td>91.53 +/- 8.66</td>
385 <td>47.19 +/- 3.80</td>
386 <td>82.24 +/- 0.44</td>
387 <td>10598.68 +/- 64.13</td>
388 <td>0.41 +/- 0.04</td>
393 <td>15784 +/- 2162</td>
394 <td>13443 +/- 1728</td>
396 <td>97.53 +/- 9.87</td>
397 <td>85.29 +/- 1.91</td>
398 <td>80.02 +/- 0.53</td>
399 <td>10368.82 +/- 71.08</td>
400 <td>0.15 +/- 0.05</td>
405 <td>119735 +/- 8465</td>
406 <td>64590 +/- 2529</td>
408 <td>88.69 +/- 14.79</td>
409 <td>54.10 +/- 2.59</td>
410 <td>76.95 +/- 0.32</td>
411 <td>9936.47 +/- 65.75</td>
412 <td>0.28 +/- 0.02</td>
417 <td>152177 +/- 8146</td>
418 <td>66716 +/- 2493</td>
420 <td>87.06 +/- 9.86</td>
421 <td>43.98 +/- 3.12</td>
422 <td>78.80 +/- 0.43</td>
423 <td>10162.28 +/- 49.65</td>
424 <td>0.38 +/- 0.03</td>
429 <td>84649 +/- 7325</td>
430 <td>57418 +/- 3617</td>
432 <td>89.40 +/- 8.23</td>
433 <td>67.97 +/- 1.82</td>
434 <td>33.38 +/- 0.25</td>
435 <td>4247.92 +/- 32.37</td>
436 <td>1.00 +/- 0.03</td>
441 <td>54622 +/- 4812</td>
442 <td>41136 +/- 3309</td>
444 <td>90.21 +/- 9.10</td>
445 <td>75.39 +/- 2.27</td>
446 <td>48.33 +/- 0.29</td>
447 <td>6169.21 +/- 169.50</td>
448 <td>0.86 +/- 1.22</td>
450 <tr><td colspan="13">Tile mean across chip</td></tr>
464 <h2><br></br>Expanded Lane Summary<br></br></h2>
465 <table border="1" cellpadding="5">
468 <tr><td colspan="2">Lane Info</td>
469 <td colspan="2">Phasing Info</td>
470 <td colspan="2">Raw Data (tile mean)</td>
471 <td colspan="7">Filtered Data (tile mean)</td></tr>
473 <td>Clusters (tile mean) (raw)</td>
475 <td>% Prephasing </td>
476 <td>% Error Rate (raw) </td>
477 <td> Equiv Perfect Clusters (raw) </td>
479 <td>Cycle 2-4 Av Int (PF) </td>
480 <td>Cycle 2-10 Av % Loss (PF) </td>
481 <td>Cycle 10-20 Av % Loss (PF) </td>
482 <td>% Align (PF) </td>
483 <td>% Error Rate (PF) </td>
484 <td> Equiv Perfect Clusters (PF) </td>
495 <td>0.13 +/- 0.44</td>
496 <td>-1.14 +/- 0.34</td>
510 <td>0.29 +/- 0.40</td>
511 <td>-0.79 +/- 0.35</td>
525 <td>0.68 +/- 0.51</td>
526 <td>-0.77 +/- 0.42</td>
540 <td>0.20 +/- 0.69</td>
541 <td>-1.28 +/- 0.66</td>
555 <td>0.34 +/- 0.49</td>
556 <td>-1.55 +/- 4.69</td>
570 <td>0.57 +/- 0.50</td>
571 <td>-0.91 +/- 0.39</td>
585 <td>1.15 +/- 0.52</td>
586 <td>-0.84 +/- 0.58</td>
600 <td>1.10 +/- 0.59</td>
601 <td>-1.01 +/- 0.47</td>
607 <b><br></br>IVC Plots</b>
608 <p> <a href='IVC.htm' target="_blank"> IVC.htm
610 <b><br></br>All Intensity Plots</b>
611 <p> <a href='All.htm' target="_blank"> All.htm
613 <b><br></br>Error graphs: </b>
614 <p> <a href='Error.htm' target="_blank"> Error.htm
616 <td><a href="#Top">Back to top</a></td>
617 <a name="Lane1"><h2><br></br>Lane 1<br></br></h2></a>
618 <table border="1" cellpadding="5">
622 <td>Clusters (raw)</td>
623 <td>Av 1st Cycle Int (PF) </td>
624 <td>Av % intensity after 20 cycles (PF) </td>
625 <td>% PF Clusters </td>
626 <td>% Align (PF) </td>
627 <td>Av Alignment Score (PF) </td>
628 <td>% Error Rate (PF) </td>
642 <td><a href="#Top">Back to top</a></td>
643 <a name="Lane2"><h2><br></br>Lane 2<br></br></h2></a>
644 <table border="1" cellpadding="5">
648 <td>Clusters (raw)</td>
649 <td>Av 1st Cycle Int (PF) </td>
650 <td>Av % intensity after 20 cycles (PF) </td>
651 <td>% PF Clusters </td>
652 <td>% Align (PF) </td>
653 <td>Av Alignment Score (PF) </td>
654 <td>% Error Rate (PF) </td>
668 <td><a href="#Top">Back to top</a></td>
669 <a name="Lane3"><h2><br></br>Lane 3<br></br></h2></a>
670 <table border="1" cellpadding="5">
674 <td>Clusters (raw)</td>
675 <td>Av 1st Cycle Int (PF) </td>
676 <td>Av % intensity after 20 cycles (PF) </td>
677 <td>% PF Clusters </td>
678 <td>% Align (PF) </td>
679 <td>Av Alignment Score (PF) </td>
680 <td>% Error Rate (PF) </td>
694 <td><a href="#Top">Back to top</a></td>
695 <a name="Lane4"><h2><br></br>Lane 4<br></br></h2></a>
696 <table border="1" cellpadding="5">
700 <td>Clusters (raw)</td>
701 <td>Av 1st Cycle Int (PF) </td>
702 <td>Av % intensity after 20 cycles (PF) </td>
703 <td>% PF Clusters </td>
704 <td>% Align (PF) </td>
705 <td>Av Alignment Score (PF) </td>
706 <td>% Error Rate (PF) </td>
720 <td><a href="#Top">Back to top</a></td>
721 <a name="Lane5"><h2><br></br>Lane 5<br></br></h2></a>
722 <table border="1" cellpadding="5">
726 <td>Clusters (raw)</td>
727 <td>Av 1st Cycle Int (PF) </td>
728 <td>Av % intensity after 20 cycles (PF) </td>
729 <td>% PF Clusters </td>
730 <td>% Align (PF) </td>
731 <td>Av Alignment Score (PF) </td>
732 <td>% Error Rate (PF) </td>
735 <td><a href="#Top">Back to top</a></td>
736 <a name="Lane6"><h2><br></br>Lane 6<br></br></h2></a>
737 <table border="1" cellpadding="5">
741 <td>Clusters (raw)</td>
742 <td>Av 1st Cycle Int (PF) </td>
743 <td>Av % intensity after 20 cycles (PF) </td>
744 <td>% PF Clusters </td>
745 <td>% Align (PF) </td>
746 <td>Av Alignment Score (PF) </td>
747 <td>% Error Rate (PF) </td>
761 <td><a href="#Top">Back to top</a></td>
762 <a name="Lane7"><h2><br></br>Lane 7<br></br></h2></a>
763 <table border="1" cellpadding="5">
767 <td>Clusters (raw)</td>
768 <td>Av 1st Cycle Int (PF) </td>
769 <td>Av % intensity after 20 cycles (PF) </td>
770 <td>% PF Clusters </td>
771 <td>% Align (PF) </td>
772 <td>Av Alignment Score (PF) </td>
773 <td>% Error Rate (PF) </td>
787 <td><a href="#Top">Back to top</a></td>
788 <a name="Lane8"><h2><br></br>Lane 8<br></br></h2></a>
789 <table border="1" cellpadding="5">
793 <td>Clusters (raw)</td>
794 <td>Av 1st Cycle Int (PF) </td>
795 <td>Av % intensity after 20 cycles (PF) </td>
796 <td>% PF Clusters </td>
797 <td>% Align (PF) </td>
798 <td>Av Alignment Score (PF) </td>
799 <td>% Error Rate (PF) </td>
813 <td><a href="#Top">Back to top</a></td>
817 pathname = os.path.join(gerald_dir, 'Summary.htm')
818 f = open(pathname, 'w')
822 def make_eland_results(gerald_dir):
823 eland_result = """>HWI-EAS229_24_207BTAAXX:1:7:599:759 ACATAGNCACAGACATAAACATAGACATAGAC U0 1 1 3 chrUextra.fa 28189829 R D.
824 >HWI-EAS229_24_207BTAAXX:1:7:205:842 AAACAANNCTCCCAAACACGTAAACTGGAAAA U1 0 1 0 chr2L.fa 8796855 R DD 24T
825 >HWI-EAS229_24_207BTAAXX:1:7:776:582 AGCTCANCCGATCGAAAACCTCNCCAAGCAAT NM 0 0 0
826 >HWI-EAS229_24_207BTAAXX:1:7:205:842 AAACAANNCTCCCAAACACGTAAACTGGAAAA U1 0 1 0 Lambda.fa 8796855 R DD 24T
829 pathname = os.path.join(gerald_dir,
830 's_%d_eland_result.txt' % (i,))
831 f = open(pathname, 'w')
832 f.write(eland_result)