3 from datetime import datetime, date
9 from htsworkflow.pipelines import firecrest
10 from htsworkflow.pipelines import bustard
11 from htsworkflow.pipelines import gerald
12 from htsworkflow.pipelines import runfolder
13 from htsworkflow.pipelines.runfolder import ElementTree
15 from htsworkflow.pipelines.test.simulate_runfolder import *
18 def make_summary_htm(gerald_dir):
19 summary_htm="""<!--RUN_TIME Wed Jul 2 06:47:44 2008 -->
20 <!--SOFTWARE_VERSION @(#) $Id: jerboa.pl,v 1.94 2007/12/04 09:59:07 rshaw Exp $-->
24 <a name="Top"><h2><title>080627_HWI-EAS229_0036_3055HAXX Summary</title></h2></a>
25 <h1>Summary Information For Experiment 080627_HWI-EAS229_0036_3055HAXX on Machine HWI-EAS229</h1>
26 <h2><br></br>Chip Summary<br></br></h2>
27 <table border="1" cellpadding="5">
28 <tr><td>Machine</td><td>HWI-EAS229</td></tr>
29 <tr><td>Run Folder</td><td>080627_HWI-EAS229_0036_3055HAXX</td></tr>
30 <tr><td>Chip ID</td><td>unknown</td></tr>
32 <h2><br></br>Chip Results Summary<br></br></h2>
33 <table border="1" cellpadding="5">
36 <td>Clusters (PF)</td>
37 <td>Yield (kbases)</td>
44 <h2><br></br>Lane Parameter Summary<br></br></h2>
45 <table border="1" cellpadding="5">
49 <td>Sample Target</td>
62 <td>'((CHASTITY>=0.6))'</td>
64 <td><a href="#Lane1">Lane 1</a></td>
72 <td>'((CHASTITY>=0.6))'</td>
74 <td><a href="#Lane2">Lane 2</a></td>
82 <td>'((CHASTITY>=0.6))'</td>
84 <td><a href="#Lane3">Lane 3</a></td>
92 <td>'((CHASTITY>=0.6))'</td>
94 <td><a href="#Lane4">Lane 4</a></td>
102 <td>'((CHASTITY>=0.6))'</td>
104 <td><a href="#Lane5">Lane 5</a></td>
112 <td>'((CHASTITY>=0.6))'</td>
114 <td><a href="#Lane6">Lane 6</a></td>
122 <td>'((CHASTITY>=0.6))'</td>
124 <td><a href="#Lane7">Lane 7</a></td>
132 <td>'((CHASTITY>=0.6))'</td>
134 <td><a href="#Lane8">Lane 8</a></td>
137 <h2><br></br>Lane Results Summary<br></br></h2>
138 <table border="1" cellpadding="5">
140 <td colspan="2">Lane Info</td>
141 <td colspan="8">Tile Mean +/- SD for Lane</td>
145 <td>Lane Yield (kbases) </td>
146 <td>Clusters (raw)</td>
147 <td>Clusters (PF) </td>
148 <td>1st Cycle Int (PF) </td>
149 <td>% intensity after 20 cycles (PF) </td>
150 <td>% PF Clusters </td>
151 <td>% Align (PF) </td>
152 <td>Alignment Score (PF) </td>
153 <td> % Error Rate (PF) </td>
158 <td>96483 +/- 9074</td>
159 <td>60787 +/- 4240</td>
161 <td>101.88 +/- 6.03</td>
162 <td>63.21 +/- 3.29</td>
163 <td>70.33 +/- 0.24</td>
164 <td>9054.08 +/- 59.16</td>
165 <td>0.46 +/- 0.18</td>
170 <td>133738 +/- 7938</td>
171 <td>60217 +/- 1926</td>
173 <td>92.62 +/- 7.58</td>
174 <td>45.20 +/- 3.31</td>
175 <td>51.98 +/- 0.74</td>
176 <td>6692.04 +/- 92.49</td>
177 <td>0.46 +/- 0.09</td>
182 <td>152142 +/- 10002</td>
183 <td>71468 +/- 2827</td>
185 <td>91.53 +/- 8.66</td>
186 <td>47.19 +/- 3.80</td>
187 <td>82.24 +/- 0.44</td>
188 <td>10598.68 +/- 64.13</td>
189 <td>0.41 +/- 0.04</td>
194 <td>15784 +/- 2162</td>
195 <td>13443 +/- 1728</td>
197 <td>97.53 +/- 9.87</td>
198 <td>85.29 +/- 1.91</td>
199 <td>80.02 +/- 0.53</td>
200 <td>10368.82 +/- 71.08</td>
201 <td>0.15 +/- 0.05</td>
206 <td>119735 +/- 8465</td>
207 <td>64590 +/- 2529</td>
209 <td>88.69 +/- 14.79</td>
210 <td>54.10 +/- 2.59</td>
211 <td>76.95 +/- 0.32</td>
212 <td>9936.47 +/- 65.75</td>
213 <td>0.28 +/- 0.02</td>
218 <td>152177 +/- 8146</td>
219 <td>66716 +/- 2493</td>
221 <td>87.06 +/- 9.86</td>
222 <td>43.98 +/- 3.12</td>
223 <td>78.80 +/- 0.43</td>
224 <td>10162.28 +/- 49.65</td>
225 <td>0.38 +/- 0.03</td>
230 <td>84649 +/- 7325</td>
231 <td>57418 +/- 3617</td>
233 <td>89.40 +/- 8.23</td>
234 <td>67.97 +/- 1.82</td>
235 <td>33.38 +/- 0.25</td>
236 <td>4247.92 +/- 32.37</td>
237 <td>1.00 +/- 0.03</td>
242 <td>54622 +/- 4812</td>
243 <td>41136 +/- 3309</td>
245 <td>90.21 +/- 9.10</td>
246 <td>75.39 +/- 2.27</td>
247 <td>48.33 +/- 0.29</td>
248 <td>6169.21 +/- 169.50</td>
249 <td>0.86 +/- 1.22</td>
251 <tr><td colspan="13">Tile mean across chip</td></tr>
265 <h2><br></br>Expanded Lane Summary<br></br></h2>
266 <table border="1" cellpadding="5">
269 <tr><td colspan="2">Lane Info</td>
270 <td colspan="2">Phasing Info</td>
271 <td colspan="2">Raw Data (tile mean)</td>
272 <td colspan="7">Filtered Data (tile mean)</td></tr>
274 <td>Clusters (tile mean) (raw)</td>
276 <td>% Prephasing </td>
277 <td>% Error Rate (raw) </td>
278 <td> Equiv Perfect Clusters (raw) </td>
280 <td>Cycle 2-4 Av Int (PF) </td>
281 <td>Cycle 2-10 Av % Loss (PF) </td>
282 <td>Cycle 10-20 Av % Loss (PF) </td>
283 <td>% Align (PF) </td>
284 <td>% Error Rate (PF) </td>
285 <td> Equiv Perfect Clusters (PF) </td>
296 <td>0.13 +/- 0.44</td>
297 <td>-1.14 +/- 0.34</td>
311 <td>0.29 +/- 0.40</td>
312 <td>-0.79 +/- 0.35</td>
326 <td>0.68 +/- 0.51</td>
327 <td>-0.77 +/- 0.42</td>
341 <td>0.20 +/- 0.69</td>
342 <td>-1.28 +/- 0.66</td>
356 <td>0.34 +/- 0.49</td>
357 <td>-1.55 +/- 4.69</td>
371 <td>0.57 +/- 0.50</td>
372 <td>-0.91 +/- 0.39</td>
386 <td>1.15 +/- 0.52</td>
387 <td>-0.84 +/- 0.58</td>
401 <td>1.10 +/- 0.59</td>
402 <td>-1.01 +/- 0.47</td>
408 <b><br></br>IVC Plots</b>
409 <p> <a href='IVC.htm' target="_blank"> IVC.htm
411 <b><br></br>All Intensity Plots</b>
412 <p> <a href='All.htm' target="_blank"> All.htm
414 <b><br></br>Error graphs: </b>
415 <p> <a href='Error.htm' target="_blank"> Error.htm
417 <td><a href="#Top">Back to top</a></td>
418 <a name="Lane1"><h2><br></br>Lane 1<br></br></h2></a>
419 <table border="1" cellpadding="5">
423 <td>Clusters (raw)</td>
424 <td>Av 1st Cycle Int (PF) </td>
425 <td>Av % intensity after 20 cycles (PF) </td>
426 <td>% PF Clusters </td>
427 <td>% Align (PF) </td>
428 <td>Av Alignment Score (PF) </td>
429 <td>% Error Rate (PF) </td>
443 <td><a href="#Top">Back to top</a></td>
444 <a name="Lane2"><h2><br></br>Lane 2<br></br></h2></a>
445 <table border="1" cellpadding="5">
449 <td>Clusters (raw)</td>
450 <td>Av 1st Cycle Int (PF) </td>
451 <td>Av % intensity after 20 cycles (PF) </td>
452 <td>% PF Clusters </td>
453 <td>% Align (PF) </td>
454 <td>Av Alignment Score (PF) </td>
455 <td>% Error Rate (PF) </td>
469 <td><a href="#Top">Back to top</a></td>
470 <a name="Lane3"><h2><br></br>Lane 3<br></br></h2></a>
471 <table border="1" cellpadding="5">
475 <td>Clusters (raw)</td>
476 <td>Av 1st Cycle Int (PF) </td>
477 <td>Av % intensity after 20 cycles (PF) </td>
478 <td>% PF Clusters </td>
479 <td>% Align (PF) </td>
480 <td>Av Alignment Score (PF) </td>
481 <td>% Error Rate (PF) </td>
495 <td><a href="#Top">Back to top</a></td>
496 <a name="Lane4"><h2><br></br>Lane 4<br></br></h2></a>
497 <table border="1" cellpadding="5">
501 <td>Clusters (raw)</td>
502 <td>Av 1st Cycle Int (PF) </td>
503 <td>Av % intensity after 20 cycles (PF) </td>
504 <td>% PF Clusters </td>
505 <td>% Align (PF) </td>
506 <td>Av Alignment Score (PF) </td>
507 <td>% Error Rate (PF) </td>
521 <td><a href="#Top">Back to top</a></td>
522 <a name="Lane5"><h2><br></br>Lane 5<br></br></h2></a>
523 <table border="1" cellpadding="5">
527 <td>Clusters (raw)</td>
528 <td>Av 1st Cycle Int (PF) </td>
529 <td>Av % intensity after 20 cycles (PF) </td>
530 <td>% PF Clusters </td>
531 <td>% Align (PF) </td>
532 <td>Av Alignment Score (PF) </td>
533 <td>% Error Rate (PF) </td>
536 <td><a href="#Top">Back to top</a></td>
537 <a name="Lane6"><h2><br></br>Lane 6<br></br></h2></a>
538 <table border="1" cellpadding="5">
542 <td>Clusters (raw)</td>
543 <td>Av 1st Cycle Int (PF) </td>
544 <td>Av % intensity after 20 cycles (PF) </td>
545 <td>% PF Clusters </td>
546 <td>% Align (PF) </td>
547 <td>Av Alignment Score (PF) </td>
548 <td>% Error Rate (PF) </td>
562 <td><a href="#Top">Back to top</a></td>
563 <a name="Lane7"><h2><br></br>Lane 7<br></br></h2></a>
564 <table border="1" cellpadding="5">
568 <td>Clusters (raw)</td>
569 <td>Av 1st Cycle Int (PF) </td>
570 <td>Av % intensity after 20 cycles (PF) </td>
571 <td>% PF Clusters </td>
572 <td>% Align (PF) </td>
573 <td>Av Alignment Score (PF) </td>
574 <td>% Error Rate (PF) </td>
588 <td><a href="#Top">Back to top</a></td>
589 <a name="Lane8"><h2><br></br>Lane 8<br></br></h2></a>
590 <table border="1" cellpadding="5">
594 <td>Clusters (raw)</td>
595 <td>Av 1st Cycle Int (PF) </td>
596 <td>Av % intensity after 20 cycles (PF) </td>
597 <td>% PF Clusters </td>
598 <td>% Align (PF) </td>
599 <td>Av Alignment Score (PF) </td>
600 <td>% Error Rate (PF) </td>
614 <td><a href="#Top">Back to top</a></td>
618 pathname = os.path.join(gerald_dir, 'Summary.htm')
619 f = open(pathname, 'w')
623 def make_eland_results(gerald_dir):
624 eland_result = """>HWI-EAS229_24_207BTAAXX:1:7:599:759 ACATAGNCACAGACATAAACATAGACATAGAC U0 1 1 3 chrUextra.fa 28189829 R D.
625 >HWI-EAS229_24_207BTAAXX:1:7:205:842 AAACAANNCTCCCAAACACGTAAACTGGAAAA U1 0 1 0 chr2L.fa 8796855 R DD 24T
626 >HWI-EAS229_24_207BTAAXX:1:7:776:582 AGCTCANCCGATCGAAAACCTCNCCAAGCAAT NM 0 0 0
627 >HWI-EAS229_24_207BTAAXX:1:7:205:842 AAACAANNCTCCCAAACACGTAAACTGGAAAA U1 0 1 0 Lambda.fa 8796855 R DD 24T
630 pathname = os.path.join(gerald_dir,
631 's_%d_eland_result.txt' % (i,))
632 f = open(pathname, 'w')
633 f.write(eland_result)
636 def make_runfolder(obj=None):
638 Make a fake runfolder, attach all the directories to obj if defined
640 # make a fake runfolder directory
641 temp_dir = tempfile.mkdtemp(prefix='tmp_runfolder_')
643 runfolder_dir = os.path.join(temp_dir,
644 '080102_HWI-EAS229_0010_207BTAAXX')
645 os.mkdir(runfolder_dir)
647 data_dir = os.path.join(runfolder_dir, 'Data')
650 firecrest_dir = os.path.join(data_dir,
651 'C1-33_Firecrest1.8.28_12-04-2008_diane'
653 os.mkdir(firecrest_dir)
654 matrix_dir = os.path.join(firecrest_dir, 'Matrix')
656 matrix_filename = os.path.join(matrix_dir, 's_matrix.txt')
657 make_matrix(matrix_filename)
659 bustard_dir = os.path.join(firecrest_dir,
660 'Bustard1.8.28_12-04-2008_diane')
661 os.mkdir(bustard_dir)
662 make_phasing_params(bustard_dir)
664 gerald_dir = os.path.join(bustard_dir,
665 'GERALD_12-04-2008_diane')
667 make_gerald_config_026(gerald_dir)
668 make_summary_htm(gerald_dir)
669 make_eland_results(gerald_dir)
672 obj.temp_dir = temp_dir
673 obj.runfolder_dir = runfolder_dir
674 obj.data_dir = data_dir
675 obj.firecrest_dir = firecrest_dir
676 obj.matrix_dir = matrix_dir
677 obj.bustard_dir = bustard_dir
678 obj.gerald_dir = gerald_dir
681 class RunfolderTests(unittest.TestCase):
683 Test components of the runfolder processing code
684 which includes firecrest, bustard, and gerald
687 # attaches all the directories to the object passed in
691 shutil.rmtree(self.temp_dir)
693 def test_firecrest(self):
695 Construct a firecrest object
697 f = firecrest.firecrest(self.firecrest_dir)
698 self.failUnlessEqual(f.software, 'Firecrest')
699 self.failUnlessEqual(f.version, '1.8.28')
700 self.failUnlessEqual(f.start, 1)
701 self.failUnlessEqual(f.stop, 33)
702 self.failUnlessEqual(f.user, 'diane')
703 self.failUnlessEqual(f.date, date(2008,4,12))
705 xml = f.get_elements()
706 # just make sure that element tree can serialize the tree
707 xml_str = ElementTree.tostring(xml)
709 f2 = firecrest.Firecrest(xml=xml)
710 self.failUnlessEqual(f.software, f2.software)
711 self.failUnlessEqual(f.version, f2.version)
712 self.failUnlessEqual(f.start, f2.start)
713 self.failUnlessEqual(f.stop, f2.stop)
714 self.failUnlessEqual(f.user, f2.user)
715 self.failUnlessEqual(f.date, f2.date)
717 def test_bustard(self):
719 construct a bustard object
721 b = bustard.bustard(self.bustard_dir)
722 self.failUnlessEqual(b.software, 'Bustard')
723 self.failUnlessEqual(b.version, '1.8.28')
724 self.failUnlessEqual(b.date, date(2008,4,12))
725 self.failUnlessEqual(b.user, 'diane')
726 self.failUnlessEqual(len(b.phasing), 8)
727 self.failUnlessAlmostEqual(b.phasing[8].phasing, 0.0099)
729 xml = b.get_elements()
730 b2 = bustard.Bustard(xml=xml)
731 self.failUnlessEqual(b.software, b2.software)
732 self.failUnlessEqual(b.version, b2.version)
733 self.failUnlessEqual(b.date, b2.date )
734 self.failUnlessEqual(b.user, b2.user)
735 self.failUnlessEqual(len(b.phasing), len(b2.phasing))
736 for key in b.phasing.keys():
737 self.failUnlessEqual(b.phasing[key].lane,
738 b2.phasing[key].lane)
739 self.failUnlessEqual(b.phasing[key].phasing,
740 b2.phasing[key].phasing)
741 self.failUnlessEqual(b.phasing[key].prephasing,
742 b2.phasing[key].prephasing)
744 def test_gerald(self):
745 # need to update gerald and make tests for it
746 g = gerald.gerald(self.gerald_dir)
748 self.failUnlessEqual(g.software, 'GERALD')
749 self.failUnlessEqual(g.version, '1.68.2.2')
750 self.failUnlessEqual(g.date, datetime(2008,4,19,19,8,30))
751 self.failUnlessEqual(len(g.lanes), len(g.lanes.keys()))
752 self.failUnlessEqual(len(g.lanes), len(g.lanes.items()))
755 # list of genomes, matches what was defined up in
756 # make_gerald_config.
757 # the first None is to offset the genomes list to be 1..9
758 # instead of pythons default 0..8
759 genomes = [None, '/g/dm3', '/g/equcab1', '/g/equcab1', '/g/canfam2',
760 '/g/hg18', '/g/hg18', '/g/hg18', '/g/hg18', ]
762 # test lane specific parameters from gerald config file
764 cur_lane = g.lanes[i]
765 self.failUnlessEqual(cur_lane.analysis, 'eland')
766 self.failUnlessEqual(cur_lane.eland_genome, genomes[i])
767 self.failUnlessEqual(cur_lane.read_length, '32')
768 self.failUnlessEqual(cur_lane.use_bases, 'Y'*32)
770 # test data extracted from summary file
772 (96483, 9074), (133738, 7938),
773 (152142, 10002), (15784, 2162),
774 (119735, 8465), (152177, 8146),
775 (84649, 7325), (54622, 4812),]
777 self.failUnlessEqual(len(g.summary), 1)
779 summary_lane = g.summary[0][i]
780 self.failUnlessEqual(summary_lane.cluster, clusters[i])
781 self.failUnlessEqual(summary_lane.lane, i)
783 xml = g.get_elements()
784 # just make sure that element tree can serialize the tree
785 xml_str = ElementTree.tostring(xml)
786 g2 = gerald.Gerald(xml=xml)
788 # do it all again after extracting from the xml file
789 self.failUnlessEqual(g.version, g2.version)
790 self.failUnlessEqual(g.date, g2.date)
791 self.failUnlessEqual(len(g.lanes.keys()), len(g2.lanes.keys()))
792 self.failUnlessEqual(len(g.lanes.items()), len(g2.lanes.items()))
794 # test lane specific parameters from gerald config file
797 g2_lane = g2.lanes[i]
798 self.failUnlessEqual(g_lane.analysis, g2_lane.analysis)
799 self.failUnlessEqual(g_lane.eland_genome, g2_lane.eland_genome)
800 self.failUnlessEqual(g_lane.read_length, g2_lane.read_length)
801 self.failUnlessEqual(g_lane.use_bases, g2_lane.use_bases)
803 # test (some) summary elements
804 self.failUnlessEqual(len(g.summary), 1)
806 g_summary = g.summary[0][i]
807 g2_summary = g2.summary[0][i]
808 self.failUnlessEqual(g_summary.cluster, g2_summary.cluster)
809 self.failUnlessEqual(g_summary.lane, g2_summary.lane)
811 g_eland = g.eland_results
812 g2_eland = g2.eland_results
814 g_results = g_eland[key]
815 g2_results = g2_eland[key]
816 self.failUnlessEqual(g_results.reads,
818 self.failUnlessEqual(len(g_results.mapped_reads),
819 len(g2_results.mapped_reads))
820 for k in g_results.mapped_reads.keys():
821 self.failUnlessEqual(g_results.mapped_reads[k],
822 g2_results.mapped_reads[k])
824 self.failUnlessEqual(len(g_results.match_codes),
825 len(g2_results.match_codes))
826 for k in g_results.match_codes.keys():
827 self.failUnlessEqual(g_results.match_codes[k],
828 g2_results.match_codes[k])
831 def test_eland(self):
832 dm3_map = { 'chrUextra.fa' : 'dm3/chrUextra.fa',
833 'chr2L.fa': 'dm3/chr2L.fa',
834 'Lambda.fa': 'Lambda.fa'}
835 genome_maps = { 1:dm3_map, 2:dm3_map, 3:dm3_map, 4:dm3_map,
836 5:dm3_map, 6:dm3_map, 7:dm3_map, 8:dm3_map }
837 eland = gerald.eland(self.gerald_dir, genome_maps=genome_maps)
841 self.failUnlessEqual(lane.reads, 4)
842 self.failUnlessEqual(lane.sample_name, "s")
843 self.failUnlessEqual(lane.lane_id, key.lane)
844 self.failUnlessEqual(len(lane.mapped_reads), 3)
845 self.failUnlessEqual(lane.mapped_reads['Lambda.fa'], 1)
846 self.failUnlessEqual(lane.mapped_reads['dm3/chr2L.fa'], 1)
847 self.failUnlessEqual(lane.match_codes['U1'], 2)
848 self.failUnlessEqual(lane.match_codes['NM'], 1)
850 xml = eland.get_elements()
851 # just make sure that element tree can serialize the tree
852 xml_str = ElementTree.tostring(xml)
853 e2 = gerald.ELAND(xml=xml)
858 self.failUnlessEqual(l1.reads, l2.reads)
859 self.failUnlessEqual(l1.sample_name, l2.sample_name)
860 self.failUnlessEqual(l1.lane_id, l2.lane_id)
861 self.failUnlessEqual(len(l1.mapped_reads), len(l2.mapped_reads))
862 self.failUnlessEqual(len(l1.mapped_reads), 3)
863 for k in l1.mapped_reads.keys():
864 self.failUnlessEqual(l1.mapped_reads[k],
867 self.failUnlessEqual(len(l1.match_codes), 9)
868 self.failUnlessEqual(len(l1.match_codes), len(l2.match_codes))
869 for k in l1.match_codes.keys():
870 self.failUnlessEqual(l1.match_codes[k],
873 def test_runfolder(self):
874 runs = runfolder.get_runs(self.runfolder_dir)
876 # do we get the flowcell id from the filename?
877 self.failUnlessEqual(len(runs), 1)
878 self.failUnlessEqual(runs[0].name, 'run_207BTAAXX_2008-04-19.xml')
880 # do we get the flowcell id from the FlowcellId.xml file
881 make_flowcell_id(self.runfolder_dir, '207BTAAXY')
882 runs = runfolder.get_runs(self.runfolder_dir)
883 self.failUnlessEqual(len(runs), 1)
884 self.failUnlessEqual(runs[0].name, 'run_207BTAAXY_2008-04-19.xml')
887 xml = r1.get_elements()
888 xml_str = ElementTree.tostring(xml)
890 r2 = runfolder.PipelineRun(xml=xml)
891 self.failUnlessEqual(r1.name, r2.name)
892 self.failIfEqual(r2.image_analysis, None)
893 self.failIfEqual(r2.bustard, None)
894 self.failIfEqual(r2.gerald, None)
898 return unittest.makeSuite(RunfolderTests,'test')
900 if __name__ == "__main__":
901 unittest.main(defaultTest="suite")