3 from datetime import datetime, date
9 from htsworkflow.pipelines import firecrest
10 from htsworkflow.pipelines import bustard
11 from htsworkflow.pipelines import gerald
12 from htsworkflow.pipelines import runfolder
13 from htsworkflow.pipelines.runfolder import ElementTree
15 from htsworkflow.pipelines.test.simulate_runfolder import *
18 def make_summary_htm(gerald_dir):
19 summary_htm="""<!--RUN_TIME Wed Jul 2 06:47:44 2008 -->
20 <!--SOFTWARE_VERSION @(#) $Id: jerboa.pl,v 1.94 2007/12/04 09:59:07 rshaw Exp $-->
24 <a name="Top"><h2><title>080627_HWI-EAS229_0036_3055HAXX Summary</title></h2></a>
25 <h1>Summary Information For Experiment 080627_HWI-EAS229_0036_3055HAXX on Machine HWI-EAS229</h1>
26 <h2><br></br>Chip Summary<br></br></h2>
27 <table border="1" cellpadding="5">
28 <tr><td>Machine</td><td>HWI-EAS229</td></tr>
29 <tr><td>Run Folder</td><td>080627_HWI-EAS229_0036_3055HAXX</td></tr>
30 <tr><td>Chip ID</td><td>unknown</td></tr>
32 <h2><br></br>Chip Results Summary<br></br></h2>
33 <table border="1" cellpadding="5">
36 <td>Clusters (PF)</td>
37 <td>Yield (kbases)</td>
44 <h2><br></br>Lane Parameter Summary<br></br></h2>
45 <table border="1" cellpadding="5">
49 <td>Sample Target</td>
62 <td>'((CHASTITY>=0.6))'</td>
64 <td><a href="#Lane1">Lane 1</a></td>
72 <td>'((CHASTITY>=0.6))'</td>
74 <td><a href="#Lane2">Lane 2</a></td>
82 <td>'((CHASTITY>=0.6))'</td>
84 <td><a href="#Lane3">Lane 3</a></td>
92 <td>'((CHASTITY>=0.6))'</td>
94 <td><a href="#Lane4">Lane 4</a></td>
102 <td>'((CHASTITY>=0.6))'</td>
104 <td><a href="#Lane5">Lane 5</a></td>
112 <td>'((CHASTITY>=0.6))'</td>
114 <td><a href="#Lane6">Lane 6</a></td>
122 <td>'((CHASTITY>=0.6))'</td>
124 <td><a href="#Lane7">Lane 7</a></td>
132 <td>'((CHASTITY>=0.6))'</td>
134 <td><a href="#Lane8">Lane 8</a></td>
137 <h2><br></br>Lane Results Summary<br></br></h2>
138 <table border="1" cellpadding="5">
140 <td colspan="2">Lane Info</td>
141 <td colspan="8">Tile Mean +/- SD for Lane</td>
145 <td>Lane Yield (kbases) </td>
146 <td>Clusters (raw)</td>
147 <td>Clusters (PF) </td>
148 <td>1st Cycle Int (PF) </td>
149 <td>% intensity after 20 cycles (PF) </td>
150 <td>% PF Clusters </td>
151 <td>% Align (PF) </td>
152 <td>Alignment Score (PF) </td>
153 <td> % Error Rate (PF) </td>
158 <td>96483 +/- 9074</td>
159 <td>60787 +/- 4240</td>
161 <td>101.88 +/- 6.03</td>
162 <td>63.21 +/- 3.29</td>
163 <td>70.33 +/- 0.24</td>
164 <td>9054.08 +/- 59.16</td>
165 <td>0.46 +/- 0.18</td>
170 <td>133738 +/- 7938</td>
171 <td>60217 +/- 1926</td>
173 <td>92.62 +/- 7.58</td>
174 <td>45.20 +/- 3.31</td>
175 <td>51.98 +/- 0.74</td>
176 <td>6692.04 +/- 92.49</td>
177 <td>0.46 +/- 0.09</td>
182 <td>152142 +/- 10002</td>
183 <td>71468 +/- 2827</td>
185 <td>91.53 +/- 8.66</td>
186 <td>47.19 +/- 3.80</td>
187 <td>82.24 +/- 0.44</td>
188 <td>10598.68 +/- 64.13</td>
189 <td>0.41 +/- 0.04</td>
194 <td>15784 +/- 2162</td>
195 <td>13443 +/- 1728</td>
197 <td>97.53 +/- 9.87</td>
198 <td>85.29 +/- 1.91</td>
199 <td>80.02 +/- 0.53</td>
200 <td>10368.82 +/- 71.08</td>
201 <td>0.15 +/- 0.05</td>
206 <td>119735 +/- 8465</td>
207 <td>64590 +/- 2529</td>
209 <td>88.69 +/- 14.79</td>
210 <td>54.10 +/- 2.59</td>
211 <td>76.95 +/- 0.32</td>
212 <td>9936.47 +/- 65.75</td>
213 <td>0.28 +/- 0.02</td>
218 <td>152177 +/- 8146</td>
219 <td>66716 +/- 2493</td>
221 <td>87.06 +/- 9.86</td>
222 <td>43.98 +/- 3.12</td>
223 <td>78.80 +/- 0.43</td>
224 <td>10162.28 +/- 49.65</td>
225 <td>0.38 +/- 0.03</td>
230 <td>84649 +/- 7325</td>
231 <td>57418 +/- 3617</td>
233 <td>89.40 +/- 8.23</td>
234 <td>67.97 +/- 1.82</td>
235 <td>33.38 +/- 0.25</td>
236 <td>4247.92 +/- 32.37</td>
237 <td>1.00 +/- 0.03</td>
242 <td>54622 +/- 4812</td>
243 <td>41136 +/- 3309</td>
245 <td>90.21 +/- 9.10</td>
246 <td>75.39 +/- 2.27</td>
247 <td>48.33 +/- 0.29</td>
248 <td>6169.21 +/- 169.50</td>
249 <td>0.86 +/- 1.22</td>
251 <tr><td colspan="13">Tile mean across chip</td></tr>
265 <h2><br></br>Expanded Lane Summary<br></br></h2>
266 <table border="1" cellpadding="5">
269 <tr><td colspan="2">Lane Info</td>
270 <td colspan="2">Phasing Info</td>
271 <td colspan="2">Raw Data (tile mean)</td>
272 <td colspan="7">Filtered Data (tile mean)</td></tr>
274 <td>Clusters (tile mean) (raw)</td>
276 <td>% Prephasing </td>
277 <td>% Error Rate (raw) </td>
278 <td> Equiv Perfect Clusters (raw) </td>
280 <td>Cycle 2-4 Av Int (PF) </td>
281 <td>Cycle 2-10 Av % Loss (PF) </td>
282 <td>Cycle 10-20 Av % Loss (PF) </td>
283 <td>% Align (PF) </td>
284 <td>% Error Rate (PF) </td>
285 <td> Equiv Perfect Clusters (PF) </td>
296 <td>0.13 +/- 0.44</td>
297 <td>-1.14 +/- 0.34</td>
311 <td>0.29 +/- 0.40</td>
312 <td>-0.79 +/- 0.35</td>
326 <td>0.68 +/- 0.51</td>
327 <td>-0.77 +/- 0.42</td>
341 <td>0.20 +/- 0.69</td>
342 <td>-1.28 +/- 0.66</td>
356 <td>0.34 +/- 0.49</td>
357 <td>-1.55 +/- 4.69</td>
371 <td>0.57 +/- 0.50</td>
372 <td>-0.91 +/- 0.39</td>
386 <td>1.15 +/- 0.52</td>
387 <td>-0.84 +/- 0.58</td>
401 <td>1.10 +/- 0.59</td>
402 <td>-1.01 +/- 0.47</td>
408 <b><br></br>IVC Plots</b>
409 <p> <a href='IVC.htm' target="_blank"> IVC.htm
411 <b><br></br>All Intensity Plots</b>
412 <p> <a href='All.htm' target="_blank"> All.htm
414 <b><br></br>Error graphs: </b>
415 <p> <a href='Error.htm' target="_blank"> Error.htm
417 <td><a href="#Top">Back to top</a></td>
418 <a name="Lane1"><h2><br></br>Lane 1<br></br></h2></a>
419 <table border="1" cellpadding="5">
423 <td>Clusters (raw)</td>
424 <td>Av 1st Cycle Int (PF) </td>
425 <td>Av % intensity after 20 cycles (PF) </td>
426 <td>% PF Clusters </td>
427 <td>% Align (PF) </td>
428 <td>Av Alignment Score (PF) </td>
429 <td>% Error Rate (PF) </td>
443 <td><a href="#Top">Back to top</a></td>
444 <a name="Lane2"><h2><br></br>Lane 2<br></br></h2></a>
445 <table border="1" cellpadding="5">
449 <td>Clusters (raw)</td>
450 <td>Av 1st Cycle Int (PF) </td>
451 <td>Av % intensity after 20 cycles (PF) </td>
452 <td>% PF Clusters </td>
453 <td>% Align (PF) </td>
454 <td>Av Alignment Score (PF) </td>
455 <td>% Error Rate (PF) </td>
469 <td><a href="#Top">Back to top</a></td>
470 <a name="Lane3"><h2><br></br>Lane 3<br></br></h2></a>
471 <table border="1" cellpadding="5">
475 <td>Clusters (raw)</td>
476 <td>Av 1st Cycle Int (PF) </td>
477 <td>Av % intensity after 20 cycles (PF) </td>
478 <td>% PF Clusters </td>
479 <td>% Align (PF) </td>
480 <td>Av Alignment Score (PF) </td>
481 <td>% Error Rate (PF) </td>
495 <td><a href="#Top">Back to top</a></td>
496 <a name="Lane4"><h2><br></br>Lane 4<br></br></h2></a>
497 <table border="1" cellpadding="5">
501 <td>Clusters (raw)</td>
502 <td>Av 1st Cycle Int (PF) </td>
503 <td>Av % intensity after 20 cycles (PF) </td>
504 <td>% PF Clusters </td>
505 <td>% Align (PF) </td>
506 <td>Av Alignment Score (PF) </td>
507 <td>% Error Rate (PF) </td>
521 <td><a href="#Top">Back to top</a></td>
522 <a name="Lane5"><h2><br></br>Lane 5<br></br></h2></a>
523 <table border="1" cellpadding="5">
527 <td>Clusters (raw)</td>
528 <td>Av 1st Cycle Int (PF) </td>
529 <td>Av % intensity after 20 cycles (PF) </td>
530 <td>% PF Clusters </td>
531 <td>% Align (PF) </td>
532 <td>Av Alignment Score (PF) </td>
533 <td>% Error Rate (PF) </td>
536 <td><a href="#Top">Back to top</a></td>
537 <a name="Lane6"><h2><br></br>Lane 6<br></br></h2></a>
538 <table border="1" cellpadding="5">
542 <td>Clusters (raw)</td>
543 <td>Av 1st Cycle Int (PF) </td>
544 <td>Av % intensity after 20 cycles (PF) </td>
545 <td>% PF Clusters </td>
546 <td>% Align (PF) </td>
547 <td>Av Alignment Score (PF) </td>
548 <td>% Error Rate (PF) </td>
562 <td><a href="#Top">Back to top</a></td>
563 <a name="Lane7"><h2><br></br>Lane 7<br></br></h2></a>
564 <table border="1" cellpadding="5">
568 <td>Clusters (raw)</td>
569 <td>Av 1st Cycle Int (PF) </td>
570 <td>Av % intensity after 20 cycles (PF) </td>
571 <td>% PF Clusters </td>
572 <td>% Align (PF) </td>
573 <td>Av Alignment Score (PF) </td>
574 <td>% Error Rate (PF) </td>
588 <td><a href="#Top">Back to top</a></td>
589 <a name="Lane8"><h2><br></br>Lane 8<br></br></h2></a>
590 <table border="1" cellpadding="5">
594 <td>Clusters (raw)</td>
595 <td>Av 1st Cycle Int (PF) </td>
596 <td>Av % intensity after 20 cycles (PF) </td>
597 <td>% PF Clusters </td>
598 <td>% Align (PF) </td>
599 <td>Av Alignment Score (PF) </td>
600 <td>% Error Rate (PF) </td>
614 <td><a href="#Top">Back to top</a></td>
618 pathname = os.path.join(gerald_dir, 'Summary.htm')
619 f = open(pathname, 'w')
623 def make_eland_results(gerald_dir):
624 eland_result = """>HWI-EAS229_24_207BTAAXX:1:7:599:759 ACATAGNCACAGACATAAACATAGACATAGAC U0 1 1 3 chrUextra.fa 28189829 R D.
625 >HWI-EAS229_24_207BTAAXX:1:7:205:842 AAACAANNCTCCCAAACACGTAAACTGGAAAA U1 0 1 0 chr2L.fa 8796855 R DD 24T
626 >HWI-EAS229_24_207BTAAXX:1:7:776:582 AGCTCANCCGATCGAAAACCTCNCCAAGCAAT NM 0 0 0
627 >HWI-EAS229_24_207BTAAXX:1:7:205:842 AAACAANNCTCCCAAACACGTAAACTGGAAAA U1 0 1 0 Lambda.fa 8796855 R DD 24T
630 pathname = os.path.join(gerald_dir,
631 's_%d_eland_result.txt' % (i,))
632 f = open(pathname, 'w')
633 f.write(eland_result)
636 def make_runfolder(obj=None):
638 Make a fake runfolder, attach all the directories to obj if defined
640 # make a fake runfolder directory
641 temp_dir = tempfile.mkdtemp(prefix='tmp_runfolder_')
643 runfolder_dir = os.path.join(temp_dir,
644 '080102_HWI-EAS229_0010_207BTAAXX')
645 os.mkdir(runfolder_dir)
647 data_dir = os.path.join(runfolder_dir, 'Data')
650 firecrest_dir = os.path.join(data_dir,
651 'C1-33_Firecrest1.8.28_12-04-2008_diane'
653 os.mkdir(firecrest_dir)
654 matrix_dir = os.path.join(firecrest_dir, 'Matrix')
656 make_matrix(matrix_dir)
658 bustard_dir = os.path.join(firecrest_dir,
659 'Bustard1.8.28_12-04-2008_diane')
660 os.mkdir(bustard_dir)
661 make_phasing_params(bustard_dir)
663 gerald_dir = os.path.join(bustard_dir,
664 'GERALD_12-04-2008_diane')
666 make_gerald_config(gerald_dir)
667 make_summary_htm(gerald_dir)
668 make_eland_results(gerald_dir)
671 obj.temp_dir = temp_dir
672 obj.runfolder_dir = runfolder_dir
673 obj.data_dir = data_dir
674 obj.firecrest_dir = firecrest_dir
675 obj.matrix_dir = matrix_dir
676 obj.bustard_dir = bustard_dir
677 obj.gerald_dir = gerald_dir
680 class RunfolderTests(unittest.TestCase):
682 Test components of the runfolder processing code
683 which includes firecrest, bustard, and gerald
686 # attaches all the directories to the object passed in
690 shutil.rmtree(self.temp_dir)
692 def test_firecrest(self):
694 Construct a firecrest object
696 f = firecrest.firecrest(self.firecrest_dir)
697 self.failUnlessEqual(f.version, '1.8.28')
698 self.failUnlessEqual(f.start, 1)
699 self.failUnlessEqual(f.stop, 33)
700 self.failUnlessEqual(f.user, 'diane')
701 self.failUnlessEqual(f.date, date(2008,4,12))
703 xml = f.get_elements()
704 # just make sure that element tree can serialize the tree
705 xml_str = ElementTree.tostring(xml)
707 f2 = firecrest.Firecrest(xml=xml)
708 self.failUnlessEqual(f.version, f2.version)
709 self.failUnlessEqual(f.start, f2.start)
710 self.failUnlessEqual(f.stop, f2.stop)
711 self.failUnlessEqual(f.user, f2.user)
712 self.failUnlessEqual(f.date, f2.date)
714 def test_bustard(self):
716 construct a bustard object
718 b = bustard.bustard(self.bustard_dir)
719 self.failUnlessEqual(b.version, '1.8.28')
720 self.failUnlessEqual(b.date, date(2008,4,12))
721 self.failUnlessEqual(b.user, 'diane')
722 self.failUnlessEqual(len(b.phasing), 8)
723 self.failUnlessAlmostEqual(b.phasing[8].phasing, 0.0099)
725 xml = b.get_elements()
726 b2 = bustard.Bustard(xml=xml)
727 self.failUnlessEqual(b.version, b2.version)
728 self.failUnlessEqual(b.date, b2.date )
729 self.failUnlessEqual(b.user, b2.user)
730 self.failUnlessEqual(len(b.phasing), len(b2.phasing))
731 for key in b.phasing.keys():
732 self.failUnlessEqual(b.phasing[key].lane,
733 b2.phasing[key].lane)
734 self.failUnlessEqual(b.phasing[key].phasing,
735 b2.phasing[key].phasing)
736 self.failUnlessEqual(b.phasing[key].prephasing,
737 b2.phasing[key].prephasing)
739 def test_gerald(self):
740 # need to update gerald and make tests for it
741 g = gerald.gerald(self.gerald_dir)
743 self.failUnlessEqual(g.version,
744 '@(#) Id: GERALD.pl,v 1.68.2.2 2007/06/13 11:08:49 km Exp')
745 self.failUnlessEqual(g.date, datetime(2008,4,19,19,8,30))
746 self.failUnlessEqual(len(g.lanes), len(g.lanes.keys()))
747 self.failUnlessEqual(len(g.lanes), len(g.lanes.items()))
750 # list of genomes, matches what was defined up in
751 # make_gerald_config.
752 # the first None is to offset the genomes list to be 1..9
753 # instead of pythons default 0..8
754 genomes = [None, '/g/dm3', '/g/equcab1', '/g/equcab1', '/g/canfam2',
755 '/g/hg18', '/g/hg18', '/g/hg18', '/g/hg18', ]
757 # test lane specific parameters from gerald config file
759 cur_lane = g.lanes[i]
760 self.failUnlessEqual(cur_lane.analysis, 'eland')
761 self.failUnlessEqual(cur_lane.eland_genome, genomes[i])
762 self.failUnlessEqual(cur_lane.read_length, '32')
763 self.failUnlessEqual(cur_lane.use_bases, 'Y'*32)
765 # test data extracted from summary file
767 (96483, 9074), (133738, 7938),
768 (152142, 10002), (15784, 2162),
769 (119735, 8465), (152177, 8146),
770 (84649, 7325), (54622, 4812),]
772 self.failUnlessEqual(len(g.summary), 1)
774 summary_lane = g.summary[0][i]
775 self.failUnlessEqual(summary_lane.cluster, clusters[i])
776 self.failUnlessEqual(summary_lane.lane, i)
778 xml = g.get_elements()
779 # just make sure that element tree can serialize the tree
780 xml_str = ElementTree.tostring(xml)
781 g2 = gerald.Gerald(xml=xml)
783 # do it all again after extracting from the xml file
784 self.failUnlessEqual(g.version, g2.version)
785 self.failUnlessEqual(g.date, g2.date)
786 self.failUnlessEqual(len(g.lanes.keys()), len(g2.lanes.keys()))
787 self.failUnlessEqual(len(g.lanes.items()), len(g2.lanes.items()))
789 # test lane specific parameters from gerald config file
792 g2_lane = g2.lanes[i]
793 self.failUnlessEqual(g_lane.analysis, g2_lane.analysis)
794 self.failUnlessEqual(g_lane.eland_genome, g2_lane.eland_genome)
795 self.failUnlessEqual(g_lane.read_length, g2_lane.read_length)
796 self.failUnlessEqual(g_lane.use_bases, g2_lane.use_bases)
798 # test (some) summary elements
799 self.failUnlessEqual(len(g.summary), 1)
801 g_summary = g.summary[0][i]
802 g2_summary = g2.summary[0][i]
803 self.failUnlessEqual(g_summary.cluster, g2_summary.cluster)
804 self.failUnlessEqual(g_summary.lane, g2_summary.lane)
806 g_eland = g.eland_results
807 g2_eland = g2.eland_results
808 for lane in g_eland.keys():
809 self.failUnlessEqual(g_eland[lane].reads,
810 g2_eland[lane].reads)
811 self.failUnlessEqual(len(g_eland[lane].mapped_reads),
812 len(g2_eland[lane].mapped_reads))
813 for k in g_eland[lane].mapped_reads.keys():
814 self.failUnlessEqual(g_eland[lane].mapped_reads[k],
815 g2_eland[lane].mapped_reads[k])
817 self.failUnlessEqual(len(g_eland[lane].match_codes),
818 len(g2_eland[lane].match_codes))
819 for k in g_eland[lane].match_codes.keys():
820 self.failUnlessEqual(g_eland[lane].match_codes[k],
821 g2_eland[lane].match_codes[k])
824 def test_eland(self):
825 dm3_map = { 'chrUextra.fa' : 'dm3/chrUextra.fa',
826 'chr2L.fa': 'dm3/chr2L.fa',
827 'Lambda.fa': 'Lambda.fa'}
828 genome_maps = { 1:dm3_map, 2:dm3_map, 3:dm3_map, 4:dm3_map,
829 5:dm3_map, 6:dm3_map, 7:dm3_map, 8:dm3_map }
830 eland = gerald.eland(self.gerald_dir, genome_maps=genome_maps)
834 self.failUnlessEqual(lane.reads, 4)
835 self.failUnlessEqual(lane.sample_name, "s")
836 self.failUnlessEqual(lane.lane_id, i)
837 self.failUnlessEqual(len(lane.mapped_reads), 3)
838 self.failUnlessEqual(lane.mapped_reads['Lambda.fa'], 1)
839 self.failUnlessEqual(lane.mapped_reads['dm3/chr2L.fa'], 1)
840 self.failUnlessEqual(lane.match_codes['U1'], 2)
841 self.failUnlessEqual(lane.match_codes['NM'], 1)
843 xml = eland.get_elements()
844 # just make sure that element tree can serialize the tree
845 xml_str = ElementTree.tostring(xml)
846 e2 = gerald.ELAND(xml=xml)
851 self.failUnlessEqual(l1.reads, l2.reads)
852 self.failUnlessEqual(l1.sample_name, l2.sample_name)
853 self.failUnlessEqual(l1.lane_id, l2.lane_id)
854 self.failUnlessEqual(len(l1.mapped_reads), len(l2.mapped_reads))
855 self.failUnlessEqual(len(l1.mapped_reads), 3)
856 for k in l1.mapped_reads.keys():
857 self.failUnlessEqual(l1.mapped_reads[k],
860 self.failUnlessEqual(len(l1.match_codes), 9)
861 self.failUnlessEqual(len(l1.match_codes), len(l2.match_codes))
862 for k in l1.match_codes.keys():
863 self.failUnlessEqual(l1.match_codes[k],
866 def test_runfolder(self):
867 runs = runfolder.get_runs(self.runfolder_dir)
869 # do we get the flowcell id from the filename?
870 self.failUnlessEqual(len(runs), 1)
871 self.failUnlessEqual(runs[0].name, 'run_207BTAAXX_2008-04-19.xml')
873 # do we get the flowcell id from the FlowcellId.xml file
874 make_flowcell_id(self.runfolder_dir, '207BTAAXY')
875 runs = runfolder.get_runs(self.runfolder_dir)
876 self.failUnlessEqual(len(runs), 1)
877 self.failUnlessEqual(runs[0].name, 'run_207BTAAXY_2008-04-19.xml')
880 xml = r1.get_elements()
881 xml_str = ElementTree.tostring(xml)
883 r2 = runfolder.PipelineRun(xml=xml)
884 self.failUnlessEqual(r1.name, r2.name)
885 self.failIfEqual(r2.image_analysis, None)
886 self.failIfEqual(r2.bustard, None)
887 self.failIfEqual(r2.gerald, None)
891 return unittest.makeSuite(RunfolderTests,'test')
893 if __name__ == "__main__":
894 unittest.main(defaultTest="suite")