The summary parsing code now seems to handle paired end runs
author Diane Trout <diane@caltech.edu>
Wed, 10 Dec 2008 01:00:25 +0000 (01:00 +0000)
committer Diane Trout <diane@caltech.edu>
Wed, 10 Dec 2008 01:00:25 +0000 (01:00 +0000)
This required changing how lane_results is stored: previously it
was a dictionary indexed by lane; now it is a list of dictionaries,
where the list index indicates which "end" of a paired-end run it
is (0 is the first, 1 is the second).

Also, I got tired of being forced by ElementTree to use strings for
the lane index, so I modified the code to convert the strings that
ElementTree requires into integers for our internal dictionaries.

htsworkflow/pipelines/eland.py
htsworkflow/pipelines/gerald.py
htsworkflow/pipelines/summary.py
htsworkflow/pipelines/test/simulate_runfolder.py
htsworkflow/pipelines/test/test_runfolder026.py
htsworkflow/pipelines/test/test_runfolder030.py
htsworkflow/pipelines/test/test_runfolder110.py
htsworkflow/pipelines/test/test_runfolder_ipar100.py
htsworkflow/pipelines/test/test_runfolder_pair.py

index d44dae822d2f360b75deb3e2607dde6c13781ca0..a010e1de069d179180fe1e2dc2d6dc905215733f 100644 (file)
@@ -167,7 +167,7 @@ class ElandLane(object):
         path, name = os.path.split(self.pathname)
         split_name = name.split('_')
         self._sample_name = split_name[0]
-        self._lane_id = split_name[1]
+        self._lane_id = int(split_name[1])
 
     def _get_sample_name(self):
         if self._sample_name is None:
@@ -206,7 +206,7 @@ class ElandLane(object):
         sample_tag = ElementTree.SubElement(lane, ElandLane.SAMPLE_NAME)
         sample_tag.text = self.sample_name
         lane_tag = ElementTree.SubElement(lane, ElandLane.LANE_ID)
-        lane_tag.text = self.lane_id
+        lane_tag.text = str(self.lane_id)
         genome_map = ElementTree.SubElement(lane, ElandLane.GENOME_MAP)
         for k, v in self.genome_map.items():
             item = ElementTree.SubElement(
@@ -240,7 +240,7 @@ class ElandLane(object):
             if tag == ElandLane.SAMPLE_NAME.lower():
                 self._sample_name = element.text
             elif tag == ElandLane.LANE_ID.lower():
-                self._lane_id = element.text
+                self._lane_id = int(element.text)
             elif tag == ElandLane.GENOME_MAP.lower():
                 for child in element:
                     name = child.attrib['name']
@@ -306,7 +306,7 @@ class ELAND(object):
         if tree.tag.lower() != ELAND.ELAND.lower():
             raise ValueError('Expecting %s', ELAND.ELAND)
         for element in list(tree):
-            lane_id = element.attrib[ELAND.LANE_ID]
+            lane_id = int(element.attrib[ELAND.LANE_ID])
             lane = ElandLane(xml=element)
             self.results[lane_id] = lane
 
@@ -326,18 +326,18 @@ def eland(basedir, gerald=None, genome_maps=None):
         # lets handle compressed eland files too
         file_list = glob(os.path.join(basedir, "*_eland_result.txt.bz2"))
 
-    lane_ids = ['1','2','3','4','5','6','7','8']
+    lane_ids = range(1,9)
     # the order in patterns determines the preference for what
     # will be found.
-    patterns = ['s_%s_eland_result.txt',
-                's_%s_eland_result.txt.bz2',
-                's_%s_eland_result.txt.gz',
-                's_%s_eland_extended.txt',
-                's_%s_eland_extended.txt.bz2',
-                's_%s_eland_extended.txt.gz',
-                's_%s_eland_multi.txt',
-                's_%s_eland_multi.txt.bz2',
-                's_%s_eland_multi.txt.gz',]
+    patterns = ['s_%d_eland_result.txt',
+                's_%d_eland_result.txt.bz2',
+                's_%d_eland_result.txt.gz',
+                's_%d_eland_extended.txt',
+                's_%d_eland_extended.txt.bz2',
+                's_%d_eland_extended.txt.gz',
+                's_%d_eland_multi.txt',
+                's_%d_eland_multi.txt.bz2',
+                's_%d_eland_multi.txt.gz',]
 
     for lane_id in lane_ids:
         for p in patterns:
@@ -353,7 +353,7 @@ def eland(basedir, gerald=None, genome_maps=None):
         path, name = os.path.split(pathname)
         logging.info("Adding eland file %s" %(name,))
         split_name = name.split('_')
-        lane_id = split_name[1]
+        lane_id = int(split_name[1])
 
         if genome_maps is not None:
             genome_map = genome_maps[lane_id]
index 7e2328ad3b7613f1886accef135e0698082f12ba..a5dd323861beb46549fc3b98be0f6af77eec6e84 100644 (file)
@@ -88,7 +88,8 @@ class Gerald(object):
             # those consistently.
             for element in analysis:
                 sample, lane_id = element.tag.split('_')
-                self._lanes[lane_id] = Gerald.LaneParameters(self._gerald, lane_id)
+                self._lanes[int(lane_id)] = Gerald.LaneParameters(
+                                              self._gerald, lane_id)
 
         def __getitem__(self, key):
             if self._lane is None:
@@ -201,4 +202,4 @@ if __name__ == "__main__":
   # quick test code
   import sys
   g = gerald(sys.argv[1])
-  #ElementTree.dump(g.get_elements())
\ No newline at end of file
+  #ElementTree.dump(g.get_elements())
index ad07d06b3992a07b172766213deeeb279135fb0d..72b3e5e058058e8e4715b0c033b314a8b25ff3ef 100644 (file)
@@ -10,7 +10,7 @@ class Summary(object):
     """
     Extract some useful information from the Summary.htm file
     """
-    XML_VERSION = 2
+    XML_VERSION = 3
     SUMMARY = 'Summary'
 
     class LaneResultSummary(object):
@@ -33,6 +33,7 @@ class Summary(object):
 
         def __init__(self, html=None, xml=None):
             self.lane = None
+            self.end = 0
             self.lane_yield = None
             self.cluster = None
             self.cluster_pass_filter = None
@@ -53,7 +54,7 @@ class Summary(object):
                 raise RuntimeError("Summary.htm file format changed")
 
             # same in pre-0.3.0 Summary file and 0.3 summary file
-            self.lane = data[0]
+            self.lane = int(data[0])
 
             if len(data) == 8:
                 parsed_data = [ parse_mean_range(x) for x in data[1:] ]
@@ -81,7 +82,7 @@ class Summary(object):
         def get_elements(self):
             lane_result = ElementTree.Element(
                             Summary.LaneResultSummary.LANE_RESULT_SUMMARY,
-                            {'lane': self.lane})
+                            {'lane': str(self.lane), 'end': str(self.end)})
             for tag, variable_name in Summary.LaneResultSummary.TAGS.items():
                 value = getattr(self, variable_name)
                 if value is None:
@@ -102,7 +103,10 @@ class Summary(object):
             if tree.tag != Summary.LaneResultSummary.LANE_RESULT_SUMMARY:
                 raise ValueError('Expected %s' % (
                         Summary.LaneResultSummary.LANE_RESULT_SUMMARY))
-            self.lane = tree.attrib['lane']
+            self.lane = int(tree.attrib['lane'])
+            # default to the first end, for the older summary files
+            # that are single ended
+            self.end = int(tree.attrib.get('end', 0))
             tags = Summary.LaneResultSummary.TAGS
             for element in list(tree):
                 try:
@@ -113,7 +117,10 @@ class Summary(object):
                     logging.warn('Unrecognized tag %s' % (element.tag,))
 
     def __init__(self, filename=None, xml=None):
-        self.lane_results = {}
+        # lane results is a list of 1 or 2 ends containing
+        # a dictionary of all the lanes reported in this
+        # summary file
+        self.lane_results = [{}]
 
         if filename is not None:
             self._extract_lane_results(filename)
@@ -126,15 +133,6 @@ class Summary(object):
     def __len__(self):
         return len(self.lane_results)
 
-    def keys(self):
-        return self.lane_results.keys()
-
-    def values(self):
-        return self.lane_results.values()
-
-    def items(self):
-        return self.lane_results.items()
-
     def _flattened_row(self, row):
         """
         flatten the children of a <tr>...</tr>
@@ -178,14 +176,20 @@ class Summary(object):
         return tables
 
     def _extract_lane_results(self, pathname):
+        tables = self._extract_named_tables(pathname)
+        table_names = [ ('Lane Results Summary', 0),
+                        ('Lane Results Summary : Read 1', 0),
+                        ('Lane Results Summary : Read 2', 1),]
+        for name, end in table_names:
+          if tables.has_key(name):
+            self._extract_lane_results_for_end(tables, name, end)
+
+    def _extract_lane_results_for_end(self, tables, table_name, end):
         """
         extract the Lane Results Summary table
         """
-
-        tables = self._extract_named_tables(pathname)
-
         # parse lane result summary
-        lane_summary = tables['Lane Results Summary']
+        lane_summary = tables[table_name]
         # this is version 1 of the summary file
         if len(lane_summary[-1]) == 8:
             # strip header
@@ -200,15 +204,21 @@ class Summary(object):
             lane_summary = lane_summary[2:10]
             # after the last lane, there's a set of chip wide averages
 
+        # append an extra dictionary if needed
+        if len(self.lane_results) < (end + 1):
+          self.lane_results.append({})
+
         for r in lane_summary:
             lrs = Summary.LaneResultSummary(html=r)
-            self.lane_results[lrs.lane] = lrs
+            lrs.end = end
+            self.lane_results[lrs.end][lrs.lane] = lrs
 
     def get_elements(self):
         summary = ElementTree.Element(Summary.SUMMARY,
                                       {'version': unicode(Summary.XML_VERSION)})
-        for lane in self.lane_results.values():
-            summary.append(lane.get_elements())
+        for end in self.lane_results:
+            for lane in end.values():
+                summary.append(lane.get_elements())
         return summary
 
     def set_elements(self, tree):
@@ -220,7 +230,13 @@ class Summary(object):
         for element in list(tree):
             lrs = Summary.LaneResultSummary()
             lrs.set_elements(element)
-            self.lane_results[lrs.lane] = lrs
+            print lrs.end, lrs.lane
+            if len(self.lane_results) < (lrs.end + 1):
+              self.lane_results.append({})
+            self.lane_results[lrs.end][lrs.lane] = lrs
+
+    def is_paired_end(self):
+      return len(self.lane_results) == 2
 
     def dump(self):
         """
index 112201a281f6e46369eda926484480e2b17b5d7d..f6f3742117626698650b5c14055f437cc8d4baef 100644 (file)
@@ -1499,7 +1499,7 @@ def make_summary_paired_htm(gerald_dir):
 <tr>
 <td>1</td>
 <td>277083</td>
-<td>103646 +/- 4515</td>
+<td>103647 +/- 4516</td>
 <td>74887 +/- 6080</td>
 <td>277 +/- 17</td>
 <td>94.42 +/- 5.68</td>
@@ -1511,7 +1511,7 @@ def make_summary_paired_htm(gerald_dir):
 <tr>
 <td>2</td>
 <td>289563</td>
-<td>106678 +/- 4652</td>
+<td>106679 +/- 4653</td>
 <td>78260 +/- 2539</td>
 <td>259 +/- 13</td>
 <td>93.57 +/- 2.55</td>
@@ -1523,7 +1523,7 @@ def make_summary_paired_htm(gerald_dir):
 <tr>
 <td>3</td>
 <td>259242</td>
-<td>84583 +/- 5963</td>
+<td>84584 +/- 5964</td>
 <td>70065 +/- 4194</td>
 <td>252 +/- 12</td>
 <td>94.23 +/- 2.19</td>
@@ -1535,7 +1535,7 @@ def make_summary_paired_htm(gerald_dir):
 <tr>
 <td>4</td>
 <td>210549</td>
-<td>68813 +/- 4782</td>
+<td>68814 +/- 4783</td>
 <td>56905 +/- 4145</td>
 <td>226 +/- 16</td>
 <td>96.82 +/- 7.12</td>
@@ -1547,7 +1547,7 @@ def make_summary_paired_htm(gerald_dir):
 <tr>
 <td>5</td>
 <td>295555</td>
-<td>104854 +/- 4664</td>
+<td>104855 +/- 4665</td>
 <td>79879 +/- 6270</td>
 <td>200 +/- 24</td>
 <td>103.56 +/- 15.45</td>
@@ -1559,7 +1559,7 @@ def make_summary_paired_htm(gerald_dir):
 <tr>
 <td>6</td>
 <td>140401</td>
-<td>43555 +/- 1632</td>
+<td>43556 +/- 1633</td>
 <td>37946 +/- 2140</td>
 <td>179 +/- 10</td>
 <td>100.82 +/- 5.47</td>
@@ -1571,7 +1571,7 @@ def make_summary_paired_htm(gerald_dir):
 <tr>
 <td>7</td>
 <td>154217</td>
-<td>54265 +/- 1588</td>
+<td>54266 +/- 1589</td>
 <td>41680 +/- 5319</td>
 <td>184 +/- 5</td>
 <td>103.42 +/- 3.47</td>
@@ -1583,7 +1583,7 @@ def make_summary_paired_htm(gerald_dir):
 <tr>
 <td>8</td>
 <td>147969</td>
-<td>64363 +/- 2697</td>
+<td>64364 +/- 2698</td>
 <td>39991 +/- 6785</td>
 <td>206 +/- 31</td>
 <td>99.48 +/- 3.23</td>
index 7b1538142d002244b074a0ac46b571b55d5839e2..656b28186c9953b38d8d32043af854b982ee2770 100644 (file)
@@ -334,7 +334,7 @@ class RunfolderTests(unittest.TestCase):
 
         # test lane specific parameters from gerald config file
         for i in range(1,9):
-            cur_lane = g.lanes[str(i)]
+            cur_lane = g.lanes[i]
             self.failUnlessEqual(cur_lane.analysis, 'eland')
             self.failUnlessEqual(cur_lane.eland_genome, genomes[i])
             self.failUnlessEqual(cur_lane.read_length, '32')
@@ -345,10 +345,11 @@ class RunfolderTests(unittest.TestCase):
                     (17421, 2139), (20311, 2402), (20193, 2399), (15537, 2531),
                     (32047, 3356), (32946, 4753), (39504, 4171), (37998, 3792)]
 
+        self.failUnlessEqual(len(g.summary), 1)
         for i in range(1,9):
-            summary_lane = g.summary[str(i)]
+            summary_lane = g.summary[0][i]
             self.failUnlessEqual(summary_lane.cluster, clusters[i])
-            self.failUnlessEqual(summary_lane.lane, str(i))
+            self.failUnlessEqual(summary_lane.lane, i)
 
         xml = g.get_elements()
         # just make sure that element tree can serialize the tree
@@ -363,17 +364,18 @@ class RunfolderTests(unittest.TestCase):
 
         # test lane specific parameters from gerald config file
         for i in range(1,9):
-            g_lane = g.lanes[str(i)]
-            g2_lane = g2.lanes[str(i)]
+            g_lane = g.lanes[i]
+            g2_lane = g2.lanes[i]
             self.failUnlessEqual(g_lane.analysis, g2_lane.analysis)
             self.failUnlessEqual(g_lane.eland_genome, g2_lane.eland_genome)
             self.failUnlessEqual(g_lane.read_length, g2_lane.read_length)
             self.failUnlessEqual(g_lane.use_bases, g2_lane.use_bases)
 
+        self.failUnlessEqual(len(g.summary), 1)
         # test (some) summary elements
         for i in range(1,9):
-            g_summary = g.summary[str(i)]
-            g2_summary = g2.summary[str(i)]
+            g_summary = g.summary[0][i]
+            g2_summary = g2.summary[0][i]
             self.failUnlessEqual(g_summary.cluster, g2_summary.cluster)
             self.failUnlessEqual(g_summary.lane, g2_summary.lane)
 
@@ -399,15 +401,15 @@ class RunfolderTests(unittest.TestCase):
         dm3_map = { 'chrUextra.fa' : 'dm3/chrUextra.fa',
                     'chr2L.fa': 'dm3/chr2L.fa',
                     'Lambda.fa': 'Lambda.fa'}
-        genome_maps = { '1':dm3_map, '2':dm3_map, '3':dm3_map, '4':dm3_map,
-                        '5':dm3_map, '6':dm3_map, '7':dm3_map, '8':dm3_map }
+        genome_maps = { 1:dm3_map, 2:dm3_map, 3:dm3_map, 4:dm3_map,
+                        5:dm3_map, 6:dm3_map, 7:dm3_map, 8:dm3_map }
         eland = gerald.eland(self.gerald_dir, genome_maps=genome_maps)
         
         for i in range(1,9):
-            lane = eland[str(i)]
+            lane = eland[i]
             self.failUnlessEqual(lane.reads, 4)
             self.failUnlessEqual(lane.sample_name, "s")
-            self.failUnlessEqual(lane.lane_id, unicode(i))
+            self.failUnlessEqual(lane.lane_id, i)
             self.failUnlessEqual(len(lane.mapped_reads), 3)
             self.failUnlessEqual(lane.mapped_reads['Lambda.fa'], 1)
             self.failUnlessEqual(lane.mapped_reads['dm3/chr2L.fa'], 1)
@@ -420,8 +422,8 @@ class RunfolderTests(unittest.TestCase):
         e2 = gerald.ELAND(xml=xml)
 
         for i in range(1,9):
-            l1 = eland[str(i)]
-            l2 = e2[str(i)]
+            l1 = eland[i]
+            l2 = e2[i]
             self.failUnlessEqual(l1.reads, l2.reads)
             self.failUnlessEqual(l1.sample_name, l2.sample_name)
             self.failUnlessEqual(l1.lane_id, l2.lane_id)
index 69227644947635b2e8c1bf870324bc290cc5ad62..6e97ec63eaca3b40574c4162b88a3f62df649915 100644 (file)
@@ -756,7 +756,7 @@ class RunfolderTests(unittest.TestCase):
 
         # test lane specific parameters from gerald config file
         for i in range(1,9):
-            cur_lane = g.lanes[str(i)]
+            cur_lane = g.lanes[i]
             self.failUnlessEqual(cur_lane.analysis, 'eland')
             self.failUnlessEqual(cur_lane.eland_genome, genomes[i])
             self.failUnlessEqual(cur_lane.read_length, '32')
@@ -769,10 +769,11 @@ class RunfolderTests(unittest.TestCase):
                     (119735, 8465), (152177, 8146),
                     (84649, 7325), (54622, 4812),]
 
+        self.failUnlessEqual(len(g.summary), 1)
         for i in range(1,9):
-            summary_lane = g.summary[str(i)]
+            summary_lane = g.summary[0][i]
             self.failUnlessEqual(summary_lane.cluster, clusters[i])
-            self.failUnlessEqual(summary_lane.lane, str(i))
+            self.failUnlessEqual(summary_lane.lane, i)
 
         xml = g.get_elements()
         # just make sure that element tree can serialize the tree
@@ -787,17 +788,18 @@ class RunfolderTests(unittest.TestCase):
 
         # test lane specific parameters from gerald config file
         for i in range(1,9):
-            g_lane = g.lanes[str(i)]
-            g2_lane = g2.lanes[str(i)]
+            g_lane = g.lanes[i]
+            g2_lane = g2.lanes[i]
             self.failUnlessEqual(g_lane.analysis, g2_lane.analysis)
             self.failUnlessEqual(g_lane.eland_genome, g2_lane.eland_genome)
             self.failUnlessEqual(g_lane.read_length, g2_lane.read_length)
             self.failUnlessEqual(g_lane.use_bases, g2_lane.use_bases)
 
         # test (some) summary elements
+        self.failUnlessEqual(len(g.summary), 1)
         for i in range(1,9):
-            g_summary = g.summary[str(i)]
-            g2_summary = g2.summary[str(i)]
+            g_summary = g.summary[0][i]
+            g2_summary = g2.summary[0][i]
             self.failUnlessEqual(g_summary.cluster, g2_summary.cluster)
             self.failUnlessEqual(g_summary.lane, g2_summary.lane)
 
@@ -823,15 +825,15 @@ class RunfolderTests(unittest.TestCase):
         dm3_map = { 'chrUextra.fa' : 'dm3/chrUextra.fa',
                     'chr2L.fa': 'dm3/chr2L.fa',
                     'Lambda.fa': 'Lambda.fa'}
-        genome_maps = { '1':dm3_map, '2':dm3_map, '3':dm3_map, '4':dm3_map,
-                        '5':dm3_map, '6':dm3_map, '7':dm3_map, '8':dm3_map }
+        genome_maps = { 1:dm3_map, 2:dm3_map, 3:dm3_map, 4:dm3_map,
+                        5:dm3_map, 6:dm3_map, 7:dm3_map, 8:dm3_map }
         eland = gerald.eland(self.gerald_dir, genome_maps=genome_maps)
         
         for i in range(1,9):
-            lane = eland[str(i)]
+            lane = eland[i]
             self.failUnlessEqual(lane.reads, 4)
             self.failUnlessEqual(lane.sample_name, "s")
-            self.failUnlessEqual(lane.lane_id, unicode(i))
+            self.failUnlessEqual(lane.lane_id, i)
             self.failUnlessEqual(len(lane.mapped_reads), 3)
             self.failUnlessEqual(lane.mapped_reads['Lambda.fa'], 1)
             self.failUnlessEqual(lane.mapped_reads['dm3/chr2L.fa'], 1)
@@ -844,8 +846,8 @@ class RunfolderTests(unittest.TestCase):
         e2 = gerald.ELAND(xml=xml)
 
         for i in range(1,9):
-            l1 = eland[str(i)]
-            l2 = e2[str(i)]
+            l1 = eland[i]
+            l2 = e2[i]
             self.failUnlessEqual(l1.reads, l2.reads)
             self.failUnlessEqual(l1.sample_name, l2.sample_name)
             self.failUnlessEqual(l1.lane_id, l2.lane_id)
index e1f3fdd6c0cc60e81cc9ed1518320b074f10ec18..fba298166ce6ede01b5af8c0f89288afd536f94f 100644 (file)
@@ -137,7 +137,7 @@ class RunfolderTests(unittest.TestCase):
 
         # test lane specific parameters from gerald config file
         for i in range(1,9):
-            cur_lane = g.lanes[str(i)]
+            cur_lane = g.lanes[i]
             self.failUnlessEqual(cur_lane.analysis, 'eland')
             self.failUnlessEqual(cur_lane.eland_genome, genomes[i])
             self.failUnlessEqual(cur_lane.read_length, '32')
@@ -158,10 +158,11 @@ class RunfolderTests(unittest.TestCase):
                     (247308, 11600), (204298, 15640),
                     (202707, 15404), (198075, 14702),]
 
+        self.failUnlessEqual(len(g.summary), 1)
         for i in range(1,9):
-            summary_lane = g.summary[str(i)]
+            summary_lane = g.summary[0][i]
             self.failUnlessEqual(summary_lane.cluster, clusters[i])
-            self.failUnlessEqual(summary_lane.lane, str(i))
+            self.failUnlessEqual(summary_lane.lane, i)
 
         xml = g.get_elements()
         # just make sure that element tree can serialize the tree
@@ -176,17 +177,18 @@ class RunfolderTests(unittest.TestCase):
 
         # test lane specific parameters from gerald config file
         for i in range(1,9):
-            g_lane = g.lanes[str(i)]
-            g2_lane = g2.lanes[str(i)]
+            g_lane = g.lanes[i]
+            g2_lane = g2.lanes[i]
             self.failUnlessEqual(g_lane.analysis, g2_lane.analysis)
             self.failUnlessEqual(g_lane.eland_genome, g2_lane.eland_genome)
             self.failUnlessEqual(g_lane.read_length, g2_lane.read_length)
             self.failUnlessEqual(g_lane.use_bases, g2_lane.use_bases)
 
+        self.failUnlessEqual(len(g.summary), 1)
         # test (some) summary elements
         for i in range(1,9):
-            g_summary = g.summary[str(i)]
-            g2_summary = g2.summary[str(i)]
+            g_summary = g.summary[0][i]
+            g2_summary = g2.summary[0][i]
             self.failUnlessEqual(g_summary.cluster, g2_summary.cluster)
             self.failUnlessEqual(g_summary.lane, g2_summary.lane)
 
@@ -215,15 +217,15 @@ class RunfolderTests(unittest.TestCase):
           long_name = 'hg18/chr%d.fa' % (i,)
           hg_map[short_name] = long_name
 
-        genome_maps = { '1':hg_map, '2':hg_map, '3':hg_map, '4':hg_map,
-                        '5':hg_map, '6':hg_map, '7':hg_map, '8':hg_map }
+        genome_maps = { 1:hg_map, 2:hg_map, 3:hg_map, 4:hg_map,
+                        5:hg_map, 6:hg_map, 7:hg_map, 8:hg_map }
         eland = gerald.eland(self.gerald_dir, genome_maps=genome_maps)
 
         for i in range(1,9):
-            lane = eland[str(i)]
+            lane = eland[i]
             self.failUnlessEqual(lane.reads, 4)
             self.failUnlessEqual(lane.sample_name, "s")
-            self.failUnlessEqual(lane.lane_id, unicode(i))
+            self.failUnlessEqual(lane.lane_id, i)
             self.failUnlessEqual(len(lane.mapped_reads), 15)
             self.failUnlessEqual(lane.mapped_reads['hg18/chr5.fa'], 4)
             self.failUnlessEqual(lane.match_codes['U0'], 1)
@@ -241,8 +243,8 @@ class RunfolderTests(unittest.TestCase):
         e2 = gerald.ELAND(xml=xml)
 
         for i in range(1,9):
-            l1 = eland[str(i)]
-            l2 = e2[str(i)]
+            l1 = eland[i]
+            l2 = e2[i]
             self.failUnlessEqual(l1.reads, l2.reads)
             self.failUnlessEqual(l1.sample_name, l2.sample_name)
             self.failUnlessEqual(l1.lane_id, l2.lane_id)
index 0d546d118063f31119c1d2395529d057f9247eb0..76f1e64ecec0a886ae58a5d38e4a70b5c2d6880e 100644 (file)
@@ -134,7 +134,7 @@ class RunfolderTests(unittest.TestCase):
 
         # test lane specific parameters from gerald config file
         for i in range(1,9):
-            cur_lane = g.lanes[str(i)]
+            cur_lane = g.lanes[i]
             self.failUnlessEqual(cur_lane.analysis, 'eland')
             self.failUnlessEqual(cur_lane.eland_genome, genomes[i])
             self.failUnlessEqual(cur_lane.read_length, '32')
@@ -153,10 +153,11 @@ class RunfolderTests(unittest.TestCase):
                     (119735, 8465), (152177, 8146),
                     (84649, 7325), (54622, 4812),]
 
+        self.failUnlessEqual(len(g.summary), 1)
         for i in range(1,9):
-            summary_lane = g.summary[str(i)]
+            summary_lane = g.summary[0][i]
             self.failUnlessEqual(summary_lane.cluster, clusters[i])
-            self.failUnlessEqual(summary_lane.lane, str(i))
+            self.failUnlessEqual(summary_lane.lane, i)
 
         xml = g.get_elements()
         # just make sure that element tree can serialize the tree
@@ -171,17 +172,18 @@ class RunfolderTests(unittest.TestCase):
 
         # test lane specific parameters from gerald config file
         for i in range(1,9):
-            g_lane = g.lanes[str(i)]
-            g2_lane = g2.lanes[str(i)]
+            g_lane = g.lanes[i]
+            g2_lane = g2.lanes[i]
             self.failUnlessEqual(g_lane.analysis, g2_lane.analysis)
             self.failUnlessEqual(g_lane.eland_genome, g2_lane.eland_genome)
             self.failUnlessEqual(g_lane.read_length, g2_lane.read_length)
             self.failUnlessEqual(g_lane.use_bases, g2_lane.use_bases)
 
         # test (some) summary elements
+        self.failUnlessEqual(len(g.summary), 1)
         for i in range(1,9):
-            g_summary = g.summary[str(i)]
-            g2_summary = g2.summary[str(i)]
+            g_summary = g.summary[0][i]
+            g2_summary = g2.summary[0][i]
             self.failUnlessEqual(g_summary.cluster, g2_summary.cluster)
             self.failUnlessEqual(g_summary.lane, g2_summary.lane)
 
@@ -210,15 +212,15 @@ class RunfolderTests(unittest.TestCase):
           long_name = 'hg18/chr%d.fa' % (i,)
           hg_map[short_name] = long_name
 
-        genome_maps = { '1':hg_map, '2':hg_map, '3':hg_map, '4':hg_map,
-                        '5':hg_map, '6':hg_map, '7':hg_map, '8':hg_map }
+        genome_maps = { 1:hg_map, 2:hg_map, 3:hg_map, 4:hg_map,
+                        5:hg_map, 6:hg_map, 7:hg_map, 8:hg_map }
         eland = gerald.eland(self.gerald_dir, genome_maps=genome_maps)
 
         for i in range(1,9):
-            lane = eland[str(i)]
+            lane = eland[i]
             self.failUnlessEqual(lane.reads, 4)
             self.failUnlessEqual(lane.sample_name, "s")
-            self.failUnlessEqual(lane.lane_id, unicode(i))
+            self.failUnlessEqual(lane.lane_id, i)
             self.failUnlessEqual(len(lane.mapped_reads), 15)
             self.failUnlessEqual(lane.mapped_reads['hg18/chr5.fa'], 4)
             self.failUnlessEqual(lane.match_codes['U0'], 1)
@@ -236,8 +238,8 @@ class RunfolderTests(unittest.TestCase):
         e2 = gerald.ELAND(xml=xml)
 
         for i in range(1,9):
-            l1 = eland[str(i)]
-            l2 = e2[str(i)]
+            l1 = eland[i]
+            l2 = e2[i]
             self.failUnlessEqual(l1.reads, l2.reads)
             self.failUnlessEqual(l1.sample_name, l2.sample_name)
             self.failUnlessEqual(l1.lane_id, l2.lane_id)
index 20dc0d7be08158a08e650aeb3cc456591ac26606..783a5af7ece93c0accd70093503bd9178d120f34 100644 (file)
@@ -137,7 +137,7 @@ class RunfolderTests(unittest.TestCase):
 
         # test lane specific parameters from gerald config file
         for i in range(1,9):
-            cur_lane = g.lanes[str(i)]
+            cur_lane = g.lanes[i]
             self.failUnlessEqual(cur_lane.analysis, 'eland')
             self.failUnlessEqual(cur_lane.eland_genome, genomes[i])
             self.failUnlessEqual(cur_lane.read_length, '32')
@@ -150,16 +150,22 @@ class RunfolderTests(unittest.TestCase):
           self.failUnlessEqual(l.use_bases, 'Y'*32)
 
         # test data extracted from summary file
-        clusters = [None,
-                    (96483, 9074), (133738, 7938),
-                    (152142, 10002), (15784, 2162),
-                    (119735, 8465), (152177, 8146),
-                    (84649, 7325), (54622, 4812),]
-
-        for i in range(1,9):
-            summary_lane = g.summary[str(i)]
-            self.failUnlessEqual(summary_lane.cluster, clusters[i])
-            self.failUnlessEqual(summary_lane.lane, str(i))
+        clusters = [[None,
+                    (103646, 4515), (106678, 4652),
+                    (84583, 5963), (68813, 4782),
+                    (104854, 4664), (43555, 1632),
+                    (54265, 1588), (64363, 2697),],
+                    [None,
+                    (103647, 4516), (106679, 4653),
+                    (84584, 5964), (68814, 4783),
+                    (104855, 4665), (43556, 1633),
+                    (54266, 1589), (64364, 2698),],]
+
+        for end in [0,1]:
+            for lane in range(1,9):
+                summary_lane = g.summary[end][lane]
+                self.failUnlessEqual(summary_lane.cluster, clusters[end][lane])
+                self.failUnlessEqual(summary_lane.lane, lane)
 
         xml = g.get_elements()
         # just make sure that element tree can serialize the tree
@@ -174,36 +180,37 @@ class RunfolderTests(unittest.TestCase):
 
         # test lane specific parameters from gerald config file
         for i in range(1,9):
-            g_lane = g.lanes[str(i)]
-            g2_lane = g2.lanes[str(i)]
+            g_lane = g.lanes[i]
+            g2_lane = g2.lanes[i]
             self.failUnlessEqual(g_lane.analysis, g2_lane.analysis)
             self.failUnlessEqual(g_lane.eland_genome, g2_lane.eland_genome)
             self.failUnlessEqual(g_lane.read_length, g2_lane.read_length)
             self.failUnlessEqual(g_lane.use_bases, g2_lane.use_bases)
 
         # test (some) summary elements
-        for i in range(1,9):
-            g_summary = g.summary[str(i)]
-            g2_summary = g2.summary[str(i)]
-            self.failUnlessEqual(g_summary.cluster, g2_summary.cluster)
-            self.failUnlessEqual(g_summary.lane, g2_summary.lane)
-
-            g_eland = g.eland_results
-            g2_eland = g2.eland_results
-            for lane in g_eland.keys():
-                self.failUnlessEqual(g_eland[lane].reads,
-                                     g2_eland[lane].reads)
-                self.failUnlessEqual(len(g_eland[lane].mapped_reads),
-                                     len(g2_eland[lane].mapped_reads))
-                for k in g_eland[lane].mapped_reads.keys():
-                    self.failUnlessEqual(g_eland[lane].mapped_reads[k],
-                                         g2_eland[lane].mapped_reads[k])
-
-                self.failUnlessEqual(len(g_eland[lane].match_codes),
-                                     len(g2_eland[lane].match_codes))
-                for k in g_eland[lane].match_codes.keys():
-                    self.failUnlessEqual(g_eland[lane].match_codes[k],
-                                         g2_eland[lane].match_codes[k])
+        for end in [0,1]:
+            for i in range(1,9):
+                g_summary = g.summary[end][i]
+                g2_summary = g2.summary[end][i]
+                self.failUnlessEqual(g_summary.cluster, g2_summary.cluster)
+                self.failUnlessEqual(g_summary.lane, g2_summary.lane)
+
+                g_eland = g.eland_results
+                g2_eland = g2.eland_results
+                for lane in g_eland.keys():
+                    self.failUnlessEqual(g_eland[lane].reads,
+                                         g2_eland[lane].reads)
+                    self.failUnlessEqual(len(g_eland[lane].mapped_reads),
+                                         len(g2_eland[lane].mapped_reads))
+                    for k in g_eland[lane].mapped_reads.keys():
+                        self.failUnlessEqual(g_eland[lane].mapped_reads[k],
+                                             g2_eland[lane].mapped_reads[k])
+
+                    self.failUnlessEqual(len(g_eland[lane].match_codes),
+                                         len(g2_eland[lane].match_codes))
+                    for k in g_eland[lane].match_codes.keys():
+                        self.failUnlessEqual(g_eland[lane].match_codes[k],
+                                             g2_eland[lane].match_codes[k])
 
 
     def test_eland(self):
@@ -213,15 +220,15 @@ class RunfolderTests(unittest.TestCase):
           long_name = 'hg18/chr%d.fa' % (i,)
           hg_map[short_name] = long_name
 
-        genome_maps = { '1':hg_map, '2':hg_map, '3':hg_map, '4':hg_map,
-                        '5':hg_map, '6':hg_map, '7':hg_map, '8':hg_map }
+        genome_maps = { 1:hg_map, 2:hg_map, 3:hg_map, 4:hg_map,
+                        5:hg_map, 6:hg_map, 7:hg_map, 8:hg_map }
         eland = gerald.eland(self.gerald_dir, genome_maps=genome_maps)
 
         for i in range(1,9):
-            lane = eland[str(i)]
+            lane = eland[i]
             self.failUnlessEqual(lane.reads, 4)
             self.failUnlessEqual(lane.sample_name, "s")
-            self.failUnlessEqual(lane.lane_id, unicode(i))
+            self.failUnlessEqual(lane.lane_id, i)
             self.failUnlessEqual(len(lane.mapped_reads), 15)
             self.failUnlessEqual(lane.mapped_reads['hg18/chr5.fa'], 4)
             self.failUnlessEqual(lane.match_codes['U0'], 1)
@@ -239,8 +246,8 @@ class RunfolderTests(unittest.TestCase):
         e2 = gerald.ELAND(xml=xml)
 
         for i in range(1,9):
-            l1 = eland[str(i)]
-            l2 = e2[str(i)]
+            l1 = eland[i]
+            l2 = e2[i]
             self.failUnlessEqual(l1.reads, l2.reads)
             self.failUnlessEqual(l1.sample_name, l2.sample_name)
             self.failUnlessEqual(l1.lane_id, l2.lane_id)
@@ -261,14 +268,15 @@ class RunfolderTests(unittest.TestCase):
 
         # do we get the flowcell id from the filename?
         self.failUnlessEqual(len(runs), 1)
-        name = 'run_207BTAAXX_%s.xml' % ( date.today().strftime('%Y-%m-%d'),)
+        # firecrest's date depends on filename not the create time.
+        name = 'run_207BTAAXX_2008-04-19.xml'
         self.failUnlessEqual(runs[0].name, name)
 
         # do we get the flowcell id from the FlowcellId.xml file
         make_flowcell_id(self.runfolder_dir, '207BTAAXY')
         runs = runfolder.get_runs(self.runfolder_dir)
         self.failUnlessEqual(len(runs), 1)
-        name = 'run_207BTAAXY_%s.xml' % ( date.today().strftime('%Y-%m-%d'),)
+        name = 'run_207BTAAXY_2008-04-19.xml'
         self.failUnlessEqual(runs[0].name, name)
 
         r1 = runs[0]