Keep track of sample_name and lane_id computed from the eland
author: Diane Trout <diane@caltech.edu>
Sat, 10 May 2008 00:18:24 +0000 (00:18 +0000)
committer: Diane Trout <diane@caltech.edu>
Sat, 10 May 2008 00:18:24 +0000 (00:18 +0000)
filename.

Perhaps I should add more checking to make sure the filename is of the form
s_?_eland_result.txt.

gaworkflow/pipeline/gerald.py
gaworkflow/pipeline/test/test_runfolder.py

index 990874ce1076bb65cfe44a79beff3f615e36fb4f..5480f552878cd472c37cbf3d4eb4809174e691e5 100644 (file)
@@ -377,6 +377,8 @@ class ElandLane(object):
     """
     XML_VERSION = 1
     LANE = 'ElandLane'
+    SAMPLE_NAME = 'SampleName'
+    LANE_ID = 'LaneID'
     GENOME_MAP = 'GenomeMap'
     GENOME_ITEM = 'GenomeItem'
     MAPPED_READS = 'MappedReads'
@@ -387,6 +389,8 @@ class ElandLane(object):
 
     def __init__(self, pathname=None, genome_map=None, xml=None):
         self.pathname = pathname
+        self.sample_name = None
+        self.lane_id = None
         self._reads = None
         self._mapped_reads = {}
         self._match_codes = {}
@@ -407,6 +411,12 @@ class ElandLane(object):
         if self.pathname is None:
             return
 
+        # extract the sample name
+        path, name = os.path.split(self.pathname)
+        split_name = name.split('_')
+        self.sample_name = split_name[0]
+        self.lane_id = split_name[1]
+
         if os.stat(self.pathname)[stat.ST_SIZE] == 0:
             raise RuntimeError("Eland isn't done, try again later.")
 
@@ -455,6 +465,10 @@ class ElandLane(object):
         lane = ElementTree.Element(ElandLane.LANE, 
                                    {'version': 
                                     unicode(ElandLane.XML_VERSION)})
+        sample_tag = ElementTree.SubElement(lane, ElandLane.SAMPLE_NAME)
+        sample_tag.text = self.sample_name
+        lane_tag = ElementTree.SubElement(lane, ElandLane.LANE_ID)
+        lane_tag.text = self.lane_id
         genome_map = ElementTree.SubElement(lane, ElandLane.GENOME_MAP)
         for k, v in self.genome_map.items():
             item = ElementTree.SubElement(
@@ -480,7 +494,11 @@ class ElandLane(object):
             raise ValueError('Exptecting %s' % (ElandLane.LANE,))
         for element in tree:
             tag = element.tag.lower()
-            if tag == ElandLane.GENOME_MAP.lower():
+            if tag == ElandLane.SAMPLE_NAME.lower():
+                self.sample_name = element.text
+            elif tag == ElandLane.LANE_ID.lower():
+                self.lane_id = element.text
+            elif tag == ElandLane.GENOME_MAP.lower():
                 for child in element:
                     name = child.attrib['name']
                     value = child.attrib['value']
@@ -553,10 +571,12 @@ class ELAND(object):
 def eland(basedir, gerald=None, genome_maps=None):
     e = ELAND()
     for pathname in glob(os.path.join(basedir, "*_eland_result.txt")):
-        # extract the sample name
+        # yes the lane_id is also being computed in ElandLane._update
+        # I didn't want to clutter up my constructor
+        # but I needed to persist the sample_name/lane_id for
+        # runfolder summary_report
         path, name = os.path.split(pathname)
         split_name = name.split('_')
-        sample_name = split_name[0]
         lane_id = split_name[1]
 
         if genome_maps is not None:
index 5cd6ac5904339efb6ee862bbd7af11f363af1dfd..f539cfa0421feb2a16be04f90cfda27dca01dbfc 100644 (file)
@@ -538,6 +538,8 @@ class RunfolderTests(unittest.TestCase):
         for i in range(1,9):
             lane = eland[str(i)]
             self.failUnlessEqual(lane.reads, 4)
+            self.failUnlessEqual(lane.sample_name, "s")
+            self.failUnlessEqual(lane.lane_id, unicode(i))
             self.failUnlessEqual(len(lane.mapped_reads), 3)
             self.failUnlessEqual(lane.mapped_reads['Lambda.fa'], 1)
             self.failUnlessEqual(lane.mapped_reads['dm3/chr2L.fa'], 1)
@@ -553,6 +555,8 @@ class RunfolderTests(unittest.TestCase):
             l1 = eland[str(i)]
             l2 = e2[str(i)]
             self.failUnlessEqual(l1.reads, l2.reads)
+            self.failUnlessEqual(l1.sample_name, l2.sample_name)
+            self.failUnlessEqual(l1.lane_id, l2.lane_id)
             self.failUnlessEqual(len(l1.mapped_reads), len(l2.mapped_reads))
             self.failUnlessEqual(len(l1.mapped_reads), 3)
             for k in l1.mapped_reads.keys():