Handle the case when a sequencing lane lacks any yield information.
author Diane Trout <diane@caltech.edu>
Fri, 24 Sep 2010 22:34:16 +0000 (22:34 +0000)
committer Diane Trout <diane@caltech.edu>
Fri, 24 Sep 2010 22:34:16 +0000 (22:34 +0000)
(For instance, if we only run one lane of analysis on a flowcell.)

htsworkflow/pipelines/eland.py
htsworkflow/pipelines/runfolder.py
htsworkflow/pipelines/summary.py

index f291f0fa974c6353f10f7283db8e52354322e53f..271a6517f632209612fb27b6647d54bec921386d 100644 (file)
@@ -75,6 +75,8 @@ class ResultLane(object):
         return self._reads
     reads = property(_get_reads)
 
+    def get_elements(self):
+        return None
 
 class ElandLane(ResultLane):
     """
@@ -552,9 +554,10 @@ class ELAND(object):
            end_results = self.results[end]
            for lane_id, lane in end_results.items():
                 eland_lane = lane.get_elements()
-                eland_lane.attrib[ELAND.END] = unicode (end)
-                eland_lane.attrib[ELAND.LANE_ID] = unicode(lane_id)
-                root.append(eland_lane)
+                if eland_lane is not None:
+                    eland_lane.attrib[ELAND.END] = unicode (end)
+                    eland_lane.attrib[ELAND.LANE_ID] = unicode(lane_id)
+                    root.append(eland_lane)
         return root
 
     def set_elements(self, tree):
index 038ae5bbd6c57a60e77fbcb73f934a21fd50dc54..016ae873b3579966ad40ad8c1da5b3e44bd4aeed 100644 (file)
@@ -471,25 +471,29 @@ def compress_eland_results(gerald_object, cycle_dir, num_jobs=1):
     for lanes_dictionary in gerald_object.eland_results.results:
         for eland_lane in lanes_dictionary.values():
             source_name = eland_lane.pathname
-            path, name = os.path.split(eland_lane.pathname)
-            dest_name = os.path.join(cycle_dir, name)
-            logging.info("Saving eland file %s to %s" % \
+            if source_name is None:
+              logging.info(
+                "Lane ID %s does not have a filename." % (eland_lane.lane_id,))
+            else:
+              path, name = os.path.split(source_name)
+              dest_name = os.path.join(cycle_dir, name)
+              logging.info("Saving eland file %s to %s" % \
                          (source_name, dest_name))
             
-            if is_compressed(name):
-              logging.info('Already compressed, Saving to %s' % (dest_name, ))
-              shutil.copy(source_name, dest_name)
-            else:
-              # not compressed
-              dest_name += '.bz2'
-              args = ['bzip2', '-9', '-c', source_name, '>', dest_name ]
-              bz_commands.append(" ".join(args))
-              #logging.info('Running: %s' % ( " ".join(args) ))
-              #bzip_dest = open(dest_name, 'w')
-              #bzip = subprocess.Popen(args, stdout=bzip_dest)
-              #logging.info('Saving to %s' % (dest_name, ))
-              #bzip.wait()
-              
+              if is_compressed(name):
+                logging.info('Already compressed, Saving to %s' % (dest_name, ))
+                shutil.copy(source_name, dest_name)
+              else:
+                # not compressed
+                dest_name += '.bz2'
+                args = ['bzip2', '-9', '-c', source_name, '>', dest_name ]
+                bz_commands.append(" ".join(args))
+                #logging.info('Running: %s' % ( " ".join(args) ))
+                #bzip_dest = open(dest_name, 'w')
+                #bzip = subprocess.Popen(args, stdout=bzip_dest)
+                #logging.info('Saving to %s' % (dest_name, ))
+                #bzip.wait()
+                
     if len(bz_commands) > 0:
       q = QueueCommands(bz_commands, num_jobs)
       q.run()
@@ -517,6 +521,7 @@ def extract_results(runs, output_base_dir=None, site="individual", num_jobs=1):
       cycle = "C%d-%d" % (r.image_analysis.start, r.image_analysis.stop)
       logging.info("Filling in %s" % (cycle,))
       cycle_dir = os.path.join(result_dir, cycle)
+      cycle_dir = os.path.abspath(cycle_dir)
       if os.path.exists(cycle_dir):
         logging.error("%s already exists, not overwriting" % (cycle_dir,))
         continue
@@ -535,7 +540,11 @@ def extract_results(runs, output_base_dir=None, site="individual", num_jobs=1):
 
       # build base call saving commands
       if site is not None:
-        lanes = range(1,9)
+        lanes = []
+        for lane in range(1,9):
+          if r.gerald.lanes[lane].analysis != 'none':
+            lanes.append(lane)
+
         run_name = srf.pathname_to_run_name(r.pathname)
         srf_cmds = srf.make_qseq_commands(run_name, r.bustard.pathname, lanes, site, cycle_dir)
         srf.run_commands(r.bustard.pathname, srf_cmds, num_jobs)
index 414609905d26e82e98ad8f3c3244f65c821b8c8a..fb8efdc9bd4671c89b6a1c72d7c9e2abce3ba9ff 100644 (file)
@@ -98,7 +98,11 @@ class Summary(object):
         def set_elements_from_gerald_xml(self, read, element):
             self.lane = int(element.find('laneNumber').text)
             self.end = read
-            self.lane_yield = int(element.find('laneYield').text)
+            lane_yield_node = element.find('laneYield')
+            if lane_yield_node is not None:
+                self.lane_yield = int(lane_yield_node.text)
+            else:
+                self.lane_yield = None
 
             for GeraldName, LRSName in Summary.LaneResultSummary.GERALD_TAGS.items():
                 node = element.find(GeraldName)
@@ -380,8 +384,18 @@ def parse_xml_mean_range(element):
     stddev = element.find('stdev')
     if mean is None or stddev is None:
         raise RuntimeError("Summary.xml file format changed, expected mean/stddev tags")
+    if mean.text is None: 
+        mean_value = float('nan')
+    else:
+        mean_value = tonumber(mean.text)
+
+    if stddev.text is None: 
+        stddev_value = float('nan')
+    else:
+        stddev_value = tonumber(stddev.text)
+
 
-    return (tonumber(mean.text), tonumber(stddev.text))
+    return (mean_value, stddev_value)
 
 if __name__ == "__main__":
     # test code