Add the ability to specify a flowcell id instead of depending on autodetect.
[htsworkflow.git] / htsworkflow / pipelines / runfolder.py
index e1b474b6cd70846ee6f702ce01365a5c93db0190..a3e24f4048ce20ad10f9a6f86b6280bda9953bbb 100644 (file)
@@ -13,9 +13,9 @@ import tarfile
 import time
 
 try:
-  from xml.etree import ElementTree
+    from xml.etree import ElementTree
 except ImportError, e:
-  from elementtree import ElementTree
+    from elementtree import ElementTree
 
 EUROPEAN_STRPTIME = "%d-%m-%Y"
 EUROPEAN_DATE_RE = "([0-9]{1,2}-[0-9]{1,2}-[0-9]{4,4})"
@@ -38,13 +38,13 @@ class PipelineRun(object):
     PIPELINE_RUN = 'PipelineRun'
     FLOWCELL_ID = 'FlowcellID'
 
-    def __init__(self, pathname=None, xml=None):
+    def __init__(self, pathname=None, flowcell_id=None, xml=None):
         if pathname is not None:
           self.pathname = os.path.normpath(pathname)
         else:
           self.pathname = None
         self._name = None
-        self._flowcell_id = None
+        self._flowcell_id = flowcell_id
         self.image_analysis = None
         self.bustard = None
         self.gerald = None
@@ -55,23 +55,23 @@ class PipelineRun(object):
     def _get_flowcell_id(self):
         # extract flowcell ID
         if self._flowcell_id is None:
-          config_dir = os.path.join(self.pathname, 'Config')
-          flowcell_id_path = os.path.join(config_dir, 'FlowcellId.xml')
-         if os.path.exists(flowcell_id_path):
-            flowcell_id_tree = ElementTree.parse(flowcell_id_path)
-            self._flowcell_id = flowcell_id_tree.findtext('Text')
-         else:
-            path_fields = self.pathname.split('_')
-            if len(path_fields) > 0:
-              # guessing last element of filename
-              flowcell_id = path_fields[-1]
+            config_dir = os.path.join(self.pathname, 'Config')
+            flowcell_id_path = os.path.join(config_dir, 'FlowcellId.xml')
+            if os.path.exists(flowcell_id_path):
+                flowcell_id_tree = ElementTree.parse(flowcell_id_path)
+                self._flowcell_id = flowcell_id_tree.findtext('Text')
             else:
-              flowcell_id = 'unknown'
+                path_fields = self.pathname.split('_')
+                if len(path_fields) > 0:
+                    # guessing last element of filename
+                   flowcell_id = path_fields[-1]
+                else:
+                   flowcell_id = 'unknown'
 
-           logging.warning(
-             "Flowcell id was not found, guessing %s" % (
-                flowcell_id))
-           self._flowcell_id = flowcell_id
+                logging.warning(
+                  "Flowcell id was not found, guessing %s" % (
+                     flowcell_id))
+                self._flowcell_id = flowcell_id
         return self._flowcell_id
     flowcell_id = property(_get_flowcell_id)
 
@@ -152,7 +152,7 @@ def load_pipeline_run_xml(pathname):
     """
     Load and instantiate a Pipeline run from a run xml file
 
-    :Parameters: 
+    :Parameters:
       - `pathname` : location of an run xml file
 
     :Returns: initialized PipelineRun object
@@ -161,7 +161,7 @@ def load_pipeline_run_xml(pathname):
     run = PipelineRun(xml=tree)
     return run
 
-def get_runs(runfolder):
+def get_runs(runfolder, flowcell_id=None):
     """
     Search through a run folder for all the various sub component runs
     and then return a PipelineRun for each different combination.
@@ -189,7 +189,7 @@ def get_runs(runfolder):
                 logging.info("Found gerald directory %s" % (gerald_pathname,))
                 try:
                     g = gerald.gerald(gerald_pathname)
-                    p = PipelineRun(runfolder)
+                    p = PipelineRun(runfolder, flowcell_id)
                     p.image_analysis = image_analysis
                     p.bustard = b
                     p.gerald = g
@@ -273,7 +273,7 @@ def get_specific_run(gerald_dir):
     elif re.search('Intensities', short_image_dir, re.IGNORECASE) is not None:
         image_run = ipar.ipar(image_dir)
 
-    # if we din't find a run, report the error and return 
+    # if we didn't find a run, report the error and return
     if image_run is None:
         msg = '%s does not contain an image processing step' % (image_dir,)
         logging.error(msg)