Add the ability to specify a flowcell ID explicitly instead of depending on autodetection.

[htsworkflow.git] / htsworkflow / pipelines / runfolder.py
diff --git a/htsworkflow/pipelines/runfolder.py b/htsworkflow/pipelines/runfolder.py

index e1b474b6cd70846ee6f702ce01365a5c93db0190..a3e24f4048ce20ad10f9a6f86b6280bda9953bbb 100644 (file)
--- a/htsworkflow/pipelines/runfolder.py
+++ b/htsworkflow/pipelines/runfolder.py
@@ -13,9 +13,9 @@ import tarfile
  import time
  
  try:
-  from xml.etree import ElementTree
+    from xml.etree import ElementTree
  except ImportError, e:
-  from elementtree import ElementTree
+    from elementtree import ElementTree
  
  EUROPEAN_STRPTIME = "%d-%m-%Y"
  EUROPEAN_DATE_RE = "([0-9]{1,2}-[0-9]{1,2}-[0-9]{4,4})"
@@ -38,13 +38,13 @@ class PipelineRun(object):
      PIPELINE_RUN = 'PipelineRun'
      FLOWCELL_ID = 'FlowcellID'
  
-    def __init__(self, pathname=None, xml=None):
+    def __init__(self, pathname=None, flowcell_id=None, xml=None):
          if pathname is not None:
            self.pathname = os.path.normpath(pathname)
          else:
            self.pathname = None
          self._name = None
-        self._flowcell_id = None
+        self._flowcell_id = flowcell_id
          self.image_analysis = None
          self.bustard = None
          self.gerald = None
@@ -55,23 +55,23 @@ class PipelineRun(object):
      def _get_flowcell_id(self):
          # extract flowcell ID
          if self._flowcell_id is None:
-          config_dir = os.path.join(self.pathname, 'Config')
-          flowcell_id_path = os.path.join(config_dir, 'FlowcellId.xml')
-         if os.path.exists(flowcell_id_path):
-            flowcell_id_tree = ElementTree.parse(flowcell_id_path)
-            self._flowcell_id = flowcell_id_tree.findtext('Text')
-         else:
-            path_fields = self.pathname.split('_')
-            if len(path_fields) > 0:
-              # guessing last element of filename
-              flowcell_id = path_fields[-1]
+            config_dir = os.path.join(self.pathname, 'Config')
+            flowcell_id_path = os.path.join(config_dir, 'FlowcellId.xml')
+            if os.path.exists(flowcell_id_path):
+                flowcell_id_tree = ElementTree.parse(flowcell_id_path)
+                self._flowcell_id = flowcell_id_tree.findtext('Text')
              else:
-              flowcell_id = 'unknown'
+                path_fields = self.pathname.split('_')
+                if len(path_fields) > 0:
+                    # guessing last element of filename
+                   flowcell_id = path_fields[-1]
+                else:
+                   flowcell_id = 'unknown'
  
-           logging.warning(
-             "Flowcell id was not found, guessing %s" % (
-                flowcell_id))
-           self._flowcell_id = flowcell_id
+                logging.warning(
+                  "Flowcell id was not found, guessing %s" % (
+                     flowcell_id))
+                self._flowcell_id = flowcell_id
          return self._flowcell_id
      flowcell_id = property(_get_flowcell_id)
  
@@ -152,7 +152,7 @@ def load_pipeline_run_xml(pathname):
      """
      Load and instantiate a Pipeline run from a run xml file
  
-    :Parameters: 
+    :Parameters:
        - `pathname` : location of an run xml file
  
      :Returns: initialized PipelineRun object
@@ -161,7 +161,7 @@ def load_pipeline_run_xml(pathname):
      run = PipelineRun(xml=tree)
      return run
  
-def get_runs(runfolder):
+def get_runs(runfolder, flowcell_id=None):
      """
      Search through a run folder for all the various sub component runs
      and then return a PipelineRun for each different combination.
@@ -189,7 +189,7 @@ def get_runs(runfolder):
                  logging.info("Found gerald directory %s" % (gerald_pathname,))
                  try:
                      g = gerald.gerald(gerald_pathname)
-                    p = PipelineRun(runfolder)
+                    p = PipelineRun(runfolder, flowcell_id)
                      p.image_analysis = image_analysis
                      p.bustard = b
                      p.gerald = g
@@ -273,7 +273,7 @@ def get_specific_run(gerald_dir):
      elif re.search('Intensities', short_image_dir, re.IGNORECASE) is not None:
          image_run = ipar.ipar(image_dir)
  
-    # if we din't find a run, report the error and return 
+    # if we didn't find a run, report the error and return
      if image_run is None:
          msg = '%s does not contain an image processing step' % (image_dir,)
          logging.error(msg)