Merge branch 'master' of mus.cacr.caltech.edu:htsworkflow

[htsworkflow.git] / htsworkflow / pipelines / runfolder.py
diff --git a/htsworkflow/pipelines/runfolder.py b/htsworkflow/pipelines/runfolder.py

index 568e683ff2757bb65f9315f5059317cc900697c0..4a2b4cdc3704fccb1b7fbdd5f48211c86851b6c4 100644 (file)
--- a/htsworkflow/pipelines/runfolder.py
+++ b/htsworkflow/pipelines/runfolder.py
@@ -13,9 +13,9 @@ import tarfile
  import time
  
  try:
-  from xml.etree import ElementTree
+    from xml.etree import ElementTree
  except ImportError, e:
-  from elementtree import ElementTree
+    from elementtree import ElementTree
  
  LOGGER = logging.getLogger(__name__)
  
@@ -40,13 +40,13 @@ class PipelineRun(object):
      PIPELINE_RUN = 'PipelineRun'
      FLOWCELL_ID = 'FlowcellID'
  
-    def __init__(self, pathname=None, xml=None):
+    def __init__(self, pathname=None, flowcell_id=None, xml=None):
          if pathname is not None:
            self.pathname = os.path.normpath(pathname)
          else:
            self.pathname = None
          self._name = None
-        self._flowcell_id = None
+        self._flowcell_id = flowcell_id
          self.image_analysis = None
          self.bustard = None
          self.gerald = None
@@ -57,23 +57,23 @@ class PipelineRun(object):
      def _get_flowcell_id(self):
          # extract flowcell ID
          if self._flowcell_id is None:
-          config_dir = os.path.join(self.pathname, 'Config')
-          flowcell_id_path = os.path.join(config_dir, 'FlowcellId.xml')
-         if os.path.exists(flowcell_id_path):
-            flowcell_id_tree = ElementTree.parse(flowcell_id_path)
-            self._flowcell_id = flowcell_id_tree.findtext('Text')
-         else:
-            path_fields = self.pathname.split('_')
-            if len(path_fields) > 0:
-              # guessing last element of filename
-              flowcell_id = path_fields[-1]
+            config_dir = os.path.join(self.pathname, 'Config')
+            flowcell_id_path = os.path.join(config_dir, 'FlowcellId.xml')
+            if os.path.exists(flowcell_id_path):
+                flowcell_id_tree = ElementTree.parse(flowcell_id_path)
+                self._flowcell_id = flowcell_id_tree.findtext('Text')
              else:
-              flowcell_id = 'unknown'
+                path_fields = self.pathname.split('_')
+                if len(path_fields) > 0:
+                    # guessing last element of filename
+                    self._flowcell_id = path_fields[-1]
+                else:
+                    self._flowcell_id = 'unknown'
+
+                # Warn on every guess, not only the 'unknown' fallback.
+                LOGGER.warning("Flowcell id was not found, guessing %s" %
+                               (self._flowcell_id,))
  
-           LOGGER.warning(
-             "Flowcell id was not found, guessing %s" % (
-                flowcell_id))
-           self._flowcell_id = flowcell_id
          return self._flowcell_id
      flowcell_id = property(_get_flowcell_id)
  
@@ -163,7 +163,7 @@ def load_pipeline_run_xml(pathname):
      run = PipelineRun(xml=tree)
      return run
  
-def get_runs(runfolder):
+def get_runs(runfolder, flowcell_id=None):
      """
      Search through a run folder for all the various sub component runs
      and then return a PipelineRun for each different combination.
@@ -191,7 +191,7 @@ def get_runs(runfolder):
                  LOGGER.info("Found gerald directory %s" % (gerald_pathname,))
                  try:
                      g = gerald.gerald(gerald_pathname)
-                    p = PipelineRun(runfolder)
+                    p = PipelineRun(runfolder, flowcell_id)
                      p.image_analysis = image_analysis
                      p.bustard = b
                      p.gerald = g
@@ -616,9 +616,17 @@ def clean_runs(runs, dry_run=True):
          LOGGER.info("Cleaning images")
          image_dirs = glob(os.path.join(run.pathname, 'Images', 'L*'))
          rm_list(image_dirs, dry_run)
-        # cd Data/C1-*_Firecrest*
-        LOGGER.info("Cleaning intermediate files")
+        # rm ReadPrep
+        LOGGER.info("Cleaning ReadPrep*")
+        read_prep_dirs = glob(os.path.join(run.pathname, 'ReadPrep*'))
+        rm_list(read_prep_dirs, dry_run)
+        # rm Thumbnail_Images
+        LOGGER.info("Cleaning Thumbnail_Images")
+        thumbnail_dirs = glob(os.path.join(run.pathname, 'Thumbnail_Images'))
+        rm_list(thumbnail_dirs, dry_run)
+
          # make clean_intermediate
+        LOGGER.info("Cleaning intermediate files")
          if os.path.exists(os.path.join(run.image_analysis.pathname, 'Makefile')):
              clean_process = subprocess.Popen(['make', 'clean_intermediate'],
                                               cwd=run.image_analysis.pathname,)