Try to make runfolder results extraction more robust
author    Diane Trout <diane@caltech.edu>
          Fri, 30 Jan 2009 02:15:57 +0000 (02:15 +0000)
committer Diane Trout <diane@caltech.edu>
          Fri, 30 Jan 2009 02:15:57 +0000 (02:15 +0000)
If an IPAR or firecrest directory is missing some of the important
matrix files, that implies there isn't actually a valid run present;
this patch will then (hopefully) issue a warning and skip that
analysis run.
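
From a caller's point of view the change looks roughly like this
(a minimal sketch; the directory path below is made up):

    from htsworkflow.pipelines import firecrest

    # hypothetical Firecrest directory that lacks Matrix/s_matrix.txt
    pathname = 'Data/C1-36_Firecrest1.9.5_30-01-2009_diane'
    image_analysis = firecrest.firecrest(pathname)
    if image_analysis is None:
        # get_runs() now logs a warning and skips this analysis run
        # instead of failing part way through results extraction.
        print 'skipping invalid firecrest directory', pathname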

I also added an option to scripts/runfolder to allow a user to specify
where the extracted results should go.
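
For example, to extract results from a run folder into a separate
output directory (the flowcell directory name below is made up):

    scripts/runfolder --extract-results -o /tmp/FC12345_results \
        090130_HWI-EAS229_0001_FC12345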

One questionable thing is that for one analysis some of the lanes
were run as sequence only and not as an eland analysis, so where I
expected every lane to have an eland genome, these lanes don't.
I hope that the code doesn't lose the index after serializing
and deserializing that chunk, for example.
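
That situation is what the gerald.py change below guards against; a
sketch of the failure mode (the lane list here is hypothetical):

    lanes = ['1', '2', '3', '5', '6', '7', '8']   # lane 4 was sequence only
    lanes.index('4')   # raises ValueError; the getter now returns None instead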

htsworkflow/pipelines/firecrest.py
htsworkflow/pipelines/gerald.py
htsworkflow/pipelines/ipar.py
htsworkflow/pipelines/runfolder.py
scripts/runfolder

diff --git a/htsworkflow/pipelines/firecrest.py b/htsworkflow/pipelines/firecrest.py
index ee6fded6371c45b02c10c40d3c16df76923a52a1..4fbde5d517ac661cfecd68ff8ab2fff2472f5d9f 100644
--- a/htsworkflow/pipelines/firecrest.py
+++ b/htsworkflow/pipelines/firecrest.py
@@ -115,6 +115,8 @@ def firecrest(pathname):
     # should I parse this deeper than just stashing the 
     # contents of the matrix file?
     matrix_pathname = os.path.join(pathname, 'Matrix', 's_matrix.txt')
+    if not os.path.exists(matrix_pathname):
+        return None
     f.matrix = open(matrix_pathname, 'r').read()
     return f
 
diff --git a/htsworkflow/pipelines/gerald.py b/htsworkflow/pipelines/gerald.py
index a5dd323861beb46549fc3b98be0f6af77eec6e84..cbc5fcb92d7fb26948f6137ab4f897ad749e129e 100644
--- a/htsworkflow/pipelines/gerald.py
+++ b/htsworkflow/pipelines/gerald.py
@@ -41,7 +41,10 @@ class Gerald(object):
             if len(container.getchildren()) > LANES_PER_FLOWCELL:
                 raise RuntimeError('GERALD config.xml file changed')
             lanes = [x.tag.split('_')[1] for x in container.getchildren()]
-            index = lanes.index(self._lane_id)
+            try:
+                index = lanes.index(self._lane_id)
+            except ValueError, e:
+                return None
             element = container[index]
             return element.text
         def _get_analysis(self):
diff --git a/htsworkflow/pipelines/ipar.py b/htsworkflow/pipelines/ipar.py
index a559229ee83957068c8c96dcc3f538980e4fc9b4..3d90868910a37d16d4b9b7f70a042294553f0485 100644
--- a/htsworkflow/pipelines/ipar.py
+++ b/htsworkflow/pipelines/ipar.py
@@ -193,6 +193,8 @@ def ipar(pathname):
 
     # contents of the matrix file?
     matrix_pathname = os.path.join(pathname, 'Matrix', 's_matrix.txt')
+    if not os.path.exists(matrix_pathname):
+        return None
     i.matrix = open(matrix_pathname, 'r').read()
 
     # look for parameter xml file
@@ -222,4 +224,4 @@ if __name__ == "__main__":
   print i.tiles.keys()
   print j.tiles.keys()
   print j.tiles.items()
-  print j.file_list()
\ No newline at end of file
+  print j.file_list()
diff --git a/htsworkflow/pipelines/runfolder.py b/htsworkflow/pipelines/runfolder.py
index f327b7891868034ca38c8327bdeed5ed2efdd135..10eeee1ccef5d1e7fda2c4100af01713d74910cd 100644
--- a/htsworkflow/pipelines/runfolder.py
+++ b/htsworkflow/pipelines/runfolder.py
@@ -178,12 +178,26 @@ def get_runs(runfolder):
     for firecrest_pathname in glob(os.path.join(datadir,"*Firecrest*")):
         logging.info('Found firecrest in ' + datadir)
         image_analysis = firecrest.firecrest(firecrest_pathname)
-        scan_post_image_analysis(runs, runfolder, image_analysis, firecrest_pathname)
+        if image_analysis is None:
+            logging.warn(
+                "%s is an empty or invalid firecrest directory" % (firecrest_pathname,)
+            )
+        else:
+            scan_post_image_analysis(
+                runs, runfolder, image_analysis, firecrest_pathname
+            )
     # scan for IPAR directories
     for ipar_pathname in glob(os.path.join(datadir,"IPAR_*")):
         logging.info('Found ipar directories in ' + datadir)
         image_analysis = ipar.ipar(ipar_pathname)
-        scan_post_image_analysis(runs, runfolder, image_analysis, ipar_pathname)
+        if image_analysis is None:
+            logging.warn(
+                "%s is an empty or invalid IPAR directory" % (ipar_pathname,)
+            )
+        else:
+            scan_post_image_analysis(
+                runs, runfolder, image_analysis, ipar_pathname
+            )
 
     return runs
 
diff --git a/scripts/runfolder b/scripts/runfolder
index 0abbfe1c5128d1693c3443cebf8338d3023f9114..bf5c5d884d2c931a6b96040a477b6bc0577ab30f 100644
--- a/scripts/runfolder
+++ b/scripts/runfolder
@@ -27,6 +27,7 @@ runfolder.py can also spit out a simple summary report (-s option)
 that contains the per lane post filter cluster numbers and the mapped 
 read counts. (The report isn't currently very pretty)
 """
+from glob import glob
 import logging
 import optparse
 import os
@@ -50,9 +51,13 @@ def make_parser():
     parser.add_option('--extract-results', action='store_true',
            default=False,
            help='extract result files out of runfolder into a simpler archive')
+    parser.add_option('-o', '--output-dir', default=None,
+           help="specify the default output directory for extract results")
+
     parser.add_option('--run-xml', dest='run_xml',
            default=None,
            help='specify a run_<FlowCell>.xml file for summary reports')
+    
 
     return parser
 
@@ -65,14 +70,17 @@ def main(cmdlist=None):
         root_log = logging.getLogger()
         root_log.setLevel(logging.INFO)
 
+    logging.info('Starting htsworkflow illumina runfolder processing tool.')
     runs = []
     if opt.run_xml:
         # handle ~ shortcut
         opt.run_xml = os.path.expanduser(opt.run_xml)
         tree = ElementTree.parse(opt.run_xml).getroot()
         runs.append(runfolder.PipelineRun(xml=tree))
-    for run_dir in args:
-        runs.extend(runfolder.get_runs(run_dir))
+    for run_pattern in args:
+        # expand args on our own if needed
+        for run_dir in glob(run_pattern):
+            runs.extend(runfolder.get_runs(run_dir))
 
     if len(runs) > 0:
         if opt.summary:
@@ -80,7 +88,7 @@ def main(cmdlist=None):
         if opt.archive:
             runfolder.extract_run_parameters(runs)
         if opt.extract_results:
-            runfolder.extract_results(runs)
+            runfolder.extract_results(runs, opt.output_dir)
 
     return 0