Use named tuple for sequence flowcell/start/stop/project dir extractor

author Diane Trout <diane@caltech.edu>

Mon, 5 Mar 2012 22:16:11 +0000 (14:16 -0800)

committer Diane Trout <diane@caltech.edu>

Mon, 5 Mar 2012 22:18:02 +0000 (14:18 -0800)
author Diane Trout <diane@caltech.edu>
Mon, 5 Mar 2012 22:16:11 +0000 (14:16 -0800)
committer Diane Trout <diane@caltech.edu>
Mon, 5 Mar 2012 22:18:02 +0000 (14:18 -0800)
diff --git a/htsworkflow/pipelines/sequences.py b/htsworkflow/pipelines/sequences.py

index f21e48c66aea85a3a6f71fdfab93e6dfcde40c7e..f3cc9fe6df28a3513d57f0f24f1b67939f429879 100644 (file)
--- a/htsworkflow/pipelines/sequences.py
+++ b/htsworkflow/pipelines/sequences.py
@@ -1,8 +1,10 @@
  """
  Utilities to work with the various eras of sequence archive files
  """
+import collections
  import logging
  import os
+import types
  import re
  
  LOGGER = logging.getLogger(__name__)
@@ -29,6 +31,9 @@ CREATE TABLE %(table)s (
  """ %( {'table': SEQUENCE_TABLE_NAME} )
      return cursor.execute(sql)
  
+FlowcellPath = collections.namedtuple('FlowcellPath',
+                                      'flowcell start stop project')
+
  class SequenceFile(object):
      """
      Simple container class that holds the path to a sequence archive
@@ -118,6 +123,7 @@ def get_flowcell_cycle(path):
      """
      Extract flowcell, cycle from pathname
      """
+    path = os.path.normpath(path)
      project = None
      rest, tail = os.path.split(path)
      if tail.startswith('Project_'):
@@ -140,7 +146,7 @@ def get_flowcell_cycle(path):
      if stop is not None:
          stop = int(stop)
  
-    return flowcell, start, stop, project
+    return FlowcellPath(flowcell, start, stop, project)
  
  def parse_srf(path, filename):
      flowcell_dir, start, stop, project = get_flowcell_cycle(path)
@@ -243,6 +249,9 @@ def scan_for_sequences(dirs):
      Scan through a list of directories for sequence like files
      """
      sequences = []
+    if type(dirs) in types.StringTypes:
+        raise ValueError("You probably want a list or set, not a string")
+
      for d in dirs:
          LOGGER.info("Scanning %s for sequences" % (d,))
          if not os.path.exists(d):
diff --git a/htsworkflow/pipelines/test/test_sequences.py b/htsworkflow/pipelines/test/test_sequences.py

index cede8c2b82644adf88bcfa13bb075ac1bce1f58a..157246a94e3eae277e78df2813f5670aabb89621 100644 (file)
--- a/htsworkflow/pipelines/test/test_sequences.py
+++ b/htsworkflow/pipelines/test/test_sequences.py
@@ -4,10 +4,27 @@ import unittest
  
  from htsworkflow.pipelines import sequences
  
+
  class SequenceFileTests(unittest.TestCase):
      """
      Make sure the sequence archive class works
      """
+    def test_get_flowcell_cycle(self):
+        tests = [
+            ('/root/42BW9AAXX/C1-152',
+             sequences.FlowcellPath('42BW9AAXX', 1, 152, None)),
+            ('/root/42BW9AAXX/C1-152/',
+             sequences.FlowcellPath('42BW9AAXX', 1, 152, None)),
+            ('/root/42BW9AAXX/C1-152/Project_12345',
+             sequences.FlowcellPath('42BW9AAXX', 1, 152, 'Project_12345')),
+            ('/root/42BW9AAXX/C1-152/Project_12345/',
+             sequences.FlowcellPath('42BW9AAXX', 1, 152, 'Project_12345')),
+        ]
+
+        for t in tests:
+            path = sequences.get_flowcell_cycle(t[0])
+            self.failUnlessEqual(path, t[1])
+
      def test_flowcell_cycle(self):
          """
          Make sure code to parse directory heirarchy works
@@ -38,7 +55,6 @@ class SequenceFileTests(unittest.TestCase):
          path = '/root/42BW9AAXX/other'
          self.failUnlessRaises(ValueError, sequences.get_flowcell_cycle, path)
  
-
      def test_srf(self):
          path = '/root/42BW9AAXX/C1-38'
          name = 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_4.srf'
author	Diane Trout <diane@caltech.edu>
	Mon, 5 Mar 2012 22:16:11 +0000 (14:16 -0800)
committer	Diane Trout <diane@caltech.edu>
	Mon, 5 Mar 2012 22:18:02 +0000 (14:18 -0800)
htsworkflow/pipelines/sequences.py		patch | blob | history
htsworkflow/pipelines/test/test_sequences.py		patch | blob | history