From f12244468a66fedbbc758c2a2cbeba9d3a1ed9bc Mon Sep 17 00:00:00 2001
From: Diane Trout <diane@caltech.edu>
Date: Mon, 5 Mar 2012 14:16:11 -0800
Subject: [PATCH] Use named tuple for sequence flowcell/start/stop/project dir
 extractor

get_flowcell_cycle was returning too many bare positional values, so I
thought returning a named tuple (FlowcellPath, with the fields flowcell,
start, stop and project) would make the result more comprehensible.
---
 htsworkflow/pipelines/sequences.py           | 11 ++++++++++-
 htsworkflow/pipelines/test/test_sequences.py | 18 +++++++++++++++++-
 2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/htsworkflow/pipelines/sequences.py b/htsworkflow/pipelines/sequences.py
index f21e48c..f3cc9fe 100644
--- a/htsworkflow/pipelines/sequences.py
+++ b/htsworkflow/pipelines/sequences.py
@@ -1,8 +1,10 @@
 """
 Utilities to work with the various eras of sequence archive files
 """
+import collections
 import logging
 import os
+import types
 import re
 
 LOGGER = logging.getLogger(__name__)
@@ -29,6 +31,9 @@ CREATE TABLE %(table)s (
 """ %( {'table': SEQUENCE_TABLE_NAME} )
     return cursor.execute(sql)
 
+FlowcellPath = collections.namedtuple('FlowcellPath',
+                                      'flowcell start stop project')
+
 class SequenceFile(object):
     """
     Simple container class that holds the path to a sequence archive
@@ -118,6 +123,7 @@ def get_flowcell_cycle(path):
     """
     Extract flowcell, cycle from pathname
     """
+    path = os.path.normpath(path)
     project = None
     rest, tail = os.path.split(path)
     if tail.startswith('Project_'):
@@ -140,7 +146,7 @@ def get_flowcell_cycle(path):
     if stop is not None:
         stop = int(stop)
 
-    return flowcell, start, stop, project
+    return FlowcellPath(flowcell, start, stop, project)
 
 def parse_srf(path, filename):
     flowcell_dir, start, stop, project = get_flowcell_cycle(path)
@@ -243,6 +249,9 @@ def scan_for_sequences(dirs):
     Scan through a list of directories for sequence like files
     """
     sequences = []
+    if type(dirs) in types.StringTypes:
+        raise ValueError("You probably want a list or set, not a string")
+
     for d in dirs:
         LOGGER.info("Scanning %s for sequences" % (d,))
         if not os.path.exists(d):
diff --git a/htsworkflow/pipelines/test/test_sequences.py b/htsworkflow/pipelines/test/test_sequences.py
index cede8c2..157246a 100644
--- a/htsworkflow/pipelines/test/test_sequences.py
+++ b/htsworkflow/pipelines/test/test_sequences.py
@@ -4,10 +4,27 @@ import unittest
 
 from htsworkflow.pipelines import sequences
 
+
 class SequenceFileTests(unittest.TestCase):
     """
     Make sure the sequence archive class works
     """
+    def test_get_flowcell_cycle(self):
+        tests = [
+            ('/root/42BW9AAXX/C1-152',
+             sequences.FlowcellPath('42BW9AAXX', 1, 152, None)),
+            ('/root/42BW9AAXX/C1-152/',
+             sequences.FlowcellPath('42BW9AAXX', 1, 152, None)),
+            ('/root/42BW9AAXX/C1-152/Project_12345',
+             sequences.FlowcellPath('42BW9AAXX', 1, 152, 'Project_12345')),
+            ('/root/42BW9AAXX/C1-152/Project_12345/',
+             sequences.FlowcellPath('42BW9AAXX', 1, 152, 'Project_12345')),
+        ]
+
+        for t in tests:
+            path = sequences.get_flowcell_cycle(t[0])
+            self.failUnlessEqual(path, t[1])
+
     def test_flowcell_cycle(self):
         """
         Make sure code to parse directory heirarchy works
@@ -38,7 +55,6 @@ class SequenceFileTests(unittest.TestCase):
         path = '/root/42BW9AAXX/other'
         self.failUnlessRaises(ValueError, sequences.get_flowcell_cycle, path)
 
-
     def test_srf(self):
         path = '/root/42BW9AAXX/C1-38'
         name = 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_4.srf'
-- 
2.30.2