"""
Utilities to work with the various eras of sequence archive files
"""
+import collections
import logging
import os
+import types
import re
LOGGER = logging.getLogger(__name__)
""" %( {'table': SEQUENCE_TABLE_NAME} )
return cursor.execute(sql)
+FlowcellPath = collections.namedtuple('FlowcellPath',  # record type returned by get_flowcell_cycle
+ 'flowcell start stop project')  # fields, in order; project is None when the path has no Project_ directory
+
class SequenceFile(object):
"""
Simple container class that holds the path to a sequence archive
"""
Extract flowcell, cycle from pathname
"""
+ path = os.path.normpath(path)
project = None
rest, tail = os.path.split(path)
if tail.startswith('Project_'):
if stop is not None:
stop = int(stop)
- return flowcell, start, stop, project
+ return FlowcellPath(flowcell, start, stop, project)
def parse_srf(path, filename):
flowcell_dir, start, stop, project = get_flowcell_cycle(path)
Scan through a list of directories for sequence like files
"""
sequences = []
+ if type(dirs) in types.StringTypes:
+ raise ValueError("You probably want a list or set, not a string")
+
for d in dirs:
LOGGER.info("Scanning %s for sequences" % (d,))
if not os.path.exists(d):
from htsworkflow.pipelines import sequences
+
class SequenceFileTests(unittest.TestCase):
"""
Make sure the sequence archive class works
"""
+ def test_get_flowcell_cycle(self):  # check get_flowcell_cycle against known paths
+ tests = [  # (input path, expected FlowcellPath) pairs
+ ('/root/42BW9AAXX/C1-152',
+ sequences.FlowcellPath('42BW9AAXX', 1, 152, None)),
+ ('/root/42BW9AAXX/C1-152/',  # same path with a trailing slash; expected result is unchanged
+ sequences.FlowcellPath('42BW9AAXX', 1, 152, None)),
+ ('/root/42BW9AAXX/C1-152/Project_12345',  # path ending in a Project_ directory
+ sequences.FlowcellPath('42BW9AAXX', 1, 152, 'Project_12345')),
+ ('/root/42BW9AAXX/C1-152/Project_12345/',  # Project_ directory with a trailing slash
+ sequences.FlowcellPath('42BW9AAXX', 1, 152, 'Project_12345')),
+ ]
+
+ for t in tests:  # t[0] is the input path, t[1] the expected tuple
+ path = sequences.get_flowcell_cycle(t[0])
+ self.failUnlessEqual(path, t[1])  # namedtuples compare field by field
+
def test_flowcell_cycle(self):
"""
Make sure code to parse directory hierarchy works
path = '/root/42BW9AAXX/other'
self.failUnlessRaises(ValueError, sequences.get_flowcell_cycle, path)
-
def test_srf(self):
path = '/root/42BW9AAXX/C1-38'
name = 'woldlab_090622_HWI-EAS229_0120_42BW9AAXX_4.srf'