Move some common runfolder path management code into its own module
author Diane Trout <diane@caltech.edu>
Mon, 7 Mar 2011 23:33:19 +0000 (15:33 -0800)
committer Diane Trout <diane@caltech.edu>
Mon, 7 Mar 2011 23:33:19 +0000 (15:33 -0800)
I discovered I had two different functions for checking if something
was a runfolder name. I put them in one place
(htsworkflow/automation/solexa.py) and made both callers import the
shared version.
Then I cleaned up the test code that was calling them.

htsworkflow/automation/copier.py
htsworkflow/automation/solexa.py [new file with mode: 0644]
htsworkflow/automation/spoolwatcher.py
htsworkflow/automation/test/test_runner.py
htsworkflow/automation/test/test_solexa_utils.py [new file with mode: 0644]
test/test_copier.py

index 0f526948615996c7d80ac8c61834b5da81c798aa..0b1256e509e96d6c297c8eaca4c3ab31ed48dfe1 100644 (file)
@@ -13,14 +13,7 @@ import urlparse
 
 from benderjab import rpc
 
-def runfolder_validate(fname):
-    """
-    Return True if fname looks like a runfolder name
-    """
-    if re.match("^[0-9]{6}_[-A-Za-z0-9_]*$", fname):
-        return True
-    else:
-        return False
+from htsworkflow.automation.solexa import is_runfolder
     
 class rsync(object):
   def __init__(self, sources, dest, pwfile):
@@ -240,7 +233,7 @@ class CopierBot(rpc.XmlRpcBot):
         self.rsync.poll()
         
         # see if we're still copying
-        if runfolder_validate(runDir):
+        if is_runfolder(runDir):
             logging.info("recevied sequencing finshed for %s" % (runDir))
             self.pending.append(runDir)
             self.startCopy()
diff --git a/htsworkflow/automation/solexa.py b/htsworkflow/automation/solexa.py
new file mode 100644 (file)
index 0000000..e2b5f59
--- /dev/null
@@ -0,0 +1,41 @@
+"""Utilities to help process solexa/illumina runfolders
+"""
+import os
+import re
+
+def is_runfolder(name):
+    """
+    Is it a runfolder?
+
+    >>> print is_runfolder('090630_HWUSI-EAS999_0006_30LNFAAXX')
+    True
+    >>> print is_runfolder('hello')
+    False
+    """
+    if re.match("^[0-9]{6}_[-A-Za-z0-9_]*$", name):
+        return True
+    else:
+        return False
+
+def get_top_dir(root, path):
+    """
+    Return the directory in path that is a subdirectory of root.
+    e.g.
+
+    >>> print get_top_dir('/a/b/c', '/a/b/c/d/e/f')
+    d
+    >>> print get_top_dir('/a/b/c/', '/a/b/c/d/e/f')
+    d
+    >>> print get_top_dir('/a/b/c', '/g/e/f')
+    None
+    >>> print get_top_dir('/a/b/c', '/a/b/c')
+    <BLANKLINE>
+    """
+    if path.startswith(root):
+        subpath = path[len(root):]
+        if subpath.startswith('/'):
+            subpath = subpath[1:]
+        return subpath.split(os.path.sep)[0]
+    else:
+        return None
+
index 93efe27f96a94478f9262ca0f7d2fb6724911165..932efbf41ca9045656015962e4f2e3593e2812c2 100644 (file)
@@ -7,6 +7,7 @@ import sys
 import time
 
 from htsworkflow.util import mount
+from htsworkflow.automation.solexa import is_runfolder, get_top_dir
 
 # this uses pyinotify
 import pyinotify
@@ -16,41 +17,6 @@ IN_UNMOUNT = EventsCodes.ALL_FLAGS['IN_UNMOUNT']
 
 from benderjab import rpc
 
-def is_runfolder(name):
-    """
-    Is it a runfolder?
-
-    >>> print is_runfolder('090630_HWUSI-EAS999_0006_30LNFAAXX')
-    True
-    >>> print is_runfolder('hello')
-    False
-    """
-    if re.match("[0-9]{6}_.*", name):
-        return True
-    else:
-        return False
-
-def get_top_dir(root, path):
-    """
-    Return the directory in path that is a subdirectory of root.
-    e.g.
-
-    >>> print get_top_dir('/a/b/c', '/a/b/c/d/e/f')
-    d
-    >>> print get_top_dir('/a/b/c/', '/a/b/c/d/e/f')
-    d
-    >>> print get_top_dir('/a/b/c', '/g/e/f')
-    None
-    >>> print get_top_dir('/a/b/c', '/a/b/c')
-    <BLANKLINE>
-    """
-    if path.startswith(root):
-        subpath = path[len(root):]
-        if subpath.startswith('/'):
-            subpath = subpath[1:]
-        return subpath.split(os.path.sep)[0]
-    else:
-        return None
 
 class WatcherEvent(object):
     """
index 6c3b9df5491bd95abfe75d814f273951fb847ce7..1457f9effbb44e764f119184a572cd48d232e940 100644 (file)
@@ -2,7 +2,7 @@ import unittest
 
 
 import os
-from htsworkflow.automation.copier import runfolder_validate
+from htsworkflow.automation.solexa import is_runfolder
 
 def extract_runfolder_path(watchdir, event):
   runfolder_path = watchdir
@@ -13,7 +13,7 @@ def extract_runfolder_path(watchdir, event):
   fragments = path[len(watchdir):].split(os.path.sep)
   for f in fragments:
     runfolder_path = os.path.join(runfolder_path, f)
-    if runfolder_validate(f):
+    if is_runfolder(f):
       return runfolder_path
   return None
 
diff --git a/htsworkflow/automation/test/test_solexa_utils.py b/htsworkflow/automation/test/test_solexa_utils.py
new file mode 100644 (file)
index 0000000..a527ad3
--- /dev/null
@@ -0,0 +1,31 @@
+
+import unittest
+
+from htsworkflow.automation import solexa
+
+class testSolexaRunfolderUtils(unittest.TestCase):
+    def test_is_runfolder(self):
+        self.failUnlessEqual(solexa.is_runfolder(""), False)
+        self.failUnlessEqual(solexa.is_runfolder("1345_23"), False)
+        self.failUnlessEqual(solexa.is_runfolder("123456_asdf-$23'"), False)
+        self.failUnlessEqual(solexa.is_runfolder("123456_USI-EAS44"), True)
+        self.failUnlessEqual(solexa.is_runfolder("123456_USI-EAS44 "), False)
+
+
+    def test_get_top_dir(self):
+        test_data = [ # root, path, response
+                      ('/a/b/c', '/a/b/c/d/e/f', 'd'),
+                      ('/a/b/c/', '/a/b/c/d/e/f', 'd'),
+                      ('/a/b/c', '/g/e/f', None),
+                      ('/a/b/c', '/a/b/c', ''),
+                    ]
+        
+        for root, path, response in test_data:
+            self.failUnlessEqual(solexa.get_top_dir(root, path), response)
+            
+def suite():
+    return unittest.makeSuite(testSolexaRunfolderUtils, 'test')
+
+if __name__ == "__main__":
+    unittest.main(defaultTest="suite")
+    
index 3a0890f9d14fa2ef1e0b30fc94917b393caf8226..36078dc438b083b7fec43dcbe932bc2bdab21225 100644 (file)
@@ -2,15 +2,9 @@ import unittest
 
 from StringIO import StringIO
 from htsworkflow.automation import copier
+from htsworkflow.automation.solexa import is_runfolder
 
-class testCopier(unittest.TestCase):
-    def test_runfolder_validate(self):
-        self.failUnlessEqual(copier.runfolder_validate(""), False)
-        self.failUnlessEqual(copier.runfolder_validate("1345_23"), False)
-        self.failUnlessEqual(copier.runfolder_validate("123456_asdf-$23'"), False)
-        self.failUnlessEqual(copier.runfolder_validate("123456_USI-EAS44"), True)
-        self.failUnlessEqual(copier.runfolder_validate("123456_USI-EAS44 "), False)
-        
+class testCopier(unittest.TestCase):        
     def test_empty_config(self):
         cfg = StringIO("""[fake]
 something: unrelated