Update fastqname code & test code to better support having optional compression exten...
author Diane Trout <diane@ghic.org>
Tue, 10 Jun 2014 22:11:25 +0000 (15:11 -0700)
committer Diane Trout <diane@ghic.org>
Tue, 10 Jun 2014 23:34:31 +0000 (16:34 -0700)
It's a bit weird, as that's the first property which is optional, so
it needed some special-case logic.

Also, I accept either None or '' for the case of no compression extension, as
condorfastq passes in compression_extension=''.

htsworkflow/submission/fastqname.py
htsworkflow/submission/test/test_fastqname.py

index ac3d5fc4ed59a4ff944588c015ae402fba1b0e53..0ef67da8f0f9c2a1833903bf44101493f0250ec1 100644 (file)
@@ -7,7 +7,7 @@ SINGLE_TEMPLATE = '{lib_id}_{flowcell}_c{cycle}_l{lane}.fastq{compression_extens
 
 FASTQ_RE = re.compile(
     '(?P<lib_id>[^_]+)_(?P<flowcell>[^_]+)_'\
-    'c(?P<cycle>[\d]+)_l(?P<lane>[\d]+)(_r(?P<read>[\d]))?\.fastq')
+    'c(?P<cycle>[\d]+)_l(?P<lane>[\d]+)(_r(?P<read>[\d]))?\.fastq(?P<compression_extension>.[\w]+)?')
 
 class FastqName(collections.Mapping):
     """Utility class to convert to the standardized submission fastq name.
@@ -58,7 +58,10 @@ class FastqName(collections.Mapping):
         for k in self.keys():
             if k == 'read':
                 continue
-            if self[k] is None:
+            elif k == 'compression_extension':
+                if self[k] not in (None, '', '.gz', '.bz2'):
+                    return False
+            elif self[k] is None:
                 return False
         return True
     is_valid = property(_is_valid)
@@ -69,7 +72,11 @@ class FastqName(collections.Mapping):
                 "Please set all needed variables before generating a filename")
 
         T = PAIRED_TEMPLATE if self.is_paired else SINGLE_TEMPLATE
-        return T.format(**self)
+        attributes = {}
+        for k in self:
+            v = self[k]
+            attributes[k] = v if v is not None else ''
+        return T.format(**attributes)
     filename = property(_get_filename)
 
     def __iter__(self):
index d51ad0e303e106126c62df30b00d4240374e68c9..56c094ea3fddb02538df63e730f0dbaf5f4deb39 100644 (file)
@@ -17,6 +17,20 @@ class TestFastqName(TestCase):
         self.assertEqual(fq.lane, "1")
         self.assertEqual(fq['lane'], "1")
         self.assertEqual(fq.is_paired, False)
+        self.assertEqual(fq.compression_extension, '')
+
+    def test_init_single_filename_gz(self):
+        fq = FastqName(filename="12345_AABBCCDDXX_c100_l1.fastq.gz")
+        self.assertEqual(fq.lib_id, "12345")
+        self.assertEqual(fq['lib_id'], "12345")
+        self.assertEqual(fq.flowcell, "AABBCCDDXX")
+        self.assertEqual(fq['flowcell'], "AABBCCDDXX")
+        self.assertEqual(fq.cycle, "100")
+        self.assertEqual(fq['cycle'], "100")
+        self.assertEqual(fq.lane, "1")
+        self.assertEqual(fq['lane'], "1")
+        self.assertEqual(fq.is_paired, False)
+        self.assertEqual(fq.compression_extension, '.gz')
 
     def test_init_single_filename(self):
         fq = FastqName(filename="12345_AABBCCDDXX_c100_l1_r2.fastq")
@@ -31,6 +45,7 @@ class TestFastqName(TestCase):
         self.assertEqual(fq.read, "2")
         self.assertEqual(fq['read'], "2")
         self.assertEqual(fq.is_paired, True)
+        self.assertEqual(fq.compression_extension, None)
 
     def test_init_bad_filename(self):
         attribs = {'filename': 'asdf.bam'}
@@ -50,6 +65,7 @@ class TestFastqName(TestCase):
         self.assertEqual(fq['lane'], "1")
         self.assertEqual(fq.is_paired, False)
         self.assertEqual(fq.filename, "12345_AABBCCDDXX_c100_l1.fastq")
+        self.assertEqual(fq.compression_extension, None)
 
     def test_init_single_attributes_set_single(self):
         fq = FastqName(lib_id="12345", flowcell="AABBCCDDXX",
@@ -63,6 +79,14 @@ class TestFastqName(TestCase):
         self.assertEqual(fq.is_valid, False)
         self.assertEqual(fq.is_paired, True)
 
+    def test_init_single_attributes_set_paired_bzip2(self):
+        fq = FastqName(lib_id="12345", flowcell="AABBCCDDXX",
+                       cycle = "100", lane="1", is_paired=True,
+                       compression_extension='.bzip2')
+        self.assertEqual(fq.is_valid, False)
+        self.assertEqual(fq.is_paired, True)
+        self.assertEqual(fq.compression_extension, '.bzip2')
+
     def test_init_paired_attributes(self):
         fq = FastqName(lib_id="12345", flowcell="AABBCCDDXX",
                        cycle = "100", lane="1", read="2")