Update fastqname code & test code to better support having optional compression extensions
[htsworkflow.git] / htsworkflow / submission / fastqname.py
index 9dd52a0319fb9e9efa230e68344ef53156645be7..0ef67da8f0f9c2a1833903bf44101493f0250ec1 100644 (file)
@@ -2,12 +2,12 @@
 """
 import collections
 import re
-PAIRED_TEMPLATE = '{lib_id}_{flowcell}_c{cycle}_l{lane}_r{read}.fastq'
-SINGLE_TEMPLATE = '{lib_id}_{flowcell}_c{cycle}_l{lane}.fastq'
+PAIRED_TEMPLATE = '{lib_id}_{flowcell}_c{cycle}_l{lane}_r{read}.fastq{compression_extension}'
+SINGLE_TEMPLATE = '{lib_id}_{flowcell}_c{cycle}_l{lane}.fastq{compression_extension}'
 
 FASTQ_RE = re.compile(
     '(?P<lib_id>[^_]+)_(?P<flowcell>[^_]+)_'\
-    'c(?P<cycle>[\d]+)_l(?P<lane>[\d]+)(_r(?P<read>[\d]))?\.fastq')
+    'c(?P<cycle>[\d]+)_l(?P<lane>[\d]+)(_r(?P<read>[\d]))?\.fastq(?P<compression_extension>.[\w]+)?')
 
 class FastqName(collections.Mapping):
     """Utility class to convert to the standardized submission fastq name.
@@ -17,7 +17,7 @@ class FastqName(collections.Mapping):
 
         Takes filename or common attributes like flowcell, lib_id, lane, read, cycle
         """
-        self._attributes = ('flowcell', 'lib_id', 'lane', 'read', 'cycle')
+        self._attributes = ('flowcell', 'lib_id', 'lane', 'read', 'cycle', 'compression_extension')
         self._is_paired = is_paired
 
         if len(kwargs) == 0:
@@ -58,7 +58,10 @@ class FastqName(collections.Mapping):
         for k in self.keys():
             if k == 'read':
                 continue
-            if self[k] is None:
+            elif k == 'compression_extension':
+                if self[k] not in (None, '', '.gz', '.bz2'):
+                    return False
+            elif self[k] is None:
                 return False
         return True
     is_valid = property(_is_valid)
@@ -69,7 +72,11 @@ class FastqName(collections.Mapping):
                 "Please set all needed variables before generating a filename")
 
         T = PAIRED_TEMPLATE if self.is_paired else SINGLE_TEMPLATE
-        return T.format(**self)
+        attributes = {}
+        for k in self:
+            v = self[k]
+            attributes[k] = v if v is not None else ''
+        return T.format(**attributes)
     filename = property(_get_filename)
 
     def __iter__(self):