Update fastqname code & test code to better support having optional compression extensions
[htsworkflow.git] / htsworkflow / submission / fastqname.py
index ac3d5fc4ed59a4ff944588c015ae402fba1b0e53..0ef67da8f0f9c2a1833903bf44101493f0250ec1 100644 (file)
@@ -7,7 +7,7 @@ SINGLE_TEMPLATE = '{lib_id}_{flowcell}_c{cycle}_l{lane}.fastq{compression_extens
 
 FASTQ_RE = re.compile(
     '(?P<lib_id>[^_]+)_(?P<flowcell>[^_]+)_'\
-    'c(?P<cycle>[\d]+)_l(?P<lane>[\d]+)(_r(?P<read>[\d]))?\.fastq')
+    'c(?P<cycle>[\d]+)_l(?P<lane>[\d]+)(_r(?P<read>[\d]))?\.fastq(?P<compression_extension>.[\w]+)?')
 
 class FastqName(collections.Mapping):
     """Utility class to convert to the standardized submission fastq name.
@@ -58,7 +58,10 @@ class FastqName(collections.Mapping):
         for k in self.keys():
             if k == 'read':
                 continue
-            if self[k] is None:
+            elif k == 'compression_extension':
+                if self[k] not in (None, '', '.gz', '.bz2'):
+                    return False
+            elif self[k] is None:
                 return False
         return True
     is_valid = property(_is_valid)
@@ -69,7 +72,11 @@ class FastqName(collections.Mapping):
                 "Please set all needed variables before generating a filename")
 
         T = PAIRED_TEMPLATE if self.is_paired else SINGLE_TEMPLATE
-        return T.format(**self)
+        attributes = {}
+        for k in self:
+            v = self[k]
+            attributes[k] = v if v is not None else ''
+        return T.format(**attributes)
     filename = property(_get_filename)
 
     def __iter__(self):