Update fastqname code & test code to better support having optional compression exten...
[htsworkflow.git] / htsworkflow / submission / fastqname.py
index f749d4096246529b264d495d883cad6cb7c005fb..0ef67da8f0f9c2a1833903bf44101493f0250ec1 100644 (file)
@@ -2,16 +2,22 @@
 """
 import collections
 import re
-PAIRED_TEMPLATE = '{lib_id}_{flowcell}_c{cycle}_l{lane}_r{read}.fastq'
-SINGLE_TEMPLATE = '{lib_id}_{flowcell}_c{cycle}_l{lane}.fastq'
+PAIRED_TEMPLATE = '{lib_id}_{flowcell}_c{cycle}_l{lane}_r{read}.fastq{compression_extension}'
+SINGLE_TEMPLATE = '{lib_id}_{flowcell}_c{cycle}_l{lane}.fastq{compression_extension}'
 
 FASTQ_RE = re.compile(
     '(?P<lib_id>[^_]+)_(?P<flowcell>[^_]+)_'\
-    'c(?P<cycle>[\d]+)_l(?P<lane>[\d]+)(_r(?P<read>[\d]))?\.fastq')
+    'c(?P<cycle>[\d]+)_l(?P<lane>[\d]+)(_r(?P<read>[\d]))?\.fastq(?P<compression_extension>.[\w]+)?')
 
 class FastqName(collections.Mapping):
+    """Utility class to convert to the standardized submission fastq name.
+    """
     def __init__(self, is_paired=None, **kwargs):
-        self._attributes = ('flowcell', 'lib_id', 'lane', 'read', 'cycle')
+        """Create a fastq name handler.
+
+        Takes filename or common attributes like flowcell, lib_id, lane, read, cycle
+        """
+        self._attributes = ('flowcell', 'lib_id', 'lane', 'read', 'cycle', 'compression_extension')
         self._is_paired = is_paired
 
         if len(kwargs) == 0:
@@ -28,7 +34,6 @@ class FastqName(collections.Mapping):
                 value = kwargs[k]
             self[k] = value
 
-
     def _init_by_filename(self, filename):
         match = FASTQ_RE.match(filename)
         if match is None:
@@ -53,7 +58,10 @@ class FastqName(collections.Mapping):
         for k in self.keys():
             if k == 'read':
                 continue
-            if self[k] is None:
+            elif k == 'compression_extension':
+                if self[k] not in (None, '', '.gz', '.bz2'):
+                    return False
+            elif self[k] is None:
                 return False
         return True
     is_valid = property(_is_valid)
@@ -64,7 +72,11 @@ class FastqName(collections.Mapping):
                 "Please set all needed variables before generating a filename")
 
         T = PAIRED_TEMPLATE if self.is_paired else SINGLE_TEMPLATE
-        return T.format(**self)
+        attributes = {}
+        for k in self:
+            v = self[k]
+            attributes[k] = v if v is not None else ''
+        return T.format(**attributes)
     filename = property(_get_filename)
 
     def __iter__(self):