Directly generate compressed fastq files from HiSeq split fastqs.
[htsworkflow.git] / htsworkflow / submission / fastqname.py
index f749d4096246529b264d495d883cad6cb7c005fb..ac3d5fc4ed59a4ff944588c015ae402fba1b0e53 100644 (file)
@@ -2,16 +2,22 @@
 """
 import collections
 import re
-PAIRED_TEMPLATE = '{lib_id}_{flowcell}_c{cycle}_l{lane}_r{read}.fastq'
-SINGLE_TEMPLATE = '{lib_id}_{flowcell}_c{cycle}_l{lane}.fastq'
+PAIRED_TEMPLATE = '{lib_id}_{flowcell}_c{cycle}_l{lane}_r{read}.fastq{compression_extension}'
+SINGLE_TEMPLATE = '{lib_id}_{flowcell}_c{cycle}_l{lane}.fastq{compression_extension}'
 
 FASTQ_RE = re.compile(
     '(?P<lib_id>[^_]+)_(?P<flowcell>[^_]+)_'\
     'c(?P<cycle>[\d]+)_l(?P<lane>[\d]+)(_r(?P<read>[\d]))?\.fastq')
 
 class FastqName(collections.Mapping):
+    """Utility class to convert to the standardized submission fastq name.
+    """
     def __init__(self, is_paired=None, **kwargs):
-        self._attributes = ('flowcell', 'lib_id', 'lane', 'read', 'cycle')
+        """Create a fastq name handler.
+
+        Takes filename or common attributes like flowcell, lib_id, lane, read, cycle, compression_extension
+        """
+        self._attributes = ('flowcell', 'lib_id', 'lane', 'read', 'cycle', 'compression_extension')
         self._is_paired = is_paired
 
         if len(kwargs) == 0:
@@ -28,7 +34,6 @@ class FastqName(collections.Mapping):
                 value = kwargs[k]
             self[k] = value
 
-
     def _init_by_filename(self, filename):
         match = FASTQ_RE.match(filename)
         if match is None: