"""
import collections
import re
-PAIRED_TEMPLATE = '{lib_id}_{flowcell}_c{cycle}_l{lane}_r{read}.fastq'
-SINGLE_TEMPLATE = '{lib_id}_{flowcell}_c{cycle}_l{lane}.fastq'
+# Filename templates. compression_extension is appended verbatim after
+# '.fastq', so it must carry its own leading dot (e.g. '.gz') or be empty.
+PAIRED_TEMPLATE = '{lib_id}_{flowcell}_c{cycle}_l{lane}_r{read}.fastq{compression_extension}'
+SINGLE_TEMPLATE = '{lib_id}_{flowcell}_c{cycle}_l{lane}.fastq{compression_extension}'
+# Parses names of the shape produced by the templates above. The optional
+# compression suffix must start with a literal dot; an unescaped '.' would
+# match any character and accept names such as '...fastqXgz'.
FASTQ_RE = re.compile(
'(?P<lib_id>[^_]+)_(?P<flowcell>[^_]+)_'\
- 'c(?P<cycle>[\d]+)_l(?P<lane>[\d]+)(_r(?P<read>[\d]))?\.fastq')
+ 'c(?P<cycle>[\d]+)_l(?P<lane>[\d]+)(_r(?P<read>[\d]))?\.fastq(?P<compression_extension>\.[\w]+)?')
class FastqName(collections.Mapping):
+ """Utility class to convert to the standardized submission fastq name.
+ """
def __init__(self, is_paired=None, **kwargs):
- self._attributes = ('flowcell', 'lib_id', 'lane', 'read', 'cycle')
+ """Create a fastq name handler.
+
+ Takes filename or common attributes like flowcell, lib_id, lane, read, cycle
+ """
+ self._attributes = ('flowcell', 'lib_id', 'lane', 'read', 'cycle', 'compression_extension')
self._is_paired = is_paired
if len(kwargs) == 0:
value = kwargs[k]
self[k] = value
-
def _init_by_filename(self, filename):
match = FASTQ_RE.match(filename)
if match is None:
for k in self.keys():
if k == 'read':
continue
- if self[k] is None:
+ elif k == 'compression_extension':
+ if self[k] not in (None, '', '.gz', '.bz2'):
+ return False
+ elif self[k] is None:
return False
return True
is_valid = property(_is_valid)
"Please set all needed variables before generating a filename")
T = PAIRED_TEMPLATE if self.is_paired else SINGLE_TEMPLATE
- return T.format(**self)
+ attributes = {}
+ for k in self:
+ v = self[k]
+ attributes[k] = v if v is not None else ''
+ return T.format(**attributes)
filename = property(_get_filename)
def __iter__(self):