+ # Submission fastq name layout:
+ #   <lib_id>_<flowcell>_c<cycle>_l<lane>[_r<read>].fastq[.<extension>]
+ # The trailing extension group is optional and must start with a literal dot.
FASTQ_RE = re.compile(
'(?P<lib_id>[^_]+)_(?P<flowcell>[^_]+)_'\
- 'c(?P<cycle>[\d]+)_l(?P<lane>[\d]+)(_r(?P<read>[\d]))?\.fastq')
+ 'c(?P<cycle>[\d]+)_l(?P<lane>[\d]+)(_r(?P<read>[\d]))?\.fastq(?P<compression_extension>\.[\w]+)?')
class FastqName(collections.Mapping):
"""Utility class to convert to the standardized submission fastq name.
for k in self.keys():
if k == 'read':
continue
- if self[k] is None:
+ elif k == 'compression_extension':
+ if self[k] not in (None, '', '.gz', '.bz2'):
+ return False
+ elif self[k] is None:
return False
return True
is_valid = property(_is_valid)
"Please set all needed variables before generating a filename")
T = PAIRED_TEMPLATE if self.is_paired else SINGLE_TEMPLATE
- return T.format(**self)
+ attributes = {}
+ for k in self:
+ v = self[k]
+ attributes[k] = v if v is not None else ''
+ return T.format(**attributes)
filename = property(_get_filename)
def __iter__(self):
self.assertEqual(fq.lane, "1")
self.assertEqual(fq['lane'], "1")
self.assertEqual(fq.is_paired, False)
+ # The filename has no suffix after ".fastq", so the optional regex group
+ # is unset; expect None, matching the other filename-based tests.
+ # NOTE(review): assumes the constructor stores the regex group as-is - confirm.
+ self.assertEqual(fq.compression_extension, None)
+
+ def test_init_single_filename_gz(self):
+     # Parse a single-read name carrying a ".gz" suffix and check every
+     # field through both attribute access and item access.
+     parsed = FastqName(filename="12345_AABBCCDDXX_c100_l1.fastq.gz")
+     expected_fields = (
+         ('lib_id', "12345"),
+         ('flowcell', "AABBCCDDXX"),
+         ('cycle', "100"),
+         ('lane', "1"),
+     )
+     for field, value in expected_fields:
+         self.assertEqual(getattr(parsed, field), value)
+         self.assertEqual(parsed[field], value)
+     self.assertEqual(parsed.is_paired, False)
+     self.assertEqual(parsed.compression_extension, '.gz')
def test_init_single_filename(self):
fq = FastqName(filename="12345_AABBCCDDXX_c100_l1_r2.fastq")
self.assertEqual(fq.read, "2")
self.assertEqual(fq['read'], "2")
self.assertEqual(fq.is_paired, True)
+ self.assertEqual(fq.compression_extension, None)
def test_init_bad_filename(self):
attribs = {'filename': 'asdf.bam'}
self.assertEqual(fq['lane'], "1")
self.assertEqual(fq.is_paired, False)
self.assertEqual(fq.filename, "12345_AABBCCDDXX_c100_l1.fastq")
+ self.assertEqual(fq.compression_extension, None)
def test_init_single_attributes_set_single(self):
fq = FastqName(lib_id="12345", flowcell="AABBCCDDXX",
self.assertEqual(fq.is_valid, False)
self.assertEqual(fq.is_paired, True)
+ def test_init_single_attributes_set_paired_bzip2(self):
+     # Build the name from explicit attributes with a '.bzip2' extension,
+     # which is not one of the values the validity check accepts; the
+     # value is still expected to be stored unchanged.
+     attributes = {
+         'lib_id': "12345",
+         'flowcell': "AABBCCDDXX",
+         'cycle': "100",
+         'lane': "1",
+         'is_paired': True,
+         'compression_extension': '.bzip2',
+     }
+     name = FastqName(**attributes)
+     self.assertEqual(name.is_valid, False)
+     self.assertEqual(name.is_paired, True)
+     self.assertEqual(name.compression_extension, '.bzip2')
+
def test_init_paired_attributes(self):
fq = FastqName(lib_id="12345", flowcell="AABBCCDDXX",
cycle = "100", lane="1", read="2")