From 1f296ab3a9c3e7de3270704f789d9e123b0cb69e Mon Sep 17 00:00:00 2001 From: Diane Trout Date: Tue, 10 Jun 2014 15:11:25 -0700 Subject: [PATCH] Update fastqname code & test code to better support having an optional compression extension. It's a bit weird as that's the first property which is optional, so it needed some special case logic. Also I accept either None or '' for the case of no compression extension as condorfastq passes in compression_extension='' --- htsworkflow/submission/fastqname.py | 13 +++++++--- htsworkflow/submission/test/test_fastqname.py | 24 +++++++++++++++++++ 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/htsworkflow/submission/fastqname.py b/htsworkflow/submission/fastqname.py index ac3d5fc..0ef67da 100644 --- a/htsworkflow/submission/fastqname.py +++ b/htsworkflow/submission/fastqname.py @@ -7,7 +7,7 @@ SINGLE_TEMPLATE = '{lib_id}_{flowcell}_c{cycle}_l{lane}.fastq{compression_extens FASTQ_RE = re.compile( '(?P[^_]+)_(?P[^_]+)_'\ - 'c(?P[\d]+)_l(?P[\d]+)(_r(?P[\d]))?\.fastq') + 'c(?P[\d]+)_l(?P[\d]+)(_r(?P[\d]))?\.fastq(?P.[\w]+)?') class FastqName(collections.Mapping): """Utility class to convert to the standardized submission fastq name. 
@@ -58,7 +58,10 @@ class FastqName(collections.Mapping): for k in self.keys(): if k == 'read': continue - if self[k] is None: + elif k == 'compression_extension': + if self[k] not in (None, '', '.gz', '.bz2'): + return False + elif self[k] is None: return False return True is_valid = property(_is_valid) @@ -69,7 +72,11 @@ class FastqName(collections.Mapping): "Please set all needed variables before generating a filename") T = PAIRED_TEMPLATE if self.is_paired else SINGLE_TEMPLATE - return T.format(**self) + attributes = {} + for k in self: + v = self[k] + attributes[k] = v if v is not None else '' + return T.format(**attributes) filename = property(_get_filename) def __iter__(self): diff --git a/htsworkflow/submission/test/test_fastqname.py b/htsworkflow/submission/test/test_fastqname.py index d51ad0e..56c094e 100644 --- a/htsworkflow/submission/test/test_fastqname.py +++ b/htsworkflow/submission/test/test_fastqname.py @@ -17,6 +17,20 @@ class TestFastqName(TestCase): self.assertEqual(fq.lane, "1") self.assertEqual(fq['lane'], "1") self.assertEqual(fq.is_paired, False) + self.assertEqual(fq.compression_extension, '') + + def test_init_single_filename_gz(self): + fq = FastqName(filename="12345_AABBCCDDXX_c100_l1.fastq.gz") + self.assertEqual(fq.lib_id, "12345") + self.assertEqual(fq['lib_id'], "12345") + self.assertEqual(fq.flowcell, "AABBCCDDXX") + self.assertEqual(fq['flowcell'], "AABBCCDDXX") + self.assertEqual(fq.cycle, "100") + self.assertEqual(fq['cycle'], "100") + self.assertEqual(fq.lane, "1") + self.assertEqual(fq['lane'], "1") + self.assertEqual(fq.is_paired, False) + self.assertEqual(fq.compression_extension, '.gz') def test_init_single_filename(self): fq = FastqName(filename="12345_AABBCCDDXX_c100_l1_r2.fastq") @@ -31,6 +45,7 @@ class TestFastqName(TestCase): self.assertEqual(fq.read, "2") self.assertEqual(fq['read'], "2") self.assertEqual(fq.is_paired, True) + self.assertEqual(fq.compression_extension, None) def test_init_bad_filename(self): attribs = 
{'filename': 'asdf.bam'} @@ -50,6 +65,7 @@ class TestFastqName(TestCase): self.assertEqual(fq['lane'], "1") self.assertEqual(fq.is_paired, False) self.assertEqual(fq.filename, "12345_AABBCCDDXX_c100_l1.fastq") + self.assertEqual(fq.compression_extension, None) def test_init_single_attributes_set_single(self): fq = FastqName(lib_id="12345", flowcell="AABBCCDDXX", @@ -63,6 +79,14 @@ class TestFastqName(TestCase): self.assertEqual(fq.is_valid, False) self.assertEqual(fq.is_paired, True) + def test_init_single_attributes_set_paired_bzip2(self): + fq = FastqName(lib_id="12345", flowcell="AABBCCDDXX", + cycle = "100", lane="1", is_paired=True, + compression_extension='.bzip2') + self.assertEqual(fq.is_valid, False) + self.assertEqual(fq.is_paired, True) + self.assertEqual(fq.compression_extension, '.bzip2') + def test_init_paired_attributes(self): fq = FastqName(lib_id="12345", flowcell="AABBCCDDXX", cycle = "100", lane="1", read="2") -- 2.30.2