From 573b1e154e8f621c1d60b0e008101d4bec19de11 Mon Sep 17 00:00:00 2001 From: Diane Trout Date: Tue, 21 Jun 2016 14:30:50 -0700 Subject: [PATCH] Implement writing to compressed files for qseq2fastq Along the way refactor some of the code I had in desplit_fastq to make it easier to share the open compressed streams logic --- htsworkflow/pipelines/desplit_fastq.py | 18 ++++++++++++------ htsworkflow/pipelines/qseq2fastq.py | 9 +++++++-- htsworkflow/submission/condorfastq.py | 10 +++++++--- 3 files changed, 26 insertions(+), 11 deletions(-) diff --git a/htsworkflow/pipelines/desplit_fastq.py b/htsworkflow/pipelines/desplit_fastq.py index 46a7509..d3db078 100644 --- a/htsworkflow/pipelines/desplit_fastq.py +++ b/htsworkflow/pipelines/desplit_fastq.py @@ -30,12 +30,7 @@ def main(cmdline=None): return 0 if opts.output is not None: - if opts.bzip: - output = bz2.open(opts.output, 'wt') - elif opts.gzip: - output = gzip.open(opts.output, 'wt') - else: - output = open(opts.output, 'w') + output = open_output(opts.output, opts) else: output = sys.stdout @@ -66,6 +61,17 @@ def make_parser(): return parser +def open_output(output, opts): + """Open output file with right compression library + """ + if opts.bzip: + return bz2.open(output, 'wt') + elif opts.gzip: + return gzip.open(output, 'wt') + else: + return open(output, 'w') + + def file_generator(pattern_list): """Given a list of glob patterns return decompressed streams """ diff --git a/htsworkflow/pipelines/qseq2fastq.py b/htsworkflow/pipelines/qseq2fastq.py index c2fbcaf..dc7a785 100644 --- a/htsworkflow/pipelines/qseq2fastq.py +++ b/htsworkflow/pipelines/qseq2fastq.py @@ -11,6 +11,7 @@ import tarfile from htsworkflow.util.version import version from htsworkflow.util.conversion import parse_slice +from htsworkflow.pipelines.desplit_fastq import open_output def main(cmdline=None): @@ -31,12 +32,12 @@ def main(cmdline=None): qseq_generator = [sys.stdin] if opts.output is not None: - output = open(opts.output, 'w') + output = open_output(opts.output, opts) else: output = sys.stdout if opts.nopass_output is not None: - nopass_output = open(opts.nopass_output, 'w') + nopass_output = open_output(opts.nopass_output, opts) else: nopass_output = None @@ -70,6 +71,10 @@ def make_parser(): default=None) parser.add_option("--pf", help="report pass filter flag", default=False, action="store_true") + parser.add_option('--gzip', default=False, action='store_true', + help='gzip output') + parser.add_option('--bzip', default=False, action='store_true', + help='bzip output') parser.add_option("--version", default=False, action="store_true", help="report software version") diff --git a/htsworkflow/submission/condorfastq.py b/htsworkflow/submission/condorfastq.py index 01173ca..fdac4ba 100644 --- a/htsworkflow/submission/condorfastq.py +++ b/htsworkflow/submission/condorfastq.py @@ -287,10 +287,13 @@ WHERE { for source in sources: paths.append(source.path) paths.sort() + compression_argument = self.format_compression_flag() + return { 'pyscript': qseq2fastq.__file__, 'flowcell': sources[0].flowcell_id, 'target': target_pathname, + 'compression': compression_argument, 'sources': paths, 'ispaired': sources[0].ispaired, 'istar': len(sources) == 1, @@ -301,9 +304,7 @@ WHERE { for source in sources: paths.append(source.path) paths.sort() - compression_argument = '' - if self.compression: - compression_argument = '--'+self.compression + compression_argument = self.format_compression_flag() return { 'pyscript': desplit_fastq.__file__, @@ -313,6 +314,9 @@ WHERE { 'ispaired': sources[0].ispaired, } + def format_compression_flag(self): + return '--'+self.compression if self.compression else '' + def make_lane_dict(lib_db, lib_id): """ -- 2.30.2