Implement writing to compressed files for qseq2fastq
authorDiane Trout <diane@ghic.org>
Tue, 21 Jun 2016 21:30:50 +0000 (14:30 -0700)
committerDiane Trout <diane@ghic.org>
Tue, 21 Jun 2016 21:30:50 +0000 (14:30 -0700)
Along the way refactor some of the code I had in
desplit_fastq to make it easier to share the open compressed
streams logic

htsworkflow/pipelines/desplit_fastq.py
htsworkflow/pipelines/qseq2fastq.py
htsworkflow/submission/condorfastq.py

index 46a75094a351802b4e3c8e7f02449ebf2d737602..d3db078b0daad29524f8deea9477a796efbdcd9f 100644 (file)
@@ -30,12 +30,7 @@ def main(cmdline=None):
         return 0
 
     if opts.output is not None:
-        if opts.bzip:
-            output = bz2.open(opts.output, 'wt')
-        elif opts.gzip:
-            output = gzip.open(opts.output, 'wt')
-        else:
-            output = open(opts.output, 'w')
+        output = open_output(opts.output, opts)
     else:
         output = sys.stdout
 
@@ -66,6 +61,17 @@ def make_parser():
     return parser
 
 
+def open_output(output, opts):
+    """Open output file with right compression library
+    """
+    if opts.bzip:
+        return bz2.open(output, 'wt')
+    elif opts.gzip:
+        return gzip.open(output, 'wt')
+    else:
+        return open(output, 'w')
+
+
 def file_generator(pattern_list):
     """Given a list of glob patterns return decompressed streams
     """
index c2fbcaf65a5962daf42f3c4953490d6fa64c5c95..dc7a785fac616dc14c0223050dd5a05a1028b57c 100644 (file)
@@ -11,6 +11,7 @@ import tarfile
 
 from htsworkflow.util.version import version
 from htsworkflow.util.conversion import parse_slice
+from htsworkflow.pipelines.desplit_fastq import open_output
 
 
 def main(cmdline=None):
@@ -31,12 +32,12 @@ def main(cmdline=None):
         qseq_generator = [sys.stdin]
 
     if opts.output is not None:
-        output = open(opts.output, 'w')
+        output = open_output(opts.output, opts)
     else:
         output = sys.stdout
 
     if opts.nopass_output is not None:
-        nopass_output = open(opts.nopass_output, 'w')
+        nopass_output = open_output(opts.nopass_output, opts)
     else:
         nopass_output = None
 
@@ -70,6 +71,10 @@ def make_parser():
                       default=None)
     parser.add_option("--pf", help="report pass filter flag", default=False,
                       action="store_true")
+    parser.add_option('--gzip', default=False, action='store_true',
+                      help='gzip output')
+    parser.add_option('--bzip', default=False, action='store_true',
+                      help='bzip output')
     parser.add_option("--version", default=False, action="store_true",
                       help="report software version")
 
index 01173cad21404f2e3e1a458c8497bbbd8be92ddc..fdac4ba6fa45784d8392af2d5c7a9d95e7c042f0 100644 (file)
@@ -287,10 +287,13 @@ WHERE {
         for source in sources:
             paths.append(source.path)
         paths.sort()
+        compression_argument = self.format_compression_flag()
+
         return {
             'pyscript': qseq2fastq.__file__,
             'flowcell': sources[0].flowcell_id,
             'target': target_pathname,
+            'compression': compression_argument,
             'sources': paths,
             'ispaired': sources[0].ispaired,
             'istar': len(sources) == 1,
@@ -301,9 +304,7 @@ WHERE {
         for source in sources:
             paths.append(source.path)
         paths.sort()
-        compression_argument = ''
-        if self.compression:
-            compression_argument = '--'+self.compression
+        compression_argument = self.format_compression_flag()
 
         return {
             'pyscript': desplit_fastq.__file__,
@@ -313,6 +314,9 @@ WHERE {
             'ispaired': sources[0].ispaired,
         }
 
+    def format_compression_flag(self):
+        return '--'+self.compression if self.compression else ''
+
 
 def make_lane_dict(lib_db, lib_id):
     """