2 """Write fastq data from multiple compressed files into a single file
8 from optparse import OptionParser
11 from htsworkflow.util.version import version
12 from htsworkflow.util.opener import autoopen
13 from htsworkflow.util.conversion import parse_slice
22 def main(cmdline=None):
23 """Command line driver: [None, 'option', '*.fastq.bz2']
25 parser = make_parser()
26 opts, args = parser.parse_args(cmdline)
32 if opts.output is not None:
33 output = open_output(opts.output, opts)
37 desplitter = DesplitFastq(file_generator(args), output)
38 desplitter.trim = parse_slice(opts.slice)
45 """Generate an option parser for above main function"""
47 usage = '%prog: [options] *.fastq.gz'
48 parser = OptionParser(usage)
50 parser.add_option('-o', '--output', default=None,
51 help='output fastq file')
52 parser.add_option('-s', '--slice',
53 help="specify python slice, e.g. 0:75, 0:-1",
55 parser.add_option('--gzip', default=False, action='store_true',
57 parser.add_option('--bzip', default=False, action='store_true',
59 parser.add_option("--version", default=False, action="store_true",
60 help="report software version")
64 def open_output(output, opts):
65 """Open output file with right compression library
68 return bz2.open(output, 'wt')
70 return gzip.open(output, 'wt')
72 return open(output, 'w')
def file_generator(pattern_list):
    """Yield an open text stream for every file matching the glob patterns.

    Patterns are expanded in the order given.  Within one pattern the
    matching filenames are sorted: ``glob`` returns matches in an
    arbitrary, filesystem-dependent order, and the streams are consumed
    in the order they are produced, so sorting makes the merged output
    reproducible between runs and machines.

    :param pattern_list: iterable of glob pattern strings
    :returns: generator of streams opened with ``autoopen(filename, 'rt')``
        (presumably decompressing based on the file type -- see autoopen);
        a pattern that matches no files contributes no streams
    """
    for pattern in pattern_list:
        # sorted() pins the order; plain glob() order is not guaranteed.
        for filename in sorted(glob(pattern)):
            yield autoopen(filename, 'rt')
83 class DesplitFastq(object):
84 """Merge multiple fastq files into a single file"""
85 def __init__(self, sources, destination):
86 self.sources = sources
87 self.destination = destination
89 self.making_fastq = True
90 self.trim = slice(None)
95 This is here so we can run via threading/multiprocessing APIs
99 for stream in self.sources:
103 if state == SEQ_HEADER:
104 self.destination.write(line)
106 elif state == SEQUENCE:
107 self.destination.write(line[self.trim])
109 elif state == QUAL_HEADER:
110 self.destination.write(line)
112 elif state == QUALITY:
113 self.destination.write(line[self.trim])
115 self.destination.write(os.linesep)
118 raise RuntimeError("No files processed")
120 if __name__ == "__main__":