2 """Write fastq data from multiple compressed files into a single file
7 from optparse import OptionParser
10 from htsworkflow.util.version import version
11 from htsworkflow.util.opener import autoopen
12 from htsworkflow.util.conversion import parse_slice
21 def main(cmdline=None):
22     """Command line driver: [None, 'option', '*.fastq.bz2']
# NOTE(review): this chunk appears sampled/garbled -- stray original line
# numbers are fused into each line and several lines are missing (the
# docstring terminator, --version handling, the default output destination,
# and presumably a desplitter.run() call). Comments below describe only
# what is visible.
24 parser = make_parser()
25 opts, args = parser.parse_args(cmdline)
# When -o/--output was given, write the merged fastq there; the fallback
# destination (presumably sys.stdout) is set on a line not visible here --
# TODO confirm against the full file.
31 if opts.output is not None:
32 output = open(opts.output, 'w')
# Merge every decompressed stream matched by the command-line glob
# patterns into `output`, trimming sequence/quality lines by the
# user-supplied python-style slice.
36 desplitter = DesplitFastq(file_generator(args), output)
37 desplitter.trim = parse_slice(opts.slice)
44 """Generate an option parser for above main function"""
46 usage = '%prog: [options] *.fastq.gz'
47 parser = OptionParser(usage)
49 parser.add_option('-o', '--output', default=None,
50 help='output fastq file')
51 parser.add_option('-s', '--slice',
52 help="specify python slice, e.g. 0:75, 0:-1",
54 parser.add_option("--version", default=False, action="store_true",
55 help="report software version")
def file_generator(pattern_list):
    """Given a list of glob patterns return decompressed streams

    Each pattern is expanded in order; every matching filename is opened
    via autoopen (which transparently handles compressed files) in read
    mode and yielded lazily to the caller.
    """
    for glob_pattern in pattern_list:
        matches = glob(glob_pattern)
        for match in matches:
            yield autoopen(match, 'r')
67 class DesplitFastq(object):
68 """Merge multiple fastq files into a single file"""
# sources: iterable of open, file-like objects yielding fastq lines.
# destination: writable file-like object that receives the merged output.
69 def __init__(self, sources, destination):
70 self.sources = sources
71 self.destination = destination
# making_fastq: flag whose consumer is not visible in this view -- TODO
# confirm its purpose against the full file.
73 self.making_fastq = True
# trim: slice applied to sequence and quality lines; default keeps
# everything.
74 self.trim = slice(None)
# NOTE(review): the `def run(self):` line, the docstring opener, the
# per-line iteration, the `state` initialization, and the state-advance
# assignments are all missing from this sampled view; the fragment below
# is the visible remainder of that method. The SEQ_HEADER/SEQUENCE/
# QUAL_HEADER/QUALITY constants are defined on lines not shown here.
79 This is here so we can run via threading/multiprocessing APIs
# Visible skeleton of a four-state machine over fastq records:
# header line, sequence line, '+' line, quality line.
82 for stream in self.sources:
85 if state == SEQ_HEADER:
86 self.destination.write(line)
88 elif state == SEQUENCE:
# Sequence lines are cut down by the configured slice.
89 self.destination.write(line[self.trim])
91 elif state == QUAL_HEADER:
92 self.destination.write(line)
94 elif state == QUALITY:
# Quality lines get the same trim so their length stays consistent
# with the trimmed sequence line.
95 self.destination.write(line[self.trim])
# Re-append a platform line terminator after every record line
# (the input line was presumably stripped on a line not visible here --
# TODO confirm).
97 self.destination.write(os.linesep)
100 if __name__ == "__main__":