2 """Write fastq data from multiple compressed files into a single file
8 from optparse import OptionParser
11 from htsworkflow.util.version import version
12 from htsworkflow.util.opener import autoopen
13 from htsworkflow.util.conversion import parse_slice
def main(cmdline=None):
    """Command line driver: [None, 'option', '*.fastq.bz2']

    Parses the command line, opens the requested destination, and merges
    every fastq file matched by the positional glob patterns into it.

    :param cmdline: list of argument strings, or None to let the option
                    parser read sys.argv[1:].
    :returns: 0 on completion.
    """
    # Local import so this function does not depend on how the top of the
    # file spells its imports.
    import sys

    parser = make_parser()
    opts, args = parser.parse_args(cmdline)

    if opts.version:
        # NOTE(review): assumes version() returns the printable version
        # string -- confirm against htsworkflow.util.version.
        print(version())
        return 0

    if opts.output is None:
        # NOTE(review): falling back to stdout when no --output is given;
        # confirm this matches the intended default.
        output = sys.stdout
    elif opts.bzip:
        # --bzip is tested first, so it wins if both flags are supplied.
        output = bz2.BZ2File(opts.output, 'w')
    elif opts.gzip:
        output = gzip.GzipFile(opts.output, 'w')
    else:
        output = open(opts.output, 'w')

    try:
        desplitter = DesplitFastq(file_generator(args), output)
        desplitter.trim = parse_slice(opts.slice)
        desplitter.run()
    finally:
        # Compressed writers only emit their trailer on close(); leaving the
        # handle open can produce a truncated archive.  stdout is not ours
        # to close.
        if output is not sys.stdout:
            output.close()

    return 0
def make_parser():
    """Generate an option parser for above main function

    :returns: an optparse.OptionParser that understands -o/--output,
              -s/--slice, --gzip, --bzip and --version.  Positional
              arguments are left for the caller (glob patterns of fastq
              files).
    """
    usage = '%prog: [options] *.fastq.gz'
    parser = OptionParser(usage)

    parser.add_option('-o', '--output', default=None,
                      help='output fastq file')
    # The slice is kept as text here; the caller converts it.
    parser.add_option('-s', '--slice',
                      help="specify python slice, e.g. 0:75, 0:-1",
                      default=None)
    parser.add_option('--gzip', default=False, action='store_true',
                      help='compress output with gzip')
    parser.add_option('--bzip', default=False, action='store_true',
                      help='compress output with bzip2')
    parser.add_option("--version", default=False, action="store_true",
                      help="report software version")

    return parser
def file_generator(pattern_list):
    """Given a list of glob patterns return decompressed streams

    Patterns are expanded lazily, in the order given.  Within one pattern
    the matching filenames are visited in sorted order so that repeated
    runs over the same directory always merge the files in the same
    sequence.

    :param pattern_list: iterable of glob pattern strings.
    :returns: generator yielding autoopen(filename, 'r') for each match.
              Patterns that match nothing contribute no streams.
    """
    for pattern in pattern_list:
        # glob() makes no promise about result order; sort for determinism.
        for filename in sorted(glob(pattern)):
            yield autoopen(filename, 'r')
class DesplitFastq(object):
    """Merge multiple fastq files into a single file

    sources is an iterable of line-iterable streams and destination is any
    object with a write() method.  Assign a slice object to ``trim`` to cut
    the sequence and quality lines of every record; header lines are always
    copied whole.
    """
    def __init__(self, sources, destination):
        """Remember where to read from and where to write to.

        :param sources: iterable of open streams, each yielding text lines.
        :param destination: writable file-like object.
        """
        self.sources = sources
        self.destination = destination

        # Not read anywhere in the visible code; kept for compatibility.
        self.making_fastq = True
        # slice(None) keeps sequence and quality lines whole.
        self.trim = slice(None)

    def run(self):
        """Do the conversion

        This is here so we can run via threading/multiprocessing APIs
        """
        # Position inside the current four-line record:
        # 0 = sequence header, 1 = sequence, 2 = quality header, 3 = quality.
        # It carries over from one stream to the next.
        record_line = 0
        for stream in self.sources:
            for line in stream:
                # NOTE(review): trailing whitespace is stripped before
                # trimming so a slice such as 0:-1 removes a base rather
                # than the newline -- confirm against the original source.
                line = line.rstrip()
                if record_line in (1, 3):
                    line = line[self.trim]
                self.destination.write(line)
                # Always '\n': os.linesep must not be used as a terminator
                # for text-mode files (it becomes '\r\r\n' on Windows).
                self.destination.write('\n')
                record_line = (record_line + 1) % 4
110 if __name__ == "__main__":