--- /dev/null
+#!/usr/bin/env python
+
+import os
+from optparse import OptionParser
+import numpy
+import sys
+
+def make_parser():
+ parser = OptionParser()
+ parser.add_option('-o', '--output', help='output fastq file', default=None)
+ parser.add_option('-s', '--slice',
+ help='specify python slice, e.g. 0:75, 0:-1',
+ default=None)
+ return parser
+
+def parse_slice(slice_text):
+ if slice_text is None or len(slice_text) == 0:
+ return slice(None)
+
+ slice_data = []
+ for element in slice_text.split(':'):
+ if len(element) == 0:
+ element = None
+ else:
+ element = int(element)
+ slice_data.append(element)
+
+ return slice(*slice_data)
+
+def qseq2fastq(destination, qseqs, trim=None):
+ for q in qseqs:
+ for line in open(q):
+ # parse line
+ record = line.strip().split('\t')
+ machine_name = record[0]
+ run_number = record[1]
+ lane_number = record[2]
+ tile = record[3]
+ x = record[4]
+ y = record[5]
+ index = record[6]
+ read = record[7]
+ sequence = record[8]
+ # Illumina scores are Phred + 64
+ # Fastq scores are Phread + 33
+ # the following code grabs the string, converts to short ints and
+ # subtracts 31 (64-33) to convert between the two score formats.
+ # The numpy solution is twice as fast as some of my other
+ # ideas for the conversion.
+ # sorry about the uglyness in changing from character, to 8-bit int
+ # and back to a character array
+ quality = numpy.asarray(record[9],'c')
+ quality.dtype = numpy.uint8
+ quality -= 31
+ quality.dtype = '|S1' # I'd like to know what the real numpy char type is
+
+ pass_qc = record[10]
+
+ destination.write('@%s_%s:%s:%s:%s:%s pf=%s%s' % ( \
+ machine_name,
+ run_number,
+ lane_number,
+ tile,
+ x,
+ y,
+ pass_qc,
+ os.linesep))
+ destination.write(sequence[trim])
+ destination.write(os.linesep)
+ destination.write('+')
+ destination.write(os.linesep)
+ destination.write(quality[trim].tostring())
+ destination.write(os.linesep)
+
+def main(cmdline=None):
+ parser = make_parser()
+ opts, args = parser.parse_args(cmdline)
+
+ if opts.output is not None:
+ dest = open(opts.output, 'w')
+ else:
+ dest = sys.stdout
+
+ subseq = parse_slice(opts.slice)
+
+ qseq2fastq(dest, args, subseq)
+
+ return 0
+
+if __name__ == "__main__":
+ main()