#!/usr/bin/env python import os from optparse import OptionParser import numpy import sys def qseq2fastq(destination, qseqs, trim=None, pf=False): for q in qseqs: for line in open(q): # parse line record = line.strip().split('\t') machine_name = record[0] run_number = record[1] lane_number = record[2] tile = record[3] x = record[4] y = record[5] index = record[6] read = record[7] sequence = record[8].replace('.','N') # Illumina scores are Phred + 64 # Fastq scores are Phread + 33 # the following code grabs the string, converts to short ints and # subtracts 31 (64-33) to convert between the two score formats. # The numpy solution is twice as fast as some of my other # ideas for the conversion. # sorry about the uglyness in changing from character, to 8-bit int # and back to a character array quality = numpy.asarray(record[9],'c') quality.dtype = numpy.uint8 quality -= 31 quality.dtype = '|S1' # I'd like to know what the real numpy char type is if pf: pass_qc = record[10] pass_qc_msg = " pf=%s" % (pass_qc) else: pass_qc_msg = "" destination.write('@%s_%s:%s:%s:%s:%s/%s%s%s' % ( \ machine_name, run_number, lane_number, tile, x, y, read, pass_qc_msg, os.linesep)) destination.write(sequence[trim]) destination.write(os.linesep) destination.write('+') destination.write(os.linesep) destination.write(quality[trim].tostring()) destination.write(os.linesep) def make_parser(): usage = "%prog: [options] *_qseq.txt" parser = OptionParser(usage) parser.add_option('-o', '--output', help='output fastq file', default=None) parser.add_option('-s', '--slice', help='specify python slice, e.g. 0:75, 0:-1', default=None) parser.add_option('--pf', help="report pass filter flag", default=False, action="store_true") return parser def parse_slice(slice_text): if slice_text is None or len(slice_text) == 0: return slice(None) slice_data = [] for element in slice_text.split(':'): if len(element) == 0: element = None else: element = int(element) slice_data.append(element) return slice(*slice_data) def main(cmdline=None): parser = make_parser() opts, args = parser.parse_args(cmdline) if opts.output is not None: dest = open(opts.output, 'w') else: dest = sys.stdout subseq = parse_slice(opts.slice) qseq2fastq(dest, args, subseq, opts.pf) return 0 if __name__ == "__main__": main()