From 5672775e89a36cac2d8ae71148e0a701a16f2801 Mon Sep 17 00:00:00 2001 From: Diane Trout Date: Tue, 1 Dec 2009 22:19:10 +0000 Subject: [PATCH] Replace '.' with 'N' in the sequence from qseq files. Also add an option to include the pass filter state in the header (and a small code reorganization) --- scripts/qseq2fastq | 65 +++++++++++++++++++++++++--------------------- 1 file changed, 36 insertions(+), 29 deletions(-) diff --git a/scripts/qseq2fastq b/scripts/qseq2fastq index 4262898..2395dc0 100644 --- a/scripts/qseq2fastq +++ b/scripts/qseq2fastq @@ -5,29 +5,7 @@ from optparse import OptionParser import numpy import sys -def make_parser(): - parser = OptionParser() - parser.add_option('-o', '--output', help='output fastq file', default=None) - parser.add_option('-s', '--slice', - help='specify python slice, e.g. 0:75, 0:-1', - default=None) - return parser - -def parse_slice(slice_text): - if slice_text is None or len(slice_text) == 0: - return slice(None) - - slice_data = [] - for element in slice_text.split(':'): - if len(element) == 0: - element = None - else: - element = int(element) - slice_data.append(element) - - return slice(*slice_data) - -def qseq2fastq(destination, qseqs, trim=None): +def qseq2fastq(destination, qseqs, trim=None, pf=False): for q in qseqs: for line in open(q): # parse line @@ -40,7 +18,7 @@ def qseq2fastq(destination, qseqs, trim=None): y = record[5] index = record[6] read = record[7] - sequence = record[8] + sequence = record[8].replace('.','N') # Illumina scores are Phred + 64 # Fastq scores are Phread + 33 # the following code grabs the string, converts to short ints and @@ -53,10 +31,14 @@ def qseq2fastq(destination, qseqs, trim=None): quality.dtype = numpy.uint8 quality -= 31 quality.dtype = '|S1' # I'd like to know what the real numpy char type is - - pass_qc = record[10] - destination.write('@%s_%s:%s:%s:%s:%s/%s pf=%s%s' % ( \ + if pf: + pass_qc = record[10] + pass_qc_msg = " pf=%s" % (pass_qc) + else: + pass_qc_msg = "" + + destination.write('@%s_%s:%s:%s:%s:%s/%s%s%s' % ( \ machine_name, run_number, lane_number, @@ -64,7 +46,7 @@ def qseq2fastq(destination, qseqs, trim=None): x, y, read, - pass_qc, + pass_qc_msg, os.linesep)) destination.write(sequence[trim]) destination.write(os.linesep) @@ -72,7 +54,32 @@ def qseq2fastq(destination, qseqs, trim=None): destination.write(os.linesep) destination.write(quality[trim].tostring()) destination.write(os.linesep) + +def make_parser(): + parser = OptionParser() + parser.add_option('-o', '--output', help='output fastq file', default=None) + parser.add_option('-s', '--slice', + help='specify python slice, e.g. 0:75, 0:-1', + default=None) + parser.add_option('--pf', help="report pass filter flag", default=False, + action="store_true") + return parser + +def parse_slice(slice_text): + if slice_text is None or len(slice_text) == 0: + return slice(None) + slice_data = [] + for element in slice_text.split(':'): + if len(element) == 0: + element = None + else: + element = int(element) + slice_data.append(element) + + return slice(*slice_data) + + def main(cmdline=None): parser = make_parser() opts, args = parser.parse_args(cmdline) @@ -84,7 +91,7 @@ def main(cmdline=None): subseq = parse_slice(opts.slice) - qseq2fastq(dest, args, subseq) + qseq2fastq(dest, args, subseq, opts.pf) return 0 -- 2.30.2