import numpy
import sys
-def make_parser():
- parser = OptionParser()
- parser.add_option('-o', '--output', help='output fastq file', default=None)
- parser.add_option('-s', '--slice',
- help='specify python slice, e.g. 0:75, 0:-1',
- default=None)
- return parser
-
-def parse_slice(slice_text):
- if slice_text is None or len(slice_text) == 0:
- return slice(None)
-
- slice_data = []
- for element in slice_text.split(':'):
- if len(element) == 0:
- element = None
- else:
- element = int(element)
- slice_data.append(element)
-
- return slice(*slice_data)
-
-def qseq2fastq(destination, qseqs, trim=None):
+def qseq2fastq(destination, qseqs, trim=None, pf=False):
for q in qseqs:
for line in open(q):
# parse line
y = record[5]
index = record[6]
read = record[7]
- sequence = record[8]
+ sequence = record[8].replace('.','N')
# Illumina scores are Phred + 64
# Fastq scores are Phred + 33
# the following code grabs the string, converts to short ints and
quality.dtype = numpy.uint8
quality -= 31
quality.dtype = '|S1' # I'd like to know what the real numpy char type is
-
- pass_qc = record[10]
- destination.write('@%s_%s:%s:%s:%s:%s/%s pf=%s%s' % ( \
+ if pf:
+ pass_qc = record[10]
+ pass_qc_msg = " pf=%s" % (pass_qc)
+ else:
+ pass_qc_msg = ""
+
+ destination.write('@%s_%s:%s:%s:%s:%s/%s%s%s' % ( \
machine_name,
run_number,
lane_number,
x,
y,
read,
- pass_qc,
+ pass_qc_msg,
os.linesep))
destination.write(sequence[trim])
destination.write(os.linesep)
destination.write(os.linesep)
destination.write(quality[trim].tostring())
destination.write(os.linesep)
+
def make_parser():
    """Build the command-line option parser for this script.

    Options registered:
      -o / --output  output fastq file name (default: None)
      -s / --slice   python slice text such as ``0:75`` or ``0:-1``
                     (default: None)
      --pf           boolean flag (default: False); when given, the pass
                     filter flag is reported

    Returns:
        The configured ``OptionParser``. Calling ``parse_args`` on it yields
        the usual ``(options, positional_args)`` pair.
    """
    parser = OptionParser()
    parser.add_option('-o', '--output', help='output fastq file', default=None)
    parser.add_option('-s', '--slice',
                      help='specify python slice, e.g. 0:75, 0:-1',
                      default=None)
    # store_true makes --pf a switch that takes no value.
    parser.add_option('--pf', help="report pass filter flag", default=False,
                      action="store_true")
    return parser
+
def parse_slice(slice_text):
    """Convert colon-separated slice text into a ``slice`` object.

    Args:
        slice_text: text such as ``"0:75"``, ``"0:-1"`` or ``"::2"``.
            ``None`` or an empty string selects everything.

    Returns:
        ``slice(None)`` for missing/empty text; otherwise a slice built from
        the colon-separated fields, where an empty field becomes ``None``.
        A single field with no colon, e.g. ``"5"``, yields ``slice(5)``
        (the first five items), because the fields are passed positionally
        to ``slice``.

    Raises:
        ValueError: if a non-empty field is not an integer.
        TypeError: if more than three fields are given (raised by ``slice``).

    Examples:
        >>> parse_slice('0:75')
        slice(0, 75, None)
        >>> parse_slice('')
        slice(None, None, None)
    """
    if not slice_text:
        return slice(None)

    # An empty field (as in ":10" or "1:") means "no bound" on that side.
    fields = [int(field) if field else None
              for field in slice_text.split(':')]
    return slice(*fields)
+
+
def main(cmdline=None):
parser = make_parser()
opts, args = parser.parse_args(cmdline)
subseq = parse_slice(opts.slice)
- qseq2fastq(dest, args, subseq)
+ qseq2fastq(dest, args, subseq, opts.pf)
return 0