Replace '.' with 'N' in the sequence from qseq files.
authorDiane Trout <diane@caltech.edu>
Tue, 1 Dec 2009 22:19:10 +0000 (22:19 +0000)
committerDiane Trout <diane@caltech.edu>
Tue, 1 Dec 2009 22:19:10 +0000 (22:19 +0000)
Also add an option to include the pass filter state in the header
(and a small code reorganization)

scripts/qseq2fastq

index 42628980d4f8b78809791ebd9bff1df92a3280c6..2395dc09654e3fef8cd0fcdfc1966cec335a1a13 100644 (file)
@@ -5,29 +5,7 @@ from optparse import OptionParser
 import numpy
 import sys
 
-def make_parser():
-    parser = OptionParser()
-    parser.add_option('-o', '--output', help='output fastq file', default=None)
-    parser.add_option('-s', '--slice',
-                      help='specify python slice, e.g. 0:75, 0:-1',
-                      default=None)
-    return parser
-
-def parse_slice(slice_text):
-    if slice_text is None or len(slice_text) == 0:
-        return slice(None)
-        
-    slice_data = []
-    for element in slice_text.split(':'):
-        if len(element) == 0:
-            element = None
-        else:
-            element = int(element)
-        slice_data.append(element)
-
-    return slice(*slice_data)
-        
-def qseq2fastq(destination, qseqs, trim=None):
+def qseq2fastq(destination, qseqs, trim=None, pf=False):
     for q in qseqs:
         for line in open(q):
             # parse line
@@ -40,7 +18,7 @@ def qseq2fastq(destination, qseqs, trim=None):
             y = record[5]
             index = record[6]
             read = record[7]
-            sequence = record[8]
+            sequence = record[8].replace('.','N')
             # Illumina scores are Phred + 64
             # Fastq scores are Phread + 33
             # the following code grabs the string, converts to short ints and
@@ -53,10 +31,14 @@ def qseq2fastq(destination, qseqs, trim=None):
             quality.dtype = numpy.uint8
             quality -= 31
             quality.dtype = '|S1' # I'd like to know what the real numpy char type is
-            
-            pass_qc = record[10]
 
-            destination.write('@%s_%s:%s:%s:%s:%s/%s pf=%s%s' % ( \
+            if pf:
+                pass_qc = record[10]
+                pass_qc_msg = " pf=%s" % (pass_qc)
+            else:
+                pass_qc_msg = ""
+                
+            destination.write('@%s_%s:%s:%s:%s:%s/%s%s%s' % ( \
                 machine_name,
                 run_number,
                 lane_number,
@@ -64,7 +46,7 @@ def qseq2fastq(destination, qseqs, trim=None):
                 x,
                 y,
                 read,
-                pass_qc,
+                pass_qc_msg,
                 os.linesep))
             destination.write(sequence[trim])
             destination.write(os.linesep)
@@ -72,7 +54,32 @@ def qseq2fastq(destination, qseqs, trim=None):
             destination.write(os.linesep)
             destination.write(quality[trim].tostring())
             destination.write(os.linesep)
+
+def make_parser():
+    parser = OptionParser()
+    parser.add_option('-o', '--output', help='output fastq file', default=None)
+    parser.add_option('-s', '--slice',
+                      help='specify python slice, e.g. 0:75, 0:-1',
+                      default=None)
+    parser.add_option('--pf', help="report pass filter flag", default=False,
+                      action="store_true")
+    return parser
+
+def parse_slice(slice_text):
+    if slice_text is None or len(slice_text) == 0:
+        return slice(None)
         
+    slice_data = []
+    for element in slice_text.split(':'):
+        if len(element) == 0:
+            element = None
+        else:
+            element = int(element)
+        slice_data.append(element)
+
+    return slice(*slice_data)
+        
+
 def main(cmdline=None):
     parser = make_parser()
     opts, args = parser.parse_args(cmdline)
@@ -84,7 +91,7 @@ def main(cmdline=None):
 
     subseq = parse_slice(opts.slice)
 
-    qseq2fastq(dest, args, subseq)
+    qseq2fastq(dest, args, subseq, opts.pf)
     
     return 0