Update the usage string for qseq2fastq
[htsworkflow.git] / scripts / qseq2fastq
1 #!/usr/bin/env python
2
3 import os
4 from optparse import OptionParser
5 import numpy
6 import sys
7
8 def qseq2fastq(destination, qseqs, trim=None, pf=False):
9     for q in qseqs:
10         for line in open(q):
11             # parse line
12             record = line.strip().split('\t')
13             machine_name = record[0]
14             run_number = record[1]
15             lane_number = record[2]
16             tile = record[3]
17             x = record[4]
18             y = record[5]
19             index = record[6]
20             read = record[7]
21             sequence = record[8].replace('.','N')
22             # Illumina scores are Phred + 64
23             # Fastq scores are Phread + 33
24             # the following code grabs the string, converts to short ints and
25             # subtracts 31 (64-33) to convert between the two score formats.
26             # The numpy solution is twice as fast as some of my other
27             # ideas for the conversion.
28             # sorry about the uglyness in changing from character, to 8-bit int
29             # and back to a character array
30             quality = numpy.asarray(record[9],'c')
31             quality.dtype = numpy.uint8
32             quality -= 31
33             quality.dtype = '|S1' # I'd like to know what the real numpy char type is
34
35             if pf:
36                 pass_qc = record[10]
37                 pass_qc_msg = " pf=%s" % (pass_qc)
38             else:
39                 pass_qc_msg = ""
40                 
41             destination.write('@%s_%s:%s:%s:%s:%s/%s%s%s' % ( \
42                 machine_name,
43                 run_number,
44                 lane_number,
45                 tile,
46                 x,
47                 y,
48                 read,
49                 pass_qc_msg,
50                 os.linesep))
51             destination.write(sequence[trim])
52             destination.write(os.linesep)
53             destination.write('+')
54             destination.write(os.linesep)
55             destination.write(quality[trim].tostring())
56             destination.write(os.linesep)
57
58 def make_parser():
59     usage = "%prog: [options] *_qseq.txt"
60     parser = OptionParser(usage)
61     parser.add_option('-o', '--output', help='output fastq file', default=None)
62     parser.add_option('-s', '--slice',
63                       help='specify python slice, e.g. 0:75, 0:-1',
64                       default=None)
65     parser.add_option('--pf', help="report pass filter flag", default=False,
66                       action="store_true")
67     return parser
68
69 def parse_slice(slice_text):
70     if slice_text is None or len(slice_text) == 0:
71         return slice(None)
72         
73     slice_data = []
74     for element in slice_text.split(':'):
75         if len(element) == 0:
76             element = None
77         else:
78             element = int(element)
79         slice_data.append(element)
80
81     return slice(*slice_data)
82         
83
84 def main(cmdline=None):
85     parser = make_parser()
86     opts, args = parser.parse_args(cmdline)
87
88     if opts.output is not None:
89         dest = open(opts.output, 'w')
90     else:
91         dest = sys.stdout
92
93     subseq = parse_slice(opts.slice)
94
95     qseq2fastq(dest, args, subseq, opts.pf)
96     
97     return 0
98
99 if __name__ == "__main__":
100     main()