2395dc09654e3fef8cd0fcdfc1966cec335a1a13
[htsworkflow.git] / scripts / qseq2fastq
1 #!/usr/bin/env python
2
3 import os
4 from optparse import OptionParser
5 import numpy
6 import sys
7
8 def qseq2fastq(destination, qseqs, trim=None, pf=False):
9     for q in qseqs:
10         for line in open(q):
11             # parse line
12             record = line.strip().split('\t')
13             machine_name = record[0]
14             run_number = record[1]
15             lane_number = record[2]
16             tile = record[3]
17             x = record[4]
18             y = record[5]
19             index = record[6]
20             read = record[7]
21             sequence = record[8].replace('.','N')
22             # Illumina scores are Phred + 64
23             # Fastq scores are Phread + 33
24             # the following code grabs the string, converts to short ints and
25             # subtracts 31 (64-33) to convert between the two score formats.
26             # The numpy solution is twice as fast as some of my other
27             # ideas for the conversion.
28             # sorry about the uglyness in changing from character, to 8-bit int
29             # and back to a character array
30             quality = numpy.asarray(record[9],'c')
31             quality.dtype = numpy.uint8
32             quality -= 31
33             quality.dtype = '|S1' # I'd like to know what the real numpy char type is
34
35             if pf:
36                 pass_qc = record[10]
37                 pass_qc_msg = " pf=%s" % (pass_qc)
38             else:
39                 pass_qc_msg = ""
40                 
41             destination.write('@%s_%s:%s:%s:%s:%s/%s%s%s' % ( \
42                 machine_name,
43                 run_number,
44                 lane_number,
45                 tile,
46                 x,
47                 y,
48                 read,
49                 pass_qc_msg,
50                 os.linesep))
51             destination.write(sequence[trim])
52             destination.write(os.linesep)
53             destination.write('+')
54             destination.write(os.linesep)
55             destination.write(quality[trim].tostring())
56             destination.write(os.linesep)
57
58 def make_parser():
59     parser = OptionParser()
60     parser.add_option('-o', '--output', help='output fastq file', default=None)
61     parser.add_option('-s', '--slice',
62                       help='specify python slice, e.g. 0:75, 0:-1',
63                       default=None)
64     parser.add_option('--pf', help="report pass filter flag", default=False,
65                       action="store_true")
66     return parser
67
68 def parse_slice(slice_text):
69     if slice_text is None or len(slice_text) == 0:
70         return slice(None)
71         
72     slice_data = []
73     for element in slice_text.split(':'):
74         if len(element) == 0:
75             element = None
76         else:
77             element = int(element)
78         slice_data.append(element)
79
80     return slice(*slice_data)
81         
82
83 def main(cmdline=None):
84     parser = make_parser()
85     opts, args = parser.parse_args(cmdline)
86
87     if opts.output is not None:
88         dest = open(opts.output, 'w')
89     else:
90         dest = sys.stdout
91
92     subseq = parse_slice(opts.slice)
93
94     qseq2fastq(dest, args, subseq, opts.pf)
95     
96     return 0
97
98 if __name__ == "__main__":
99     main()