Convert several modules not covered by unit tests to use the print function
[htsworkflow.git] / htsworkflow / pipelines / fastq.py
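'''Summarize a FASTQ file: count reads, count reads that passed the
filter, and average the quality characters per read position.
'''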
3 from __future__ import print_function
4
5 import os
6 import numpy
7
def _as_bytes(line):
    """Return ``line`` as bytes, encoding text one byte per character."""
    if isinstance(line, bytes):
        return line
    # FASTQ is ASCII; latin-1 keeps a strict one-char-to-one-byte mapping.
    return line.encode('latin-1')


def summarize_hiseq_fastq(stream):
    """Count reads and average the quality characters of passing reads.

    ``stream`` is iterated line by line and is treated as a sequence of
    four-line FASTQ records (header, sequence, separator, quality).
    Lines may be text or bytes.

    A header looks like::

        @HWI-ST0787:114:D0PMDACXX:8:1101:1605:2154 1:N:0:TAGCTT

    and the character three positions after the last space is the
    filter flag: ``N`` means the read passed, anything else means it
    was filtered out.

    Returns a tuple ``(reads, pass_qc, mean)`` where

    * ``reads`` is the number of header lines seen,
    * ``pass_qc`` is the number of headers whose filter flag is ``N``,
    * ``mean`` is a numpy float array holding, for each read position,
      the mean of the raw quality character codes over the passing
      reads (no quality offset is subtracted), or ``None`` when no
      read passed.

    Raises IndexError when a header line is too short to contain the
    filter flag.

    NOTE(review): every passing read is expected to be as long as the
    first one; numpy will refuse to combine most mismatched lengths.
    """
    reads = 0
    pass_qc = 0
    bad_read = False
    mean = None

    for i, line in enumerate(stream):
        field = i % 4
        if field == 0:
            reads += 1
            header = _as_bytes(line)
            flag_at = header.rfind(b' ') + 3
            # Slice rather than index so text and bytes input compare
            # the same way on every Python version.
            flag = header[flag_at:flag_at + 1]
            if not flag:
                raise IndexError(
                    'FASTQ header too short to hold a filter flag: %r'
                    % (line,))
            bad_read = flag != b'N'
            if not bad_read:
                pass_qc += 1
        elif field == 3 and not bad_read:
            # Strip whatever line ending is actually present instead of
            # assuming it matches os.linesep; a final line without a
            # newline must keep its last quality character.
            quality = _as_bytes(line).rstrip(b'\r\n')
            score = numpy.frombuffer(quality, dtype=numpy.int8)

            if mean is None:
                mean = numpy.zeros(len(score), dtype=float)

            # Incremental mean: pass_qc already counts the current read.
            delta = score - mean
            mean = mean + delta / pass_qc

    return (reads, pass_qc, mean)
42
if '__main__' == __name__:
    # Script entry point: summarize the FASTQ file named by the first
    # command-line argument and print the resulting tuple.
    import sys
    from autoopen import autoopen

    fastq_path = sys.argv[1]
    with autoopen(fastq_path, 'r') as instream:
        summary = summarize_hiseq_fastq(instream)
        print(summary)