1 '''Summarize a FASTQ file.
3 from __future__ import print_function
# Summarize a FASTQ stream (the name suggests Illumina HiSeq output).
# Iterates over the lines of `stream` and returns the tuple
# (reads, pass_qc, mean).
# NOTE(review): this view of the function is an excerpt -- the original lines
# that initialise `reads`, `pass_qc` and `delta`, and that decide which lines
# are headers and which are quality strings, are not visible here.
3 def summarize_hiseq_fastq(stream):
# Length of this platform's line separator; used further down to trim the
# line ending off a line with a slice.
# NOTE(review): if `stream` is opened in text mode with newline translation,
# every line ends in a single '\n' regardless of platform, so slicing off
# len(os.linesep) characters could drop a real character where the separator
# is two characters long -- confirm how the stream is opened.
13 eol_length = len(os.linesep)
15 for i, line in enumerate(stream):
17 # header looks like this
18 # @HWI-ST0787:114:D0PMDACXX:8:1101:1605:2154 1:N:0:TAGCTT
19 # we want the :N (passed filter) or :Y (failed filter)
21 # if flag is 'N' we are not a bad read
# The flag is read three characters after the last space in the line; any
# value other than 'N' marks the read as bad.
22 bad_read = False if line[line.rfind(' ') + 3] == 'N' else True
29 # don't include bad reads in score
30 # score = numpy.asarray(list(line.rstrip()), dtype='c') # 3.5 min
31 #score = numpy.asarray(line[:-eol_length], dtype='c') # 2 min
# Build an array of single characters from the line with its last
# `eol_length` characters removed.
32 score = numpy.asarray(line[:-eol_length], dtype='c') # 1.4 min
# Reinterpret the one-byte characters in place as 8-bit integers.
33 score.dtype = numpy.int8
# One floating-point slot per element of `score`.
# NOTE(review): the `numpy.float` alias was removed in NumPy 1.24; on newer
# NumPy this line raises AttributeError (builtin `float` is the replacement).
36 mean = numpy.zeros(len(score), dtype=numpy.float)
# Presumably an incremental (running) mean update; `delta` and `pass_qc` are
# assigned on lines not visible in this excerpt -- TODO confirm.
39 mean = mean + delta / pass_qc
41 return (reads, pass_qc, mean)
# Script entry point: open the path given as the first command-line argument
# and print the tuple returned by summarize_hiseq_fastq().
43 if __name__ == '__main__':
# `autoopen` comes from a module not shown here; presumably it opens plain or
# compressed files transparently -- TODO confirm.
45 from autoopen import autoopen
# NOTE(review): `sys` must be imported on a line not visible in this excerpt.
46 with autoopen(sys.argv[1], 'r') as instream:
47 print(summarize_hiseq_fastq(instream))