3 from optparse import OptionParser
def main(cmdline=None):
    """Validate the files named on the command line.

    cmdline - list of argument strings handed to the option parser;
              when None, optparse falls back to sys.argv[1:].

    Files are opened one at a time and checked with validate_fastq when
    --fastq was given.

    returns 0
    """
    parser = make_parser()
    opts, args = parser.parse_args(cmdline)

    # NOTE(review): the first positional argument is skipped.  That is only
    # right when the caller passes a full argv (program name included) as
    # cmdline -- confirm against the callers.
    for filename in args[1:]:
        # The context manager closes the file even if validation raises.
        with open(filename, 'r') as stream:
            if opts.fastq:
                # Hand over the handle that was just opened.
                validate_fastq(stream, opts.uniform_lengths)
    return 0


def make_parser():
    """Build the option parser for the validator.

    returns an OptionParser that understands --fastq and --uniform-lengths
    """
    parser = OptionParser()
    parser.add_option("--fastq", action="store_true", default=False,
                      help="verify arguments are valid fastq file")
    parser.add_option("--uniform-lengths", action="store_true", default=False,
                      help="require all reads to be of the same length")
    return parser
def validate_fastq(stream, uniform_length=False):
    """Validate that a fastq file isn't corrupted

    A record is read as four consecutive lines: an '@' title line, the
    sequence, a '+' separator line and the quality string.  Every failed
    check prints one message through validate_re / validate_length.

    stream - iterable yielding the lines of the file
    uniform_length - requires that all sequence & qualities must be
                     the same length over the whole file; when False the
                     length is only compared within each record.

    returns number of errors found
    """
    FQ_H1, FQ_SEQ, FQ_H2, FQ_QUAL = range(4)

    # FASTQ title lines start with '@' and separator lines with '+'.
    # NOTE(review): the title character class only admits word characters,
    # spaces and tabs; read names containing ':', '#', '/' or '-' will be
    # reported as failures -- confirm whether that restriction is wanted.
    h1_re = re.compile(r"^@[ \t\w]*$")
    seq_re = re.compile(r"^[AGCT.N]+$", re.IGNORECASE)
    h2_re = re.compile(r"^\+[ \t\w]*$")
    # Phred+64 alphabet: every character from '@' through 'h', including
    # the backslash.  Only this encoding is checked, so quality characters
    # below '@' are reported as FAIL QUAL.
    phred64 = re.compile(r"^[@-h]+$")

    state = FQ_H1
    length = None
    errors = 0
    for line_number, line in enumerate(stream, 1):
        line = line.rstrip()
        if state == FQ_H1:
            # reset length at start of new record for non-uniform check
            if not uniform_length:
                length = None
            # start of record checks
            errors = validate_re(h1_re, line, line_number, errors,
                                 "FAIL H1")
            state = FQ_SEQ
        elif state == FQ_SEQ:
            errors = validate_re(seq_re, line, line_number, errors,
                                 "FAIL SEQ")
            length, errors = validate_length(line, length, line_number,
                                             errors, "FAIL SEQ LEN")
            state = FQ_H2
        elif state == FQ_H2:
            errors = validate_re(h2_re, line, line_number, errors, "FAIL H2")
            state = FQ_QUAL
        elif state == FQ_QUAL:
            errors = validate_re(phred64, line, line_number, errors,
                                 "FAIL QUAL")
            length, errors = validate_length(line, length, line_number, errors,
                                             "FAIL QUAL LEN")
            state = FQ_H1
        else:
            # Guard against a corrupted state variable.
            raise RuntimeError("Invalid state: %d" % (state,))
    return errors
def validate_re(pattern, line, line_number, error_count, errmsg):
    """Check one line against a compiled regular expression.

    pattern - compiled regular expression the line has to match
    line - text to check
    line_number - line number reported in the failure message
    error_count - number of errors found so far
    errmsg - label printed in front of the line number

    returns error_count, increased by one when the line does not match
    """
    if pattern.match(line) is None:
        # One pre-formatted argument prints the same text whether print is
        # the Python 2 statement or the Python 3 function.
        print("%s [%d]: %s" % (errmsg, line_number, line))
        error_count += 1
    return error_count
def validate_length(line, line_length, line_number, error_count, errmsg):
    """Check that a line has the expected length.

    if line_length is None, sets it to the length of this line; otherwise
    a line of any other length is printed and counted as an error.

    line - text to measure
    line_length - expected length, or None when none is known yet
    line_number - line number reported in the failure message
    error_count - number of errors found so far
    errmsg - label printed in front of the line number

    returns (line_length, error_count)
    """
    if line_length is None:
        line_length = len(line)
    elif len(line) != line_length:
        # One pre-formatted argument prints the same text whether print is
        # the Python 2 statement or the Python 3 function.
        print("%s %d: %s" % (errmsg, line_number, line))
        error_count += 1
    return line_length, error_count