5 # Created by Ali Mortazavi on 8/12/08.
9 from cistematic.core import complement
11 print "%prog: version 2.1"
17 usage = "usage: python %prog length infile outfile [--fastq] [--fromback] [--paired] [--flip] [--filter maxN]"
19 parser = optparse.OptionParser(usage=usage)
20 parser.add_option("--fastq", action="store_true", dest="fastq")
21 parser.add_option("--fromback", action="store_true", dest="fromBack")
22 parser.add_option("--paired", action="store_true", dest="paired")
23 parser.add_option("--flip", action="store_true", dest="flipseq")
24 parser.add_option("--filter", type="int", dest="maxN")
25 parser.set_defaults(fastq=False, fromBack=False, paired=False, flipseq=False, maxN=None)
26 (options, args) = parser.parse_args(argv[1:])
30 print "\t where paired fragments are separated by a : when given the -paired flag"
37 trimreads(length, infile, outfile, options.fastq, options.fromBack, options.paired, options.flipseq, options.maxN)
40 def trimreads(length, inFileName, outFileName, fastq=False, fromBack=False, paired=False, flipseq=False, maxN=None):
41 infile = open(inFileName)
42 outfile = open(outFileName, "w")
45 pairedlength = 2 * length
54 print "filtering out reads with more than %d Ns" % maxN
58 print "trimming reads from %s to %d bp and saving them in %s" % (inFileName, length, outFileName)
68 if (not fastq and firstChar == ">") or (fastq and firstChar in ["@", "+"]):
72 if line.count("N") > maxN:
81 seq1 = complement(tempseq1)
87 seq2 = complement(tempseq2)
92 if len(line) < pairedlength:
95 outfile.write("%s%s:%s\n" % (header, seq1, seq2))
98 outfile.write("%s%s\n" % (header, seq1))
100 outfile.write("%s%s\n" % (header, seq2))
103 if index % 1000000 == 0:
109 print "returned %d reads" % index
111 print "%d additional reads filtered" % filtered
114 if __name__ == "__main__":