5 # Created by Ali Mortazavi on 8/12/08.
10 from cistematic.core import complement
11 from commoncode import getConfigParser, getConfigBoolOption, getConfigOption
13 print "trimreads: version 2.2"
19 usage = "usage: python %prog length infile outfile [--fastq] [--fromback] [--paired] [--flip] [--filter maxN]"
21 parser = getParser(usage)
22 (options, args) = parser.parse_args(argv[1:])
26 print "\t where paired fragments are separated by a : when given the -paired flag"
33 trimreads(length, infile, outfile, options.fastq, options.fromBack, options.paired, options.flipseq, options.maxN)
37 parser = optparse.OptionParser(usage=usage)
38 parser.add_option("--fastq", action="store_true", dest="fastq")
39 parser.add_option("--fromback", action="store_true", dest="fromBack")
40 parser.add_option("--paired", action="store_true", dest="paired")
41 parser.add_option("--flip", action="store_true", dest="flipseq")
42 parser.add_option("--filter", type="int", dest="maxN")
44 configParser = getConfigParser()
46 fastq = getConfigBoolOption(configParser, section, "fastq", False)
47 fromBack = getConfigBoolOption(configParser, section, "fromBack", False)
48 paired = getConfigBoolOption(configParser, section, "paired", False)
49 flipseq = getConfigBoolOption(configParser, section, "flipseq", False)
50 maxN = getConfigOption(configParser, section, "maxN", None)
52 parser.set_defaults(fastq=fastq, fromBack=fromBack, paired=paired, flipseq=flipseq, maxN=maxN)
57 def trimreads(length, inFileName, outFileName, fastq=False, fromBack=False, paired=False, flipseq=False, maxN=None):
58 infile = open(inFileName)
59 outfile = open(outFileName, "w")
62 pairedlength = 2 * length
71 print "filtering out reads with more than %d Ns" % maxN
75 print "trimming reads from %s to %d bp and saving them in %s" % (inFileName, length, outFileName)
85 if (not fastq and firstChar == ">") or (fastq and firstChar in ["@", "+"]):
89 if line.count("N") > maxN:
98 seq1 = complement(tempseq1)
104 seq2 = complement(tempseq2)
109 if len(line) < pairedlength:
112 outfile.write("%s%s:%s\n" % (header, seq1, seq2))
115 outfile.write("%s%s\n" % (header, seq1))
117 outfile.write("%s%s\n" % (header, seq2))
120 if index % 1000000 == 0:
126 print "returned %d reads" % index
128 print "%d additional reads filtered" % filtered
131 if __name__ == "__main__":