4 print "%prog: version 1.1"
10 usage = "usage: python %prog infile outfile [--prefix contigpref] [--filter pslfile] [--min bp] [--keepcov]"
12 parser = optparse.OptionParser(usage=usage)
13 parser.add_option("--prefix", dest="contigPrefix")
14 parser.add_option("--filter", dest="filterFileName")
15 parser.add_option("--min", type="int", dest="minSize")
16 parser.add_option("--keepcov", action="store_true", dest="keepCoverage")
17 parser.set_defaults(contigPrefix="chr", filterFileName="", minSize=0, keepCoverage=False)
18 (options, args) = parser.parse_args(argv[1:])
27 processvelvet(infile, outfile, options.contigPrefix, options.filterFileName, options.minSize, options.keepCoverage)
30 def processvelvet(inFileName, outFileName, contigPrefix="chr", filterFileName="", minSize=0, keepCoverage=False):
31 infile = open(inFileName)
32 outfile = open(outFileName, "w")
33 filterList = getFilterList(filterFileName)
35 node = {"contigPrefix": contigPrefix,
40 counts = {"acceptedSize": 0,
48 writeNode(outfile, node, filterList, counts, minSize, keepCoverage)
49 node["completeID"] = line.strip()[1:]
50 node["currentSeq"] = ""
52 node["currentSeq"] += line
54 writeNode(outfile, node, filterList, counts, minSize, keepCoverage)
59 print "%d contigs accepted" % counts["contigsAccepted"]
60 print "%d bp original" % (counts["acceptedSize"] + counts["filteredSize"])
61 print "%d bp accepted" % counts["acceptedSize"]
62 print "%d bp accepted N" % counts["nSize"]
63 print "%d bp filtered\n" % counts["filteredSize"]
66 def getFilterList(filterFileName=""):
71 filterFile = open(filterFileName)
75 for line in filterFile:
77 fields = line.strip().split()
83 if exclude not in filterList:
84 filterList.append(exclude)
91 def writeNode(outfile, node, filterList, counts, minSize=0, keepCoverage=False):
92 completeID = node["completeID"]
93 currentSeq = node["currentSeq"]
94 sequenceLength = len(currentSeq) - currentSeq.count("\n")
95 if len(completeID) > 5 and completeID not in filterList:
96 fields = completeID.split("_")
99 newID = fields[1] + "_" + fields[-1].strip()
101 if sequenceLength >= minSize:
102 outfile.write(">%s%s\n%s" % (node["contigPrefix"], newID, currentSeq))
103 counts["acceptedSize"] += sequenceLength
104 counts["nSize"] += currentSeq.count("N")
105 counts["contigsAccepted"] += 1
107 counts["filteredSize"] += sequenceLength
109 if __name__ == "__main__":