3 from erange.commoncode import getConfigParser, getConfigOption, getConfigIntOption, getConfigBoolOption
5 print "processvelvet: version 1.2"
11 usage = "usage: python %prog infile outfile [--prefix contigpref] [--filter pslfile] [--min bp] [--keepcov]"
13 parser = getParser(usage)
14 (options, args) = parser.parse_args(argv[1:])
23 processvelvet(infile, outfile, options.contigPrefix, options.filterFileName, options.minSize, options.keepCoverage)
27 parser = optparse.OptionParser(usage=usage)
28 parser.add_option("--prefix", dest="contigPrefix")
29 parser.add_option("--filter", dest="filterFileName")
30 parser.add_option("--min", type="int", dest="minSize")
31 parser.add_option("--keepcov", action="store_true", dest="keepCoverage")
33 configParser = getConfigParser()
34 section = "processvelvet"
35 contigPrefix = getConfigOption(configParser, section, "contigPrefix", "chr")
36 filterFileName = getConfigOption(configParser, section, "filterFileName", "")
37 minSize = getConfigIntOption(configParser, section, "minSize", 0)
38 keepCoverage = getConfigBoolOption(configParser, section, "keepCoverage", False)
40 parser.set_defaults(contigPrefix=contigPrefix, filterFileName=filterFileName, minSize=minSize, keepCoverage=keepCoverage)
45 def processvelvet(inFileName, outFileName, contigPrefix="chr", filterFileName="", minSize=0, keepCoverage=False):
46 infile = open(inFileName)
47 outfile = open(outFileName, "w")
48 filterList = getFilterList(filterFileName)
50 node = {"contigPrefix": contigPrefix,
55 counts = {"acceptedSize": 0,
63 writeNode(outfile, node, filterList, counts, minSize, keepCoverage)
64 node["completeID"] = line.strip()[1:]
65 node["currentSeq"] = ""
67 node["currentSeq"] += line
69 writeNode(outfile, node, filterList, counts, minSize, keepCoverage)
74 print "%d contigs accepted" % counts["contigsAccepted"]
75 print "%d bp original" % (counts["acceptedSize"] + counts["filteredSize"])
76 print "%d bp accepted" % counts["acceptedSize"]
77 print "%d bp accepted N" % counts["nSize"]
78 print "%d bp filtered\n" % counts["filteredSize"]
81 def getFilterList(filterFileName=""):
86 filterFile = open(filterFileName)
90 for line in filterFile:
92 fields = line.strip().split()
98 if exclude not in filterList:
99 filterList.append(exclude)
106 def writeNode(outfile, node, filterList, counts, minSize=0, keepCoverage=False):
107 completeID = node["completeID"]
108 currentSeq = node["currentSeq"]
109 sequenceLength = len(currentSeq) - currentSeq.count("\n")
110 if len(completeID) > 5 and completeID not in filterList:
111 fields = completeID.split("_")
114 newID = fields[1] + "_" + fields[-1].strip()
116 if sequenceLength >= minSize:
117 outfile.write(">%s%s\n%s" % (node["contigPrefix"], newID, currentSeq))
118 counts["acceptedSize"] += sequenceLength
119 counts["nSize"] += currentSeq.count("N")
120 counts["contigsAccepted"] += 1
122 counts["filteredSize"] += sequenceLength
124 if __name__ == "__main__":