14 from commoncode import getConfigParser, getConfigBoolOption, getConfigIntOption
# Version banner: prints the script name and version string to stdout.
16 print "rdsmetadata: version 2.8"
# NOTE(review): the enclosing definition header and the surrounding
# loop/conditional lines are not visible in this excerpt; the comments below
# describe only the statements that are shown.
# Usage text supplied to the option parser.
23 usage = "usage: python %prog rdsfile [propertyName1::propertyValue1] ... [propertyNameN::propertyValueN] [options]"
# Build the option parser and parse every argument after argv[0].
25 parser = makeParser(usage)
26 (options, args) = parser.parse_args(argv[1:])
# Explanatory text for the user (presumably printed together with the usage
# message -- confirm against the full file).
30 print "where the optional metadata name::value pairs are added to the existing dataset"
# Split a "name::value" argument into its two parts. The tuple unpacking
# raises ValueError unless the split yields exactly two pieces.
38 (pname, pvalue) = arg.strip().split("::")
39 print "adding %s : %s" % (pname, pvalue)
# Collect the pair; propertyList is later passed to rdsmetadata.
40 propertyList.append((pname, pvalue))
# Hand the data file, the collected pairs and the parsed option values to
# rdsmetadata, positionally in the order of its signature.
42 rdsmetadata(datafile, propertyList, options.cacheVal, options.buildIndex,
43 options.dropIndex, options.doCount, options.doComplexity,
44 options.resetFlags, options.rnaDataType, options.cachePages)
def makeParser(usage=""):
    """Build the command-line option parser for rdsmetadata.

    Registers the script's options (--defaultcache, --index, --dropindex,
    --nocount, --complexity, --reset, --initrna, --cache) and seeds their
    defaults from the "rdsmetadata" section of the configuration object
    obtained from getConfigParser().

    usage: usage string passed through to optparse.OptionParser.

    Returns the configured optparse.OptionParser instance.
    """
    parser = optparse.OptionParser(usage=usage)
    parser.add_option("--defaultcache", type="int", dest="cacheVal")
    parser.add_option("--index", action="store_true", dest="buildIndex")
    parser.add_option("--dropindex", action="store_true", dest="dropIndex")
    # --nocount is a negative switch: it stores False into doCount.
    parser.add_option("--nocount", action="store_false", dest="doCount")
    parser.add_option("--complexity", action="store_true", dest="doComplexity")
    parser.add_option("--reset", action="store_true", dest="resetFlags")
    parser.add_option("--initrna", action="store_true", dest="rnaDataType")
    parser.add_option("--cache", type="int", dest="cachePages")

    configParser = getConfigParser()
    section = "rdsmetadata"
    # The last argument of each getConfig*Option call is presumably the
    # fallback used when the section/key is absent -- TODO confirm against
    # commoncode.
    cacheVal = getConfigIntOption(configParser, section, "cacheVal", 0)
    buildIndex = getConfigBoolOption(configParser, section, "buildIndex", False)
    dropIndex = getConfigBoolOption(configParser, section, "dropIndex", False)
    doCount = getConfigBoolOption(configParser, section, "doCount", True)
    doComplexity = getConfigBoolOption(configParser, section, "doComplexity", False)
    resetFlags = getConfigBoolOption(configParser, section, "resetFlags", False)
    rnaDataType = getConfigBoolOption(configParser, section, "rnaDataType", False)
    cachePages = getConfigIntOption(configParser, section, "cachePages", -1)

    parser.set_defaults(cacheVal=cacheVal, buildIndex=buildIndex, dropIndex=dropIndex, doCount=doCount,
                        doComplexity=doComplexity, resetFlags=resetFlags, rnaDataType=rnaDataType,
                        cachePages=cachePages)

    # The caller assigns this function's result and calls parse_args() on it,
    # so the parser must be handed back explicitly.
    return parser
# Open the read dataset stored in datafile and perform the cache, index,
# complexity and metadata operations shown below.
# NOTE(review): the conditional/try lines that select among these statements
# are not visible in this excerpt; presumably each step is gated by the
# same-named flag parameter -- confirm against the full file.
# NOTE(review): propertyList=[] is a mutable default shared across calls; it
# is only read in the lines shown here, but confirm nothing mutates it.
76 def rdsmetadata(datafile, propertyList=[], cacheVal=0, buildIndex=False,
77 dropIndex=False, doCount=True, doComplexity=False, resetFlags=False,
78 rnaDataType=False, cachePages=-1):
# Two alternative ways of opening the dataset: the first initializes it with
# datasetType="RNA", the second opens it passing reportCount=doCount.
# Presumably chosen by rnaDataType (branch lines not visible). doCache is
# defined on lines not shown here.
85 rds = ReadDataset.ReadDataset(datafile, initialize=True, datasetType="RNA", verbose=True, cache=doCache)
87 rds = ReadDataset.ReadDataset(datafile, verbose=True, reportCount=doCount, cache=doCache)
# Compare the requested cache page count against the dataset's default cache
# size; the setDBcache(cachePages) call that follows is presumably the body
# of this test.
89 if cachePages > rds.getDefaultCacheSize():
90 rds.setDBcache(cachePages)
# Set cacheVal as the cache size with default=True and report it.
93 rds.setDBcache(cacheVal, default=True)
94 print "set default cache size to %d pages" % cacheVal
# Progress message; the statement that performs the reset is not visible in
# this excerpt.
97 print "clearing read flags"
# Message for a failed index drop; the drop call and its error handling are
# not visible in this excerpt.
104 print "could not drop index"
# Announce the index build and call rds.buildIndex with cacheVal.
107 print "building index...."
109 rds.buildIndex(cacheVal)
# Complexity report: fetch the uniq count with distinct=False and with
# distinct=True, then print both and their ratio.
# NOTE(review): float(distincts) / uniqs raises ZeroDivisionError if uniqs is
# 0 -- confirm whether an empty dataset can reach this line.
114 print "calculating uniq read complexity..."
115 uniqs = rds.getUniqsCount(distinct=False)
116 distincts = rds.getUniqsCount(distinct=True)
117 print "%d distincts / %d uniqs = %.2f" % (distincts, uniqs, float(distincts) / uniqs)
# Test for a non-empty propertyList; the insertMetadata call that follows is
# presumably the body of this test.
119 if len(propertyList) > 0:
120 rds.insertMetadata(propertyList)
123 if __name__ == "__main__":