X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=erange.git;a=blobdiff_plain;f=makerdsfromeland2.py;h=66209eef973f36072ba964aac73f8579685aed99;hp=317ceda90359c6cda17541af71b39e400eee8b80;hb=0d3e3112fd04c2e6b44a25cacef1d591658ad181;hpb=5e4ae21098dba3d1edcf11e7279da0d84c3422e4 diff --git a/makerdsfromeland2.py b/makerdsfromeland2.py index 317ceda..66209ee 100755 --- a/makerdsfromeland2.py +++ b/makerdsfromeland2.py @@ -8,38 +8,23 @@ try: except: pass -import sys, string, optparse -from commoncode import readDataset +import sys +import string +import optparse +import ReadDataset +from commoncode import getConfigParser, getConfigOption, getConfigIntOption, getConfigBoolOption def main(argv=None): if not argv: argv = sys.argv - verstring = "%prog: version 3.4" + verstring = "makerdsfromeland2: version 3.5" print verstring usage = "usage: %prog label infilename outrdsfile [propertyName::propertyValue] [options]\ \ninput reads must be sorted to properly record multireads" - parser = optparse.OptionParser(usage=usage) - parser.add_option("--append", action="store_false", dest="init", - help="append to existing rds file [default: create new]") - parser.add_option("--RNA", dest="geneDataFileName", - help="set data type to RNA [default: DNA]") - parser.add_option("--index", action="store_true", dest="doIndex", - help="index the output rds file") - parser.add_option("--cache", type="int", dest="cachePages", - help="number of cache pages to use [default: 100000") - parser.add_option("--olddelimiter", action="store_true", dest="useOldDelimiter", - help="use : as the delimiter") - parser.add_option("--paired", dest="pairID", - help="pairID value") - parser.add_option("--extended", action="store_true", dest="extended", - help="use eland_extended input") - parser.add_option("--verbose", action="store_true", dest="verbose") - parser.add_option("--maxlines", type="int", dest="maxLines", - help="[default: 1000000000") - parser.set_defaults(init=True, doIndex=False, cachePages=100000, geneDataFileName=None, useOldDelimiter=False, pairID=None, maxLines=1000000000, extended=False, verbose=False) + parser = getParser(usage) (options, args) = parser.parse_args(argv[1:]) if len(args) < 3: @@ -68,10 +53,54 @@ def main(argv=None): if options.geneDataFileName is not None: dataType = 'RNA' - makeRDSFromEland2(label, filename, outdbname, options.doIndex, delimiter, paired, options.init, options.pairID, dataType, options.geneDataFileName, options.cachePages, options.maxLines, options.extended, options.verbose) + makeRDSFromEland2(label, filename, outdbname, options.doIndex, delimiter, paired, options.init, + options.pairID, dataType, options.geneDataFileName, options.cachePages, + options.maxLines, options.extended, options.verbose) + + +def getParser(usage): + parser = optparse.OptionParser(usage=usage) + parser.add_option("--append", action="store_false", dest="init", + help="append to existing rds file [default: create new]") + parser.add_option("--RNA", dest="geneDataFileName", + help="set data type to RNA [default: DNA]") + parser.add_option("--index", action="store_true", dest="doIndex", + help="index the output rds file") + parser.add_option("--cache", type="int", dest="cachePages", + help="number of cache pages to use [default: 100000") + parser.add_option("--olddelimiter", action="store_true", dest="useOldDelimiter", + help="use : as the delimiter") + parser.add_option("--paired", dest="pairID", + help="pairID value") + parser.add_option("--extended", action="store_true", dest="extended", + help="use eland_extended input") + parser.add_option("--verbose", action="store_true", dest="verbose") + parser.add_option("--maxlines", type="int", dest="maxLines", + help="[default: 1000000000") + + configParser = getConfigParser() + section = "makerdsfromeland2" + init = getConfigBoolOption(configParser, section, "init", True) + doIndex = getConfigBoolOption(configParser, section, "doIndex", False) + cachePages = getConfigIntOption(configParser, section, "cachePages", 100000) + geneDataFileName = getConfigOption(configParser, section, "geneDataFileName", None) + useOldDelimiter = getConfigBoolOption(configParser, section, "useOldDelimiter", False) + pairID = getConfigOption(configParser, section, "pairID", None) + maxLines = getConfigIntOption(configParser, section, "maxLines", 1000000000) + extended = getConfigBoolOption(configParser, section, "extended", False) + verbose = getConfigBoolOption(configParser, section, "verbose", False) + + parser.set_defaults(init=init, doIndex=doIndex, cachePages=cachePages, + geneDataFileName=geneDataFileName, useOldDelimiter=useOldDelimiter, + pairID=pairID, maxLines=maxLines, extended=extended, verbose=verbose) + + return parser + +def makeRDSFromEland2(label, filename, outdbname, doIndex=False, delimiter="|", paired=False, + init=True, pairID="1", dataType="DNA", geneDataFileName=None, + cachePages=100000, maxLines=1000000000, extended=False, verbose=False): -def makeRDSFromEland2(label, filename, outdbname, doIndex=False, delimiter="|", paired=False, init=True, pairID="1", dataType="DNA", geneDataFileName=None, cachePages=100000, maxLines=1000000000, extended=False, verbose=False): maxBorder = 0 index = 0 insertSize = 100000 @@ -104,7 +133,7 @@ def makeRDSFromEland2(label, filename, outdbname, doIndex=False, delimiter="|", mapDict[uname] = [] genedatafile.close() - rds = readDataset(outdbname, init, dataType, verbose=True) + rds = ReadDataset.ReadDataset(outdbname, init, dataType, verbose=True) if cachePages > rds.getDefaultCacheSize(): if init: