# Reconstructed post-patch text of combinerds.py, taken from the gitweb diff
# http://woldlab.caltech.edu/gitweb/?p=erange.git (blob 4c826b0 -> 2878423,
# commit 77dccd7c98d8cdb60caaf178b1123df71ea662c9).
# The diff covers only part of the file: the lines above `import sys`
# (including the `try:` whose `except: pass` tail appears as diff context)
# and anything below combinerds() are not visible and are not reproduced.

import sys
import optparse
import ReadDataset
from commoncode import getConfigParser, getConfigOption, getConfigBoolOption

# Single-argument print(...) emits the same text under the Python 2 print
# statement and the Python 3 print function.
print("combinerds: version 1.2")


def main(argv=None):
    """Parse the command line and combine the input RDS files.

    argv -- full argument vector including the program name; sys.argv is
            used when argv is empty or None.

    Prints the usage string and exits with status 1 when fewer than two
    positional arguments (destination plus at least one input) are given.
    """
    if not argv:
        argv = sys.argv

    usage = "usage: python %s destinationRDS inputrds1 [inputrds2 ....] [--table table_name] [--init] [--initrna] [--index] [--cache pages] [--flag flag]" % argv[0]
    parser = makeParser(usage)
    (options, args) = parser.parse_args(argv[1:])

    if len(args) < 2:
        print(usage)
        sys.exit(1)

    datafile = args[0]
    infileList = args[1:]

    combinerds(datafile, infileList, options.tableList, options.withFlag, options.doIndex,
               options.cachePages, options.doInit, options.initRNA)


def makeParser(usage=None):
    """Build the optparse parser for combinerds.

    usage -- usage text handed to optparse.OptionParser; the module
             docstring is used when it is omitted.

    Option defaults are read from the "combinerds" section of the parser
    returned by getConfigParser(), falling back to the literals below.
    """
    if usage is None:
        usage = __doc__

    parser = optparse.OptionParser(usage=usage)
    # dest must be spelled exactly like the name used in set_defaults() and
    # read back in main(); otherwise --table values land on an attribute
    # nobody reads.
    parser.add_option("--table", action="append", dest="tableList")
    parser.add_option("--init", action="store_true", dest="doInit")
    parser.add_option("--initrna", action="store_true", dest="initRNA")
    parser.add_option("--index", action="store_true", dest="doIndex")
    parser.add_option("--cache", type="int", dest="cachePages")
    parser.add_option("--flag", dest="withFlag")

    configParser = getConfigParser()
    section = "combinerds"
    doInit = getConfigBoolOption(configParser, section, "doInit", False)
    initRNA = getConfigBoolOption(configParser, section, "initRNA", False)
    doIndex = getConfigBoolOption(configParser, section, "doIndex", False)
    cachePages = getConfigOption(configParser, section, "cachePages", None)
    withFlag = getConfigOption(configParser, section, "withFlag", "")

    parser.set_defaults(tableList=[], doInit=doInit, initRNA=initRNA, doIndex=doIndex,
                        cachePages=cachePages, withFlag=withFlag)

    return parser


def _importColumns(table, dbName, index):
    """Return the column expression passed to importFromDB for `table`."""
    if table in ("uniqs", "multi"):
        return "NULL, '%s' || readID, chrom, start, stop, sense, weight, flag, mismatch" % dbName

    if table == "splices":
        return "NULL, '%s' || readID, chrom, startL, stopL, startR, stopR, sense, weight, flag, mismatch" % dbName

    if table == "metadata":
        return "name, value || ' (import_%d)'" % index

    return "*"


def combinerds(datafile, infileList, tableList=None, withFlag="", doIndex=False, cachePages=None, doInit=False, initRNA=False):
    """Import tables from each file in infileList into the RDS at datafile.

    datafile   -- destination RDS path, opened through ReadDataset.ReadDataset.
    infileList -- input RDS paths, attached and imported one after another.
    tableList  -- tables to import; when empty or None, rds.getTables() is used.
    withFlag   -- passed to importFromDB for every table except "metadata".
    doIndex    -- call rds.buildIndex() after all imports.
    cachePages -- requested cache size; None disables the cache option.
    doInit     -- passed as initialize= when opening the destination.
    initRNA    -- open the destination as datasetType "RNA"; implies doInit.

    Raises ValueError if cachePages is neither None nor convertible to int.
    """
    print("destination RDS: %s" % datafile)
    datasetType = "DNA"
    if initRNA:
        doInit = True
        datasetType = "RNA"

    doCache = cachePages is not None
    if doCache:
        # The --cache option arrives as an int, but a config-file default may
        # arrive as text (assumption: getConfigOption is not visible here --
        # TODO confirm). Normalize so the numeric comparisons below are sound.
        cachePages = int(cachePages)
    else:
        cachePages = -1

    rds = ReadDataset.ReadDataset(datafile, verbose=True, cache=doCache, initialize=doInit,
                                  datasetType=datasetType)
    if cachePages > rds.getDefaultCacheSize():
        rds.setDBcache(cachePages)
    else:
        cachePages = rds.getDefaultCacheSize()

    if not tableList:
        tableList = rds.getTables()

    if withFlag != "":
        print("restrict to flag = %s" % withFlag)

    metaDict = rds.getMetadata()
    if "numberImports" not in metaDict:
        origIndex = 0
        rds.insertMetadata([("numberImports", "0")])
    else:
        origIndex = int(metaDict["numberImports"])

    index = origIndex
    for inputfile in infileList:
        dbName = "input%d" % index
        rds.attachDB(inputfile, dbName)
        for table in tableList:
            print("importing table %s from file %s" % (table, inputfile))
            dbColumns = _importColumns(table, dbName, index)
            if table == "metadata":
                # metadata rows are imported without the flag restriction
                rds.importFromDB(dbName, table, dbColumns)
            else:
                rds.importFromDB(dbName, table, dbColumns, withFlag)

        rds.detachDB(dbName)
        rds.insertMetadata([("import_%d" % index, "%s %s" % (inputfile, str(tableList)))])
        index += 1

    rds.updateMetadata("numberImports", index, origIndex)
    if doIndex:
        print("building index....")
        if cachePages > 0:
            rds.buildIndex(cachePages)
        else:
            rds.buildIndex()