pass
import sys
+import optparse
import ReadDataset
+from commoncode import getConfigParser, getConfigOption, getConfigBoolOption
print "combinerds: version 1.2"
if not argv:
argv = sys.argv
- if len(argv) < 2:
- print 'usage: python %s destinationRDS inputrds1 [inputrds2 ....] [-table table_name] [--init] [--initrna] [--index] [--cache pages]' % argv[0]
- #print '\nwhere the optional metadata name::value pairs are added to the existing dataset\n'
+ usage = "usage: python %s destinationRDS inputrds1 [inputrds2 ....] [-table table_name] [--init] [--initrna] [--index] [--cache pages]" % argv[0]
+ parser = makeParser(usage)
+ (options, args) = parser.parse_args(argv[1:])
+
+ if len(args) < 2:
+ print usage
sys.exit(1)
- doCache = False
- cachePages = -1
- if '--cache' in argv:
- doCache = True
- try:
- cachePages = int(argv[sys.argv.index('-cache') + 1])
- except:
- pass
-
- datafile = argv[1]
- infileList = []
- for index in range(2, len(argv)):
- if argv[index][0] == '-':
- break
- infileList.append(sys.argv[index])
+ datafile = args[0]
+ infileList = args[1:]
- print "destination RDS: %s" % datafile
+ combinerds(datafile, infileList, options.tableList, options.withFlag, options.doIndex, options.cachePages, options.doInit, options.initRNA)
- if '--initrna' in argv:
- rds = ReadDataset.ReadDataset(datafile, initialize=True, datasetType='RNA')
- elif '--init' in argv:
- rds = ReadDataset.ReadDataset(datafile, initialize=True)
- withFlag = ''
- if '--flag' in argv:
- withFlag = argv[sys.argv.index('-flag') + 1]
- print "restrict to flag = %s" % withFlag
+def makeParser(usage=""):
+
+ parser = optparse.OptionParser(usage=usage)
+    parser.add_option("--table", action="append", dest="tableList")
+ parser.add_option("--init", action="store_true", dest="doInit")
+ parser.add_option("--initrna", action="store_true", dest="initRNA")
+ parser.add_option("--index", action="store_true", dest="doIndex")
+ parser.add_option("--cache", type="int", dest="cachePages")
+ parser.add_option("--flag", dest="withFlag")
+
+ configParser = getConfigParser()
+ section = "combinerds"
+ doInit = getConfigBoolOption(configParser, section, "doInit", False)
+ initRNA = getConfigBoolOption(configParser, section, "initRNA", False)
+ doIndex = getConfigBoolOption(configParser, section, "doIndex", False)
+ cachePages = getConfigOption(configParser, section, "cachePages", None)
+ withFlag = getConfigOption(configParser, section, "withFlag", "")
+
+ parser.set_defaults(tableList=[], doInit=doInit, initRNA=initRNA, doIndex=doIndex, cachePages=cachePages,
+ withFlag=withFlag)
+
+ return parser
+
- rds = ReadDataset.ReadDataset(datafile, verbose=True, cache=doCache)
+def combinerds(datafile, infileList, tableList=[], withFlag="", doIndex=False, cachePages=None, doInit=False, initRNA=False):
+ print "destination RDS: %s" % datafile
+ datasetType="DNA"
+ if initRNA:
+ doInit = True
+ datasetType="RNA"
+
+ doCache = False
+ if cachePages is not None:
+ doCache = True
+ else:
+ cachePages = -1
+
+ rds = ReadDataset.ReadDataset(datafile, verbose=True, cache=doCache, initialize=doInit, datasetType=datasetType)
if cachePages > rds.getDefaultCacheSize():
rds.setDBcache(cachePages)
- cacheVal = cachePages
else:
- cacheVal = rds.getDefaultCacheSize()
-
- doIndex = False
- if '--index' in argv:
- doIndex = True
+ cachePages = rds.getDefaultCacheSize()
- tableList = []
- if '--table' in argv:
- tableList.append(argv[argv.index('-table') + 1])
- else:
+ if tableList == []:
tableList = rds.getTables()
- combinerds(datafile, rds, infileList, cacheVal, tableList, withFlag, doIndex, doCache)
-
+ if withFlag != "":
+ print "restrict to flag = %s" % withFlag
-def combinerds(datafile, rds, infileList, cacheVal, tableList=[], withFlag="", doIndex=False, doCache=False):
metaDict = rds.getMetadata()
if "numberImports" not in metaDict:
origIndex = 0
- rds.insertMetadata([("numberImports", str(0))])
+ rds.insertMetadata([("numberImports", "0")])
else:
origIndex = int(metaDict["numberImports"])
index = origIndex
for inputfile in infileList:
- asname = "input" + str(index)
- rds.attachDB(inputfile,asname)
+ dbName = "input%s" % str(index)
+ rds.attachDB(inputfile, dbName)
for table in tableList:
print "importing table %s from file %s" % (table, inputfile)
- ascols = "*"
+ dbColumns = "*"
if table == "uniqs":
- ascols = "NULL, '%s' || readID, chrom, start, stop, sense, weight, flag, mismatch" % asname
+ dbColumns = "NULL, '%s' || readID, chrom, start, stop, sense, weight, flag, mismatch" % dbName
elif table == "multi":
- ascols = "NULL, '%s' || readID, chrom, start, stop, sense, weight, flag, mismatch" % asname
+ dbColumns = "NULL, '%s' || readID, chrom, start, stop, sense, weight, flag, mismatch" % dbName
elif table == "splices":
- ascols = "NULL, '%s' || readID, chrom, startL, stopL, startR, stopR, sense, weight, flag, mismatch" % asname
- elif table == "metadata":
- ascols = "name, value || ' (import_%d)'" % index
- rds.importFromDB(asname, table, ascols)
+ dbColumns = "NULL, '%s' || readID, chrom, startL, stopL, startR, stopR, sense, weight, flag, mismatch" % dbName
- if table != "metadata":
- rds.importFromDB(asname, table, ascols, withFlag)
+ if table == "metadata":
+ dbColumns = "name, value || ' (import_%d)'" % index
+ rds.importFromDB(dbName, table, dbColumns)
+ else:
+ rds.importFromDB(dbName, table, dbColumns, withFlag)
- rds.detachDB(asname)
- rds.insertMetadata([("import_" + str(index), "%s %s" % (inputfile, str(tableList)))])
+ rds.detachDB(dbName)
+ rds.insertMetadata([("import_%s" % str(index), "%s %s" % (inputfile, str(tableList)))])
index += 1
rds.updateMetadata("numberImports", index, origIndex)
if doIndex:
print "building index...."
- if cacheVal > 0:
- rds.buildIndex(cacheVal)
+ if cachePages > 0:
+ rds.buildIndex(cachePages)
else:
rds.buildIndex()