except:
pass
-import sys, string, optparse
-from commoncode import readDataset, writeLog
+import sys
+import string
+import optparse
+from commoncode import writeLog, getConfigParser, getConfigOption, getConfigIntOption, getConfigBoolOption
+import ReadDataset
-verstring = "%prog: version 3.9"
+verstring = "makerdsfromblat: version 3.10"
print verstring
+NUM_HEADER_LINES = 5
+
+
def main(argv=None):
if not argv:
argv = sys.argv
usage = "usage: python %prog label infilename outrdsfile [propertyName::propertyValue] [options]"
- parser = optparse.OptionParser(usage=usage)
- parser.add_option("--append", action="store_false", dest="init")
- parser.add_option("--index", action="store_true", dest="doIndex")
- parser.add_option("--rawreadID", action="store_false", dest="trimReadID")
- parser.add_option("--forceRNA", action="store_true", dest="forceRNA")
- parser.add_option("--flag", action="store_true", dest="flagReads")
- parser.add_option("--strict", type="int", dest="minSpliceLength",
- help="min required bp on each side of a splice")
- parser.add_option("--spliceonly", action="store_true", dest="spliceOnly")
- parser.add_option("--verbose", action="store_true", dest="verbose")
- parser.add_option("--cache", type="int", dest="cachePages")
- parser.add_option("--RNA", dest="geneDataFileName")
- parser.set_defaults(init=True, doIndex=False, trimReadID=True, minSpliceLength=0, forceRNA=False, flagReads=False, spliceOnly=False, verbose=False, cachePages=100000, geneDataFileName="")
+ parser = getParser(usage)
(options, args) = parser.parse_args(argv[1:])
if len(args) < 3:
options.cachePages, options.geneDataFileName, propertyList)
+def getParser(usage):
+ parser = optparse.OptionParser(usage=usage)
+ parser.add_option("--append", action="store_false", dest="init")
+ parser.add_option("--index", action="store_true", dest="doIndex")
+ parser.add_option("--rawreadID", action="store_false", dest="trimReadID")
+ parser.add_option("--forceRNA", action="store_true", dest="forceRNA")
+ parser.add_option("--flag", action="store_true", dest="flagReads")
+ parser.add_option("--strict", type="int", dest="minSpliceLength",
+ help="min required bp on each side of a splice")
+ parser.add_option("--spliceonly", action="store_true", dest="spliceOnly")
+ parser.add_option("--verbose", action="store_true", dest="verbose")
+ parser.add_option("--cache", type="int", dest="cachePages")
+ parser.add_option("--RNA", dest="geneDataFileName")
+
+ configParser = getConfigParser()
+ section = "makerdsfromblat"
+ init = getConfigBoolOption(configParser, section, "init", True)
+ doIndex = getConfigBoolOption(configParser, section, "doIndex", False)
+ trimReadID = getConfigBoolOption(configParser, section, "trimReadID", True)
+ minSpliceLength = getConfigIntOption(configParser, section, "minSpliceLength", 0)
+ forceRNA = getConfigBoolOption(configParser, section, "forceRNA", False)
+ flagReads = getConfigBoolOption(configParser, section, "flagReads", False)
+ spliceOnly = getConfigBoolOption(configParser, section, "spliceOnly", False)
+ verbose = getConfigBoolOption(configParser, section, "verbose", False)
+ cachePages = getConfigIntOption(configParser, section, "cachePages", 100000)
+ geneDataFileName = getConfigOption(configParser, section, "geneDataFileName", "")
+
+ parser.set_defaults(init=init, doIndex=doIndex, trimReadID=trimReadID, minSpliceLength=minSpliceLength, forceRNA=forceRNA,
+ flagReads=flagReads, spliceOnly=spliceOnly, verbose=verbose, cachePages=cachePages,
+ geneDataFileName=geneDataFileName)
+
+ return parser
+
+
def makerdsfromblat(label, filename, outdbname, dataType="DNA", init=True,
doIndex=False,trimReadID=True, minSpliceLength=0,
forceRNA=False, theFlag="", spliceOnly=False,
verbose=False, cachePages=100000, geneDataFileName="",
propertyList=[]):
- delimiter = "|"
- minIntron = 10
- maxBorder = 0
- index = 0
- insertSize = 100000
-
+ writeLog(outdbname + ".log", verstring, string.join(sys.argv[1:]))
if forceRNA:
print "forcing datatype to RNA"
dataType = "RNA"
- if dataType == "RNA":
- genedatafile = open(geneDataFileName)
-
- writeLog(outdbname + ".log", verstring, string.join(sys.argv[1:]))
-
geneDict = {}
mapDict = {}
if dataType == "RNA" and not forceRNA:
+ genedatafile = open(geneDataFileName)
for line in genedatafile:
fields = line.strip().split("\t")
blockCount = int(fields[7])
genedatafile.close()
- rds = readDataset(outdbname, init, dataType, verbose=True)
+ rds = ReadDataset.ReadDataset(outdbname, init, dataType, verbose=True)
#check that our cacheSize is better than the dataset's default cache size
defaultCacheSize = rds.getDefaultCacheSize()
# make some assumptions based on first read
infile = open(filename, "r")
- for arg in range(6):
+ for arg in range(NUM_HEADER_LINES):
line = infile.readline()
+ line = infile.readline()
fields = line.split()
readsize = int(fields[10])
pairedTest = fields[9][-2:]
rds.insertMetadata([("blat_mapped", "True")])
minReadScore = readsize - readsize/25 - 1
- trim = -4
+ maxBorder = 0
if dataType == "RNA":
+ trim = -4
maxBorder = readsize + trim
infile = open(filename, "r")
index = uIndex = mIndex = sIndex = lIndex = 0
bestScore = 0
# skip headers
- for arg in range(5):
+ for arg in range(NUM_HEADER_LINES):
line = infile.readline()
+ insertSize = 100000
+ delimiter = "|"
+ minIntron = 10
for line in infile:
lIndex += 1
fields = line.strip().split()