X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=erange.git;a=blobdiff_plain;f=makerdsfromblat.py;fp=makerdsfromblat.py;h=17520401c52b5d153404aa4312a800017e9af3bb;hp=37576ca867fb2283f9a5d497c0bbca7d06191f7e;hb=77dccd7c98d8cdb60caaf178b1123df71ea662c9;hpb=bc30aca13e5ec397c92e67002fbf7a103130b828 diff --git a/makerdsfromblat.py b/makerdsfromblat.py index 37576ca..1752040 100755 --- a/makerdsfromblat.py +++ b/makerdsfromblat.py @@ -20,6 +20,9 @@ import ReadDataset verstring = "makerdsfromblat: version 3.10" print verstring +NUM_HEADER_LINES = 5 + + def main(argv=None): if not argv: argv = sys.argv @@ -98,24 +101,15 @@ def makerdsfromblat(label, filename, outdbname, dataType="DNA", init=True, verbose=False, cachePages=100000, geneDataFileName="", propertyList=[]): - delimiter = "|" - minIntron = 10 - maxBorder = 0 - index = 0 - insertSize = 100000 - + writeLog(outdbname + ".log", verstring, string.join(sys.argv[1:])) if forceRNA: print "forcing datatype to RNA" dataType = "RNA" - if dataType == "RNA": - genedatafile = open(geneDataFileName) - - writeLog(outdbname + ".log", verstring, string.join(sys.argv[1:])) - geneDict = {} mapDict = {} if dataType == "RNA" and not forceRNA: + genedatafile = open(geneDataFileName) for line in genedatafile: fields = line.strip().split("\t") blockCount = int(fields[7]) @@ -164,9 +158,10 @@ def makerdsfromblat(label, filename, outdbname, dataType="DNA", init=True, # make some assumptions based on first read infile = open(filename, "r") - for arg in range(6): + for arg in range(NUM_HEADER_LINES): line = infile.readline() + line = infile.readline() fields = line.split() readsize = int(fields[10]) pairedTest = fields[9][-2:] @@ -186,8 +181,9 @@ def makerdsfromblat(label, filename, outdbname, dataType="DNA", init=True, rds.insertMetadata([("blat_mapped", "True")]) minReadScore = readsize - readsize/25 - 1 - trim = -4 + maxBorder = 0 if dataType == "RNA": + trim = -4 maxBorder = readsize + trim infile = open(filename, "r") @@ -199,9 +195,12 @@ def makerdsfromblat(label, filename, outdbname, dataType="DNA", init=True, index = uIndex = mIndex = sIndex = lIndex = 0 bestScore = 0 # skip headers - for arg in range(5): + for arg in range(NUM_HEADER_LINES): line = infile.readline() + insertSize = 100000 + delimiter = "|" + minIntron = 10 for line in infile: lIndex += 1 fields = line.strip().split()