projects
/
erange.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Release version for Erange 4.0a
[erange.git]
/
makerdsfromblat.py
diff --git
a/makerdsfromblat.py
b/makerdsfromblat.py
index 37576ca867fb2283f9a5d497c0bbca7d06191f7e..17520401c52b5d153404aa4312a800017e9af3bb 100755
(executable)
--- a/
makerdsfromblat.py
+++ b/
makerdsfromblat.py
@@
-20,6
+20,9
@@
import ReadDataset
verstring = "makerdsfromblat: version 3.10"
print verstring
verstring = "makerdsfromblat: version 3.10"
print verstring
+NUM_HEADER_LINES = 5
+
+
def main(argv=None):
if not argv:
argv = sys.argv
def main(argv=None):
if not argv:
argv = sys.argv
@@
-98,24
+101,15
@@
def makerdsfromblat(label, filename, outdbname, dataType="DNA", init=True,
verbose=False, cachePages=100000, geneDataFileName="",
propertyList=[]):
verbose=False, cachePages=100000, geneDataFileName="",
propertyList=[]):
- delimiter = "|"
- minIntron = 10
- maxBorder = 0
- index = 0
- insertSize = 100000
-
+ writeLog(outdbname + ".log", verstring, string.join(sys.argv[1:]))
if forceRNA:
print "forcing datatype to RNA"
dataType = "RNA"
if forceRNA:
print "forcing datatype to RNA"
dataType = "RNA"
- if dataType == "RNA":
- genedatafile = open(geneDataFileName)
-
- writeLog(outdbname + ".log", verstring, string.join(sys.argv[1:]))
-
geneDict = {}
mapDict = {}
if dataType == "RNA" and not forceRNA:
geneDict = {}
mapDict = {}
if dataType == "RNA" and not forceRNA:
+ genedatafile = open(geneDataFileName)
for line in genedatafile:
fields = line.strip().split("\t")
blockCount = int(fields[7])
for line in genedatafile:
fields = line.strip().split("\t")
blockCount = int(fields[7])
@@
-164,9
+158,10
@@
def makerdsfromblat(label, filename, outdbname, dataType="DNA", init=True,
# make some assumptions based on first read
infile = open(filename, "r")
# make some assumptions based on first read
infile = open(filename, "r")
- for arg in range(
6
):
+ for arg in range(
NUM_HEADER_LINES
):
line = infile.readline()
line = infile.readline()
+ line = infile.readline()
fields = line.split()
readsize = int(fields[10])
pairedTest = fields[9][-2:]
fields = line.split()
readsize = int(fields[10])
pairedTest = fields[9][-2:]
@@
-186,8
+181,9
@@
def makerdsfromblat(label, filename, outdbname, dataType="DNA", init=True,
rds.insertMetadata([("blat_mapped", "True")])
minReadScore = readsize - readsize/25 - 1
rds.insertMetadata([("blat_mapped", "True")])
minReadScore = readsize - readsize/25 - 1
- trim = -4
+ maxBorder = 0
if dataType == "RNA":
if dataType == "RNA":
+ trim = -4
maxBorder = readsize + trim
infile = open(filename, "r")
maxBorder = readsize + trim
infile = open(filename, "r")
@@
-199,9
+195,12
@@
def makerdsfromblat(label, filename, outdbname, dataType="DNA", init=True,
index = uIndex = mIndex = sIndex = lIndex = 0
bestScore = 0
# skip headers
index = uIndex = mIndex = sIndex = lIndex = 0
bestScore = 0
# skip headers
- for arg in range(
5
):
+ for arg in range(
NUM_HEADER_LINES
):
line = infile.readline()
line = infile.readline()
+ insertSize = 100000
+ delimiter = "|"
+ minIntron = 10
for line in infile:
lIndex += 1
fields = line.strip().split()
for line in infile:
lIndex += 1
fields = line.strip().split()