import Region
commoncodeVersion = 5.6
-currentRDSversion = 2.0
+#TODO: This is a function dumping ground - needs to be reworked
class ErangeError(Exception):
pass
logfile.close()
+def getChromInfoList(chrominfo):
+ chromInfoList = []
+ linelist = open(chrominfo)
+ for line in linelist:
+ fields = line.strip().split('\t')
+ chr = fields[0]
+ start = 0
+ end = int(fields[1])
+ chromInfoList.append((chr,start,end))
+
+ linelist.close()
+
+ return chromInfoList
+
+
+# BAM file support functions
+def isSpliceEntry(cigarTupleList):
+ isSplice = False
+ if cigarTupleList is not None:
+ for operation,length in cigarTupleList:
+ if operation == 3:
+ isSplice = True
+ break
+
+ return isSplice
+
+
+def addPairIDtoReadID(alignedread):
+ ID = alignedread.qname
+ if alignedread.is_read1:
+ ID = ID + '/1'
+
+ if alignedread.is_read2:
+ ID = ID + '/2'
+
+ return ID
+
+
+def getHeaderComment(bamHeader, commentKey):
+ for comment in bamHeader["CO"]:
+ fields = comment.split("\t")
+ if fields[0] == commentKey:
+ return fields[1]
+
+ raise KeyError
+
+
+def getReadSense(read):
+ if read.is_reverse:
+ sense = "-"
+ else:
+ sense = "+"
+
+ return sense
+
+#TODO: is this where we need to take into account the NH > 10 restriction?
+def countThisRead(read, countUniqs, countMulti, countSplices, sense):
+ countRead = True
+
+ if sense in ['-', '+'] and sense != getReadSense(read):
+ countRead = False
+ elif not countSplices and isSpliceEntry(read.cigar):
+ countRead = False
+ elif not countUniqs and read.opt('NH') == 1:
+ countRead = False
+ elif not countMulti and read.opt('NH') > 1:
+ countRead = False
+
+ return countRead
+
+
+# Cistematic functions
def getGeneInfoDict(genome, cache=False):
idb = geneinfoDB(cache=cache)
if genome == "dmelanogaster":
return geneannotDict
+# Configuration File Support
def getConfigParser(fileList=[]):
configFiles = ["erange.config", os.path.expanduser("~/.erange.config")]
for filename in fileList:
return setting
+# All the Other Stuff from before
def getMergedRegions(regionfilename, maxDist=1000, minHits=0, verbose=False, keepLabel=False,
fullChrom=False, chromField=1, scoreField=4, pad=0, compact=False,
doMerge=True, keepPeak=False, returnTop=0):
# the case of 3 regions ABC that are in the input file as ACB as it goes now when processing C there
# will be no merge with A as B is needed to bridge the two. When it comes time to process B it will
# be merged with A but that will exit the loop and the merge with C will be missed.
- #TODO: QForAli - Are we going to require sorted region files to have this even work?
+ #TODO: Are we going to require sorted region files to have this even work?
for regionEntry in regionList:
if regionEntry[0] == "#":
if "pvalue" in regionEntry:
# implementing a triangular smooth
smoothArray = array("f", [0.] * length)
- #TODO: QForAli - really? We're going to insert MANUFACTURED data (0.0's) and use them in the calculation
- # (probably) just to avoid IndexError exceptions. Then we are going to completely ignore adjusting the
- # last 2 elements of the array for (probably) the same issue. Is this how erange is dealing with the
- # edge cases?
for pos in range(2,length -2):
smoothArray[pos] = (seqArray[pos -2] + 2 * seqArray[pos - 1] + 3 * seqArray[pos] + 2 * seqArray[pos + 1] + seqArray[pos + 2]) / 9.0