+
+def getDictionariesFromFiles(uniquecountfilename, splicecountfilename, fieldID, candidatefilename=""):
+
+ gidToGeneDict, splicecount = getGeneDict(uniquecountfilename, splicecountfilename, fieldID)
+ uniqueGidList = gidToGeneDict.keys()
+ if candidatefilename:
+ candidateDict, geneDictUpdates = processCandidatesFile(candidatefilename, fieldID, uniqueGidList)
+ else:
+ candidateDict = {}
+ geneDictUpdates = {}
+
+ farList = []
+ for gid in geneDictUpdates:
+ gidToGeneDict[gid] = geneDictUpdates[gid]
+ geneLength = 0.
+ for (cCount, cLength, chrom, cStart, cStop) in candidateDict[gid]:
+ geneLength += cLength / 1000.
+
+ farList.append((gid, geneLength))
+
+ return uniqueGidList, gidToGeneDict, candidateDict, farList, splicecount
+
+
+def getGeneDict(uniquecountfilename, splicecountfilename, fieldID):
+
+ gidToGeneDict = {}
+ uniquecountfile = open(uniquecountfilename)
+ for line in uniquecountfile:
+ fields = line.strip().split()
+ gid = fields[fieldID]
+ gene = fields[1]
+ gidToGeneDict[gid] = {"name": gene, "count": float(fields[-1])}
+
+ uniquecountfile.close()
+ splicecount = 0.
+ if splicecountfilename != "none":
+ splicecountfile = open(splicecountfilename)
+ for line in splicecountfile:
+ fields = line.strip().split()
+ gid = fields[fieldID]
+ try:
+ gidToGeneDict[gid]["count"] += float(fields[-1])
+ except:
+ print fields
+ continue
+
+ splicecount += float(fields[-1])
+
+ splicecountfile.close()
+
+ return gidToGeneDict, splicecount
+
+
+def processCandidatesFile(candidatefilename, fieldID, gidList):
+ """ Reads entries from the candiates file
+ gid entries not in gidList are returned for inclusion
+ creates and returns a dictionary keyed off gid with a single list of
+ tuples (count, length, chrom, start, stop) for each gid
+ """
+ geneDictUpdates = {}
+ candidateDict = {}
+ candidatefile = open(candidatefilename)
+ for line in candidatefile.readlines():
+ if "#" in line:
+ continue
+
+ fields = line.strip().split()
+ gid = fields[1]
+ gene = fields[0]
+ if gid not in gidList:
+ try:
+ geneDictUpdates[gid]["count"] += float(fields[6])
+ except KeyError:
+ geneDictUpdates[gid] = {"name": gene, "count": float(fields[6])}
+
+ try:
+ candidateDict[gid].append((float(fields[6]), abs(int(fields[5]) - int(fields[4])), fields[3], fields[4], fields[5]))
+ except KeyError:
+ candidateDict[gid] = [(float(fields[6]), abs(int(fields[5]) - int(fields[4])), fields[3], fields[4], fields[5])]
+
+ candidatefile.close()
+
+ return candidateDict, geneDictUpdates
+
+
+def getGenomeFeatures(genome, doCache=False, extendGenome="", replaceModels=False):
+ if extendGenome:
+ if replaceModels:
+ print "will replace gene models with %s" % extendGenome
+ else:
+ print "will extend gene models with %s" % extendGenome
+
+ if doCache:
+ cacheGeneDB(genome)
+ hg = Genome(genome, dbFile=chooseDB(genome), inRAM=True)
+ print "%s cached" % genome
+ else:
+ hg = Genome(genome, inRAM=True)
+
+ if extendGenome != "":
+ hg.extendFeatures(extendGenome, replace=replaceModels)
+
+ featuresDict = hg.getallGeneFeatures()
+