X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=erange.git;a=blobdiff_plain;f=geneMrnaCountsWeighted.py;fp=geneMrnaCountsWeighted.py;h=74e7a0cd817e699af6b398960578f576a030f7e4;hp=5299d27d26c23f8729063eb31f0da390730b5ce5;hb=03f1e0b3bab22d517ad75b9af4d54e8fcb8540fb;hpb=b54eee5365a0fad35d2f6168eaac82ff5a359222 diff --git a/geneMrnaCountsWeighted.py b/geneMrnaCountsWeighted.py index 5299d27..74e7a0c 100755 --- a/geneMrnaCountsWeighted.py +++ b/geneMrnaCountsWeighted.py @@ -68,6 +68,12 @@ def makeParser(usage=""): return parser +#TODO: Reported user performance issue. Long run times in conditions: +# small number of reads ~40-50M +# all features on single chromosome +# +# User states has been a long time problem. + def geneMrnaCountsWeighted(genome, hitfile, countfile, outfilename, ignoreSense=True, withUniqs=False, withMulti=False, acceptfile=None, cachePages=None, doVerbose=False, extendGenome="", replaceModels=False): @@ -200,6 +206,7 @@ def writeCountsToFile(outFilename, countFilename, allGIDs, genome, gidReadDict, for line in uniquecounts: fields = line.strip().split() # add a pseudo-count here to ease calculations below + #TODO: figure out why this was done in prior implementation... uniqueCountDict[fields[0]] = float(fields[-1]) + 1 uniquecounts.close() @@ -264,4 +271,4 @@ def getTagCount(uniqueCountDict, gid, gidReadDict, read2GidDict): if __name__ == "__main__": - main(sys.argv) + main(sys.argv) \ No newline at end of file