Rewrite of findall.py to clean code. Configuration tested using
[erange.git] / geneMrnaCountsWeighted.py
index 5299d27d26c23f8729063eb31f0da390730b5ce5..74e7a0cd817e699af6b398960578f576a030f7e4 100755 (executable)
@@ -68,6 +68,12 @@ def makeParser(usage=""):
     return parser
 
 
+#TODO: Reported user performance issue. Long run times under these conditions:
+#    small number of reads (~40-50M)
+#    all features on a single chromosome
+#
+#    The user states this has been a long-standing problem.
+
 def geneMrnaCountsWeighted(genome, hitfile, countfile, outfilename, ignoreSense=True,
                            withUniqs=False, withMulti=False, acceptfile=None,
                            cachePages=None, doVerbose=False, extendGenome="", replaceModels=False):
@@ -200,6 +206,7 @@ def writeCountsToFile(outFilename, countFilename, allGIDs, genome, gidReadDict,
     for line in uniquecounts:
         fields = line.strip().split()
         # add a pseudo-count here to ease calculations below
+        #TODO: figure out why this was done in the prior implementation...
         uniqueCountDict[fields[0]] = float(fields[-1]) + 1
 
     uniquecounts.close()
@@ -264,4 +271,4 @@ def getTagCount(uniqueCountDict, gid, gidReadDict, read2GidDict):
 
 
 if __name__ == "__main__":
-    main(sys.argv)
+    main(sys.argv)
\ No newline at end of file