Rewrite of findall.py to clean code. Configuration tested using

[erange.git] / geneMrnaCountsWeighted.py
diff --git a/geneMrnaCountsWeighted.py b/geneMrnaCountsWeighted.py

index 5299d27d26c23f8729063eb31f0da390730b5ce5..74e7a0cd817e699af6b398960578f576a030f7e4 100755 (executable)
--- a/geneMrnaCountsWeighted.py
+++ b/geneMrnaCountsWeighted.py
@@ -68,6 +68,12 @@ def makeParser(usage=""):
      return parser
  
  
+#TODO: User-reported performance issue. Long run times under these conditions:
+#    small number of reads (~40-50M)
+#    all features on a single chromosome
+#
+#    User states this has been a long-standing problem.
+
  def geneMrnaCountsWeighted(genome, hitfile, countfile, outfilename, ignoreSense=True,
                             withUniqs=False, withMulti=False, acceptfile=None,
                             cachePages=None, doVerbose=False, extendGenome="", replaceModels=False):
@@ -200,6 +206,7 @@ def writeCountsToFile(outFilename, countFilename, allGIDs, genome, gidReadDict,
      for line in uniquecounts:
          fields = line.strip().split()
          # add a pseudo-count here to ease calculations below
+        #TODO: figure out why this was done in the prior implementation...
          uniqueCountDict[fields[0]] = float(fields[-1]) + 1
  
      uniquecounts.close()
@@ -264,4 +271,4 @@ def getTagCount(uniqueCountDict, gid, gidReadDict, read2GidDict):
  
  
  if __name__ == "__main__":
-    main(sys.argv)
+    main(sys.argv)
+\ No newline at end of file