development release: conversion of ReadDataset to use BAM files
[erange.git] / geneMrnaCounts.py
index cf5065ab88c40279c7ca5dfb30b70bc99dfd2065..7b4a2cc819c30976692d9721ddb363756ebdf70a 100755 (executable)
@@ -124,7 +124,7 @@ def geneMrnaCounts(genomeName, hitfile, outfilename, trackStrand=False, doSplice
             continue
 
         if countFeats:
-            seenFeaturesByChromDict[chrom] = []
+            seenFeaturesByChromDict[chrom] = set([])
 
         print "\nchr%s" % chrom
         fullchrom = "chr%s" % chrom
@@ -137,18 +137,18 @@ def geneMrnaCounts(genomeName, hitfile, outfilename, trackStrand=False, doSplice
                     if featureSense == "R":
                         checkSense = "-"
 
-                    regionList.append((gid, fullchrom, start, stop, checkSense))
+                    regionData = (gid, fullchrom, start, stop, checkSense)
                     count = hitRDS.getCounts(fullchrom, start, stop, uniqs=doUniqs, multi=doMulti, splices=doSplices, sense=checkSense)
                 else:
-                    regionList.append((gid, fullchrom, start, stop))
+                    regionData = (gid, fullchrom, start, stop)
                     count = hitRDS.getCounts(fullchrom, start, stop, uniqs=doUniqs, multi=doMulti, splices=doSplices)
-                    if count != 0:
-                        print count
 
                 gidCount[gid] += count
+                if markGID:
+                    regionList.append(regionData)
+
                 if countFeats:
-                    if (start, stop, gid, featureSense) not in seenFeaturesByChromDict[chrom]:
-                        seenFeaturesByChromDict[chrom].append((start, stop, gid, featureSense))
+                    seenFeaturesByChromDict[chrom].add((start, stop, gid, featureSense))
             except:
                 print "problem with %s - skipping" % gid