5 print "psyco not running"
9 from commoncode import getFeaturesByChromDict, getConfigParser, getConfigOption, getConfigBoolOption, countThisRead
10 from cistematic.genomes import Genome
11 from cistematic.core.geneinfo import geneinfoDB
13 print "geneMrnaCounts: version 6.0"
20 usage = "usage: python %prog genome rdsfile outfilename [options]"
22 parser = getParser(usage)
23 (options, args) = parser.parse_args(argv[1:])
33 geneMrnaCounts(genomeName, hitfile, outfilename, options.trackStrand, options.doSplices,
34 options.doUniqs, options.doMulti, options.extendGenome, options.replaceModels,
35 options.searchGID, options.countFeats, options.cachePages)
39 parser = optparse.OptionParser(usage=usage)
40 parser.add_option("--stranded", action="store_true", dest="trackStrand")
41 parser.add_option("--splices", action="store_true", dest="doSplices")
42 parser.add_option("--noUniqs", action="store_false", dest="doUniqs")
43 parser.add_option("--multi", action="store_true", dest="doMulti")
44 parser.add_option("--models", dest="extendGenome")
45 parser.add_option("--replacemodels", action="store_true", dest="replaceModels")
46 parser.add_option("--searchGID", action="store_true", dest="searchGID")
47 parser.add_option("--countfeatures", action="store_true", dest="countFeats")
48 parser.add_option("--cache", type="int", dest="cachePages")
50 configParser = getConfigParser()
51 section = "geneMrnaCounts"
52 trackStrand = getConfigBoolOption(configParser, section, "trackStrand", False)
53 doSplices = getConfigBoolOption(configParser, section, "doSplices", False)
54 doUniqs = getConfigBoolOption(configParser, section, "doUniqs", True)
55 doMulti = getConfigBoolOption(configParser, section, "doMulti", False)
56 extendGenome = getConfigOption(configParser, section, "extendGenome", "")
57 replaceModels = getConfigBoolOption(configParser, section, "replaceModels", False)
58 searchGID = getConfigBoolOption(configParser, section, "searchGID", False)
59 countFeats = getConfigBoolOption(configParser, section, "countFeats", False)
60 cachePages = getConfigOption(configParser, section, "cachePages", None)
62 parser.set_defaults(trackStrand=trackStrand, doSplices=doSplices, doUniqs=doUniqs, doMulti=doMulti,
63 extendGenome=extendGenome, replaceModels=replaceModels, searchGID=searchGID,
64 countFeats=countFeats, cachePages=cachePages)
68 def geneMrnaCounts(genomeName, bamfile, outfilename, trackStrand=False, doSplices=False,
69 doUniqs=True, doMulti=False, extendGenome="", replaceModels=False,
70 searchGID=False, countFeats=False):
73 print "will track strandedness"
80 print "will replace gene models with %s" % extendGenome
82 print "will extend gene models with %s" % extendGenome
86 genome = Genome(genomeName, inRAM=True)
87 if extendGenome != "":
88 genome.extendFeatures(extendGenome, replace=replaceModels)
90 print "getting gene features...."
91 featuresByChromDict = getFeaturesByChromDict(genome)
93 seenFeaturesByChromDict = {}
94 print "getting geneIDs...."
95 gidList = genome.allGIDs()
101 chromosomeList = [chrom for chrom in bamfile.references if chrom != "chrM"]
102 for chrom in chromosomeList:
103 if chrom not in featuresByChromDict:
107 seenFeaturesByChromDict[chrom] = set([])
109 print "\nchr%s" % chrom
110 print "counting GIDs"
111 for (start, stop, gid, featureSense, featureType) in featuresByChromDict[chrom]:
113 if doStranded == "track":
115 if featureSense == "R":
118 count = getCounts(chrom, start, stop, uniqs=doUniqs, multi=doMulti, splices=doSplices, sense=checkSense)
120 count = getCounts(chrom, start, stop, uniqs=doUniqs, multi=doMulti, splices=doSplices)
122 gidCount[gid] += count
124 seenFeaturesByChromDict[chrom].add((start, stop, gid, featureSense))
126 print "problem with %s - skipping" % gid
130 numFeatures = countFeatures(seenFeaturesByChromDict)
131 print "saw %d features" % numFeatures
133 writeOutputFile(outfilename, genome, gidCount, searchGID)
136 def getCounts(bamfile, chrom, start, stop, uniqs=True, multi=False, splices=False, sense=''):
138 for alignedread in bamfile.fetch(chrom, start, stop):
139 if countThisRead(alignedread, uniqs, multi, splices, sense):
140 count += 1.0/alignedread.opt('NH')
145 def countFeatures(seenFeaturesByChromDict):
147 for chrom in seenFeaturesByChromDict.keys():
149 count += len(seenFeaturesByChromDict[chrom])
156 def writeOutputFile(outfilename, genome, gidCount, searchGID):
157 geneAnnotDict = genome.allAnnotInfo()
158 genomeName = genome.genome
159 outfile = open(outfilename, "w")
160 idb = geneinfoDB(cache=True)
161 geneInfoDict = idb.getallGeneInfo(genomeName)
163 symbol = getGeneSymbol(gid, searchGID, geneInfoDict, idb, genomeName, geneAnnotDict)
164 outfile.write("%s\t%s\t%d\n" % (gid, symbol, gidCount[gid]))
169 def getGeneSymbol(gid, searchGID, geneInfoDict, idb, genomeName, geneAnnotDict):
171 if searchGID and gid not in geneInfoDict:
172 actualGeneID = idb.getGeneID(genomeName, gid)
173 if len(actualGeneID) > 0:
174 lookupGID = actualGeneID[1]
177 geneinfo = geneInfoDict[lookupGID]
178 symbol = geneinfo[0][0]
179 except (KeyError, IndexError):
181 symbol = geneAnnotDict[(genomeName, gid)][0]
182 except (KeyError, IndexError):
183 symbol = "LOC%s" % gid
188 if __name__ == "__main__":