X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=erange.git;a=blobdiff_plain;f=analyzego.py;h=6542b1bf24c256e5907c799c785b0ab060fb9eaf;hp=d4f9f6f95500c1a1b351d6f717629eb7018cc9ea;hb=0d3e3112fd04c2e6b44a25cacef1d591658ad181;hpb=5e4ae21098dba3d1edcf11e7279da0d84c3422e4 diff --git a/analyzego.py b/analyzego.py index d4f9f6f..6542b1b 100755 --- a/analyzego.py +++ b/analyzego.py @@ -4,11 +4,12 @@ try: except: print "psyco not running" -import sys, optparse +import sys +import optparse +from commoncode import getGeneInfoDict, getConfigParser, getConfigOption from cistematic.cisstat.analyzego import calculateGOStats -from cistematic.core.geneinfo import geneinfoDB -print "version 2.1" +print "analyzego: version 2.2" def main(argv=None): if not argv: @@ -16,12 +17,7 @@ def main(argv=None): usage = "usage: python %prog genome infilename prefix [--geneName] [--field fieldID]" - parser = optparse.OptionParser(usage=usage) - parser.add_option("--geneName", action="store_true", dest="translateGene", - help="translate gene") - parser.add_option("--field", type="int", dest="fieldID", - help="column containing gene ID/Name") - parser.set_defaults(translateGene=False, fieldID=None) + parser = makeParser(usage) (options, args) = parser.parse_args(argv[1:]) if len(args) < 3: @@ -42,6 +38,23 @@ def main(argv=None): analyzeGOFromFile(genome, infilename, prefix, options.translateGene, fieldID) +def makeParser(usage=""): + parser = optparse.OptionParser(usage=usage) + parser.add_option("--geneName", action="store_true", dest="translateGene", + help="translate gene") + parser.add_option("--field", type="int", dest="fieldID", + help="column containing gene ID/Name") + + configParser = getConfigParser() + section = "analyzego" + translateGene = getConfigOption(configParser, section, "translateGene", False) + fieldID = getConfigOption(configParser, section, "fieldID", None) + + parser.set_defaults(translateGene=translateGene, fieldID=fieldID) + + return parser + + def analyzeGOFromFile(genome, infilename, prefix, translateGene=False, fieldID=1): infile = open(infilename) analyzeGO(genome, infile, prefix, translateGene=False, fieldID=1) @@ -50,12 +63,7 @@ def analyzeGOFromFile(genome, infilename, prefix, translateGene=False, fieldID=1 def analyzeGO(genome, geneInfoList, prefix, translateGene=False, fieldID=1): if translateGene: - idb = geneinfoDB(cache=True) - geneinfoDict = idb.getallGeneInfo(genome) - symbolToGidDict = {} - for gid in geneinfoDict: - symbol = geneinfoDict[gid][0][0].strip() - symbolToGidDict[symbol] = gid + symbolToGidDict = getSymbolDict(genome) locusList = [] for line in geneInfoList: @@ -82,5 +90,16 @@ def analyzeGO(genome, geneInfoList, prefix, translateGene=False, fieldID=1): if len(locusList) > 0: calculateGOStats(locusList, prefix) + +def getSymbolDict(genome): + geneinfoDict = getGeneInfoDict(genome, cache=True) + symbolToGidDict = {} + for gid in geneinfoDict: + symbol = geneinfoDict[gid][0][0].strip() + symbolToGidDict[symbol] = gid + + return symbolToGidDict + + if __name__ == "__main__": main(sys.argv) \ No newline at end of file