X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=erange.git;a=blobdiff_plain;f=analyzego.py;h=f8ff0017f9b51401411423b9ff8d5420b0714409;hp=d4f9f6f95500c1a1b351d6f717629eb7018cc9ea;hb=HEAD;hpb=5e4ae21098dba3d1edcf11e7279da0d84c3422e4 diff --git a/analyzego.py b/analyzego.py index d4f9f6f..f8ff001 100755 --- a/analyzego.py +++ b/analyzego.py @@ -4,11 +4,12 @@ try: except: print "psyco not running" -import sys, optparse +import sys +import optparse +from commoncode import getGeneInfoDict, getConfigParser, getConfigOption, getConfigIntOption from cistematic.cisstat.analyzego import calculateGOStats -from cistematic.core.geneinfo import geneinfoDB -print "version 2.1" +print "analyzego: version 2.2" def main(argv=None): if not argv: @@ -16,12 +17,7 @@ def main(argv=None): usage = "usage: python %prog genome infilename prefix [--geneName] [--field fieldID]" - parser = optparse.OptionParser(usage=usage) - parser.add_option("--geneName", action="store_true", dest="translateGene", - help="translate gene") - parser.add_option("--field", type="int", dest="fieldID", - help="column containing gene ID/Name") - parser.set_defaults(translateGene=False, fieldID=None) + parser = makeParser(usage) (options, args) = parser.parse_args(argv[1:]) if len(args) < 3: @@ -39,23 +35,38 @@ def main(argv=None): infilename = args[1] prefix = args[2] - analyzeGOFromFile(genome, infilename, prefix, options.translateGene, fieldID) + analyzeGOFromFile(genome, infilename, prefix, options.translateGene, fieldID, numTests=options.numTests) + + +def makeParser(usage=""): + parser = optparse.OptionParser(usage=usage) + parser.add_option("--geneName", action="store_true", dest="translateGene", + help="translate gene") + parser.add_option("--field", type="int", dest="fieldID", + help="column containing gene ID/Name") + parser.add_option("--trials", type="int", dest="numTests", + help="column containing gene ID/Name") + + configParser = getConfigParser() + section = "analyzego" + translateGene = getConfigOption(configParser, section, "translateGene", False) + fieldID = getConfigOption(configParser, section, "fieldID", None) + numTests = getConfigIntOption(configParser, section, "numTests", 1) + + parser.set_defaults(translateGene=translateGene, fieldID=fieldID, numTests=numTests) + return parser -def analyzeGOFromFile(genome, infilename, prefix, translateGene=False, fieldID=1): + +def analyzeGOFromFile(genome, infilename, prefix, translateGene=False, fieldID=1, numTests=1): infile = open(infilename) - analyzeGO(genome, infile, prefix, translateGene=False, fieldID=1) + analyzeGO(genome, infile, prefix, translateGene, fieldID, numTests) infile.close() -def analyzeGO(genome, geneInfoList, prefix, translateGene=False, fieldID=1): +def analyzeGO(genome, geneInfoList, prefix, translateGene=False, fieldID=1, numTests=1): if translateGene: - idb = geneinfoDB(cache=True) - geneinfoDict = idb.getallGeneInfo(genome) - symbolToGidDict = {} - for gid in geneinfoDict: - symbol = geneinfoDict[gid][0][0].strip() - symbolToGidDict[symbol] = gid + symbolToGidDict = getSymbolDict(genome) locusList = [] for line in geneInfoList: @@ -80,7 +91,18 @@ def analyzeGO(genome, geneInfoList, prefix, translateGene=False, fieldID=1): locusList.append((genome, gID)) if len(locusList) > 0: - calculateGOStats(locusList, prefix) + calculateGOStats(locusList, prefix, trials=numTests) + + +def getSymbolDict(genome): + geneinfoDict = getGeneInfoDict(genome, cache=True) + symbolToGidDict = {} + for gid in geneinfoDict: + symbol = geneinfoDict[gid][0][0].strip() + symbolToGidDict[symbol] = gid + + return symbolToGidDict + if __name__ == "__main__": main(sys.argv) \ No newline at end of file