erange version 4.0a dev release
[erange.git] / analyzego.py
index d4f9f6f95500c1a1b351d6f717629eb7018cc9ea..6542b1bf24c256e5907c799c785b0ab060fb9eaf 100755 (executable)
@@ -4,11 +4,12 @@ try:
 except:
     print "psyco not running"
 
-import sys, optparse
+import sys
+import optparse
+from commoncode import getGeneInfoDict, getConfigParser, getConfigOption
 from cistematic.cisstat.analyzego import calculateGOStats
-from cistematic.core.geneinfo import geneinfoDB
 
-print "version 2.1"
+print "analyzego: version 2.2"
 
 def main(argv=None):
     if not argv:
@@ -16,12 +17,7 @@ def main(argv=None):
 
     usage = "usage: python %prog genome infilename prefix [--geneName] [--field fieldID]"
 
-    parser = optparse.OptionParser(usage=usage)
-    parser.add_option("--geneName", action="store_true", dest="translateGene",
-                      help="translate gene")
-    parser.add_option("--field", type="int", dest="fieldID",
-                      help="column containing gene ID/Name")
-    parser.set_defaults(translateGene=False, fieldID=None)
+    parser = makeParser(usage)
     (options, args) = parser.parse_args(argv[1:])
 
     if len(args) < 3:
@@ -42,6 +38,23 @@ def main(argv=None):
     analyzeGOFromFile(genome, infilename, prefix, options.translateGene, fieldID)
 
 
+def makeParser(usage=""):
+    parser = optparse.OptionParser(usage=usage)
+    parser.add_option("--geneName", action="store_true", dest="translateGene",
+                      help="translate gene")
+    parser.add_option("--field", type="int", dest="fieldID",
+                      help="column containing gene ID/Name")
+
+    configParser = getConfigParser()
+    section = "analyzego"
+    translateGene = getConfigOption(configParser, section, "translateGene", False)
+    fieldID = getConfigOption(configParser, section, "fieldID", None)
+
+    parser.set_defaults(translateGene=translateGene, fieldID=fieldID)
+
+    return parser
+
+
 def analyzeGOFromFile(genome, infilename, prefix, translateGene=False, fieldID=1):
     infile = open(infilename)
     analyzeGO(genome, infile, prefix, translateGene=False, fieldID=1)
@@ -50,12 +63,7 @@ def analyzeGOFromFile(genome, infilename, prefix, translateGene=False, fieldID=1
 
 def analyzeGO(genome, geneInfoList, prefix, translateGene=False, fieldID=1):
     if translateGene:
-        idb = geneinfoDB(cache=True)
-        geneinfoDict = idb.getallGeneInfo(genome)
-        symbolToGidDict = {}
-        for gid in geneinfoDict:
-            symbol = geneinfoDict[gid][0][0].strip()
-            symbolToGidDict[symbol] = gid
+        symbolToGidDict = getSymbolDict(genome)
 
     locusList = []
     for line in geneInfoList:
@@ -82,5 +90,16 @@ def analyzeGO(genome, geneInfoList, prefix, translateGene=False, fieldID=1):
     if len(locusList) > 0:
         calculateGOStats(locusList, prefix)
 
+
+def getSymbolDict(genome):
+    geneinfoDict = getGeneInfoDict(genome, cache=True)
+    symbolToGidDict = {}
+    for gid in geneinfoDict:
+        symbol = geneinfoDict[gid][0][0].strip()
+        symbolToGidDict[symbol] = gid
+
+    return symbolToGidDict
+
+
 if __name__ == "__main__":
     main(sys.argv)
\ No newline at end of file