snapshot of 4.0a development. initial git repo commit
[erange.git] / analyzego.py
1 try:
2     import psyco
3     psyco.full()
4 except:
5     print "psyco not running"
6
7 import sys, optparse
8 from cistematic.cisstat.analyzego import calculateGOStats
9 from cistematic.core.geneinfo import geneinfoDB
10
11 print "version 2.1"
12
13 def main(argv=None):
14     if not argv:
15         argv = sys.argv
16
17     usage = "usage: python %prog genome infilename prefix [--geneName] [--field fieldID]"
18
19     parser = optparse.OptionParser(usage=usage)
20     parser.add_option("--geneName", action="store_true", dest="translateGene",
21                       help="translate gene")
22     parser.add_option("--field", type="int", dest="fieldID",
23                       help="column containing gene ID/Name")
24     parser.set_defaults(translateGene=False, fieldID=None)
25     (options, args) = parser.parse_args(argv[1:])
26
27     if len(args) < 3:
28         print usage
29         sys.exit(1)
30
31     fieldID = 1
32     if options.translateGene:
33         fieldID = 0
34
35     if options.fieldID is not None:
36         fieldID = options.fieldID
37
38     genome = args[0]
39     infilename = args[1]
40     prefix = args[2]
41
42     analyzeGOFromFile(genome, infilename, prefix, options.translateGene, fieldID)
43
44
45 def analyzeGOFromFile(genome, infilename, prefix, translateGene=False, fieldID=1):
46     infile = open(infilename)
47     analyzeGO(genome, infile, prefix, translateGene=False, fieldID=1)
48     infile.close()
49
50
51 def analyzeGO(genome, geneInfoList, prefix, translateGene=False, fieldID=1):
52     if translateGene:
53         idb = geneinfoDB(cache=True)
54         geneinfoDict = idb.getallGeneInfo(genome)
55         symbolToGidDict = {}
56         for gid in geneinfoDict:
57             symbol = geneinfoDict[gid][0][0].strip()
58             symbolToGidDict[symbol] = gid
59
60     locusList = []
61     for line in geneInfoList:
62         fields = line.split()
63         if translateGene:
64             gene = fields[fieldID]
65             if "LOC" in gene:
66                 gID = gene[3:]
67             elif "FAR" in gene:
68                 print "ignoring %s" % gene
69                 continue
70             else:
71                 try:
72                     gID = symbolToGidDict[gene]
73                 except KeyError:
74                     print "ignoring %s" % gene
75                     continue
76         else:
77             gID = fields[fieldID]
78
79         if (genome, gID) not in locusList:
80             locusList.append((genome, gID))
81
82     if len(locusList) > 0:
83         calculateGOStats(locusList, prefix)
84
85 if __name__ == "__main__":
86     main(sys.argv)