snapshot of 4.0a development. initial git repo commit
[erange.git] / getGOgenes.py
1 import sys, optparse
2 from cistematic.genomes import Genome
3 from cistematic.core.geneinfo import geneinfoDB
4
5 print "%prog: version 3.1"
6
7 def main(argv=None):
8     if not argv:
9         argv = sys.argv
10
11     usage = "usage: python %s genome GOID1 [GOID2 ....] [--outfile outfilename] [--append] [--restrict genefile]"
12
13     parser = optparse.OptionParser(usage=usage)
14     parser.add_option("--outfile", dest="outfilename")
15     parser.add_option("--append", action="store_true", dest="append")
16     parser.add_option("--restrict", dest="restrictfilename")
17     parser.set_defaults(outfilename=None, restrictfilename=None, append=False)
18     (options, args) = parser.parse_args(argv[1:])
19
20     if len(args) < 2:
21         print usage
22         sys.exit(1)
23
24     genome = args[0]
25
26     GOIDlist = []
27     for arg in args:
28         if "GO:" in arg:
29             GOIDlist.append(arg)
30
31     getGOgenes(genome, GOIDlist, options.outfilename, options.restrictfilename, options.append)
32
33
34 def getGOgenes(genome, GOIDlist, outfilename=None, restrictfilename=None, append=False):
35     writeOut = False
36     if outfilename is not None:
37         writeOut = True
38
39     restrict = False
40     if restrictfilename is not None:
41         restrict = True
42     
43     hg = Genome(genome)
44     idb = geneinfoDB()
45
46     print sys.argv
47     print GOIDlist
48
49     firstGeneList = []
50     for GOID in GOIDlist:
51         testList = hg.allGIDsbyGOID(GOID)
52         print "GOID: %s (%d)" % (GOID, len(testList))
53         firstGeneList += testList
54
55     geneDict = {}
56     for gid in firstGeneList:
57         geneDict[gid] = 1
58
59     geneList = geneDict.keys()
60     print len(geneList)
61     geneInfoList = idb.getallGeneInfo(genome)
62
63     if writeOut:
64         if append:
65             outfile = open(outfilename, "a")
66         else:
67             outfile = open(outfilename, "w")
68
69         for GOID in GOIDlist:
70             outfile.write("#%s\n" % GOID) 
71
72     restrictList = []
73     restrictDict = {}
74     if restrict:
75         restrictFile = open(restrictfilename)
76         for line in restrictFile:
77             fields = line.strip().split()
78             restrictList.append(fields[0])
79             restrictDict[fields[0]] = line
80
81     outList = []
82     symbolDict = {}
83     for gid in geneList:
84         symbol = "LOC" + gid
85         if restrict and gid not in restrictList:
86             continue
87
88         try:
89             symbol = geneInfoList[gid][0][0]
90         except:
91             pass
92
93         if restrict:
94             symbolDict[symbol] = restrictDict[gid]
95
96         outList.append(symbol)
97
98     outList.sort()
99     for symbol in outList:
100         if writeOut:
101             if restrict:
102                 outfile.write(symbolDict[symbol])
103             else:
104                 outfile.write(symbol + "\n")
105         else:
106             print symbol
107
108     if writeOut:
109         outfile.close()
110
111
112 if __name__ == "__main__":
113     main(sys.argv)