first pass cleanup of cistematic/genomes; change bamPreprocessing
[erange.git] / getGOgenes.py
1 import sys
2 import optparse
3 from cistematic.genomes import Genome
4 from commoncode import getGeneInfoDict, getConfigParser, getConfigOption, getConfigBoolOption
5
6 print "getGOgenes: version 3.2"
7
8 def main(argv=None):
9     if not argv:
10         argv = sys.argv
11
12     usage = "usage: python %s genome GOID1 [GOID2 ....] [--outfile outfilename] [--append] [--restrict genefile]"
13
14     parser = makeParser(usage)
15     (options, args) = parser.parse_args(argv[1:])
16
17     if len(args) < 2:
18         print usage
19         sys.exit(1)
20
21     genome = args[0]
22
23     GOIDlist = []
24     for arg in args:
25         if "GO:" in arg:
26             GOIDlist.append(arg)
27
28     getGOgenes(genome, GOIDlist, options.outfilename, options.restrictfilename, options.append)
29
30
31 def makeParser(usage=""):
32     parser = optparse.OptionParser(usage=usage)
33     parser.add_option("--outfile", dest="outfilename")
34     parser.add_option("--append", action="store_true", dest="append")
35     parser.add_option("--restrict", dest="restrictfilename")
36
37     configParser = getConfigParser()
38     section = "getGOgenes"
39     outfilename = getConfigOption(configParser, section, "outfilename", None)
40     restrictfilename = getConfigOption(configParser, section, "restrictfilename", None)
41     append = getConfigBoolOption(configParser, section, "append", False)
42
43     parser.set_defaults(outfilename=outfilename, restrictfilename=restrictfilename, append=append)
44
45     return parser
46
47
48 def getGOgenes(genome, GOIDlist, outfilename=None, restrictfilename=None, append=False):
49     writeOut = False
50     if outfilename is not None:
51         writeOut = True
52
53     restrict = False
54     if restrictfilename is not None:
55         restrict = True
56     
57     hg = Genome(genome)
58
59     print sys.argv
60     print GOIDlist
61
62     firstGeneList = []
63     for GOID in GOIDlist:
64         testList = hg.allGIDsbyGOID(GOID)
65         print "GOID: %s (%d)" % (GOID, len(testList))
66         firstGeneList += testList
67
68     geneDict = {}
69     for gid in firstGeneList:
70         geneDict[gid] = 1
71
72     geneList = geneDict.keys()
73     print len(geneList)
74     geneInfoList = getGeneInfoDict(genome)
75
76     if writeOut:
77         if append:
78             outfile = open(outfilename, "a")
79         else:
80             outfile = open(outfilename, "w")
81
82         for GOID in GOIDlist:
83             outfile.write("#%s\n" % GOID) 
84
85     restrictList = []
86     restrictDict = {}
87     if restrict:
88         restrictFile = open(restrictfilename)
89         for line in restrictFile:
90             fields = line.strip().split()
91             restrictList.append(fields[0])
92             restrictDict[fields[0]] = line
93
94     outList = []
95     symbolDict = {}
96     for gid in geneList:
97         symbol = "LOC" + gid
98         if restrict and gid not in restrictList:
99             continue
100
101         try:
102             symbol = geneInfoList[gid][0][0]
103         except:
104             pass
105
106         if restrict:
107             symbolDict[symbol] = restrictDict[gid]
108
109         outList.append(symbol)
110
111     outList.sort()
112     for symbol in outList:
113         if writeOut:
114             if restrict:
115                 outfile.write(symbolDict[symbol])
116             else:
117                 outfile.write(symbol + "\n")
118         else:
119             print symbol
120
121     if writeOut:
122         outfile.close()
123
124
125 if __name__ == "__main__":
126     main(sys.argv)