first pass cleanup of cistematic/genomes; change bamPreprocessing
[erange.git] / utrChanges.py
1 #
2 #  utrChanges.py
3 #  ENRAGE
4 #
5
6 try:
7     import psyco
8     psyco.full()
9 except:
10     pass
11
12 import sys
13 from commoncode import getMergedRegions, getLocusByChromDict
14 from cistematic.genomes import Genome
15
16 print "utrChanges: version 1.4"
17
18
19 def main(argv=None):
20     if not argv:
21         argv = sys.argv
22
23     if len(argv) < 4:
24         print "usage: python %s genome acceptedfile outfile" % argv[0]
25         sys.exit(1)
26
27     genome = argv[1]
28     acceptfile =  argv[2]
29     outfile = argv[3]
30
31     utrChanges(genome, acceptfile, outfile)
32
33
34 def utrChanges(genome, acceptfile, outFileName):
35     acceptDict = getMergedRegions(acceptfile, maxDist=0, keepLabel=True, verbose=True)
36     outfile = open(outFileName, "w")
37
38     hg = Genome(genome)
39
40     origLocusByChromDict = getLocusByChromDict(hg, keepSense=True)
41     newLocusByChromDict = getLocusByChromDict(hg, additionalRegionsDict=acceptDict, keepSense=True)
42
43     new3utr = 0
44     new5utr = 0
45     changedGene = 0
46
47     for chrom in origLocusByChromDict:
48         for (gstart, gstop, gid, glen, sense) in origLocusByChromDict[chrom]:   
49             for (newstart, newstop, newgid, newlen, newsense) in newLocusByChromDict[chrom]:
50                 if gid == newgid:
51                     changedBoundary = False
52                     new3p = "F"
53                     new5p = "F"
54                     if newstart < gstart:
55                         if sense == "R":
56                             new3utr += 1
57                             new3p = "T"
58                             changedBoundary = True
59                         elif sense == "F":
60                             new5utr += 1
61                             new5p = "T"
62                             changedBoundary = True
63                         else:
64                             print sense
65
66                     if newstop > gstop:
67                         if sense == "R":
68                             new5utr += 1
69                             new5p = "T"
70                             changedBoundary = True
71                         elif sense == "F":
72                             new3utr += 1
73                             new3p = "T"
74                             changedBoundary = True
75                         else:
76                             print sense
77
78                     if changedBoundary:
79                         changedGene += 1
80                         outfile.write("%s\tchr%s\t%d\t%d\t%s\tchr%s\t%d\t%d\t%s\t%s\n" % (gid, chrom, gstart, gstop, sense, chrom, newstart, newstop, new5p, new3p))
81
82                     continue
83
84     outfile.close()
85     print "%d new 5'utr" % new5utr
86     print "%d new 3'utr" % new3utr
87     print "%s affected genes" % changedGene
88
89
90 if __name__ == "__main__":
91     main(sys.argv)