first pass cleanup of cistematic/genomes; change bamPreprocessing
[erange.git] / regiontobed.py
1 """
2     usage: python regiontobed label regionfile outbedfile [--color r,g,b] [--score field] [--narrowPeak] [--broadPeak] [--itemRgb] [--nolabel]
3            where color is in comma-delimited RGB without space
4            and field is a column with a score (first column is 0, second is 1,...)
5            --narrowPeak assumes that findall.py was run with -listPeak
6            --broadPeak assumes that findall.py was *NOT* run with -listPeak
7 """
8
# psyco is an optional accelerator: keep running without it when it is
# missing or fails to initialise.  Catch Exception rather than using a bare
# "except:" so that KeyboardInterrupt and SystemExit are not swallowed.
try:
    import psyco
    psyco.full()
except Exception:
    pass
14
15 import sys
16 import math
17 import optparse
18 from commoncode import getConfigParser, getConfigOption, getConfigBoolOption
19
# Announce the tool version as soon as the module is loaded.
print("regiontobed: version 3.2")
21
22
def usage():
    """Print this module's docstring, which holds the command-line usage."""
    helpText = __doc__
    print(helpText)
25
26
def main(argv=None):
    """Command-line entry point: parse arguments and run the conversion.

    argv -- full argument vector including the program name in position 0;
            sys.argv is used when argv is None or empty.

    Requires three positional arguments (label, region file, output BED
    file).  When fewer are given the usage text is printed and the process
    exits with status 2.
    """
    if not argv:
        argv = sys.argv

    # Hand the usage text to the parser directly.  Binding it to a local
    # called "usage" would shadow the usage() function, making the
    # too-few-arguments branch below call a string and raise TypeError.
    parser = getParser(__doc__)
    (options, args) = parser.parse_args(argv[1:])

    if len(args) < 3:
        usage()
        sys.exit(2)

    factorlabel = args[0]
    regionfile = args[1]
    outfile = args[2]

    regiontobed(factorlabel, regionfile, outfile, options.color,
                options.scoreField, options.doNarrow, options.doBroad,
                options.itemRGB, options.noLabel)
47
48
def getParser(usage):
    """Build the optparse parser for regiontobed.

    usage -- usage string handed to optparse.OptionParser.

    Registers --color, --score (int) and the boolean switches --narrowPeak,
    --broadPeak, --itemRgb and --nolabel, then installs the default for each
    one from the "regiontobed" configuration section.
    NOTE(review): getConfigParser / getConfigOption / getConfigBoolOption
    come from commoncode; the last argument is presumably the fallback used
    when the option is absent from the configuration -- confirm there.
    """
    # (command-line flag, destination attribute) for every on/off switch;
    # the destination doubles as the configuration option name.
    switches = (
        ("--narrowPeak", "doNarrow"),
        ("--broadPeak", "doBroad"),
        ("--itemRgb", "itemRGB"),
        ("--nolabel", "noLabel"),
    )

    parser = optparse.OptionParser(usage=usage)
    parser.add_option("--color", dest="color")
    parser.add_option("--score", type="int", dest="scoreField")
    for flag, destination in switches:
        parser.add_option(flag, action="store_true", dest=destination)

    configParser = getConfigParser()
    section = "regiontobed"
    defaults = {
        "color": getConfigOption(configParser, section, "color", "0,0,0"),
        "scoreField": getConfigOption(configParser, section, "scoreField", None),
    }
    for flag, destination in switches:
        defaults[destination] = getConfigBoolOption(configParser, section,
                                                    destination, False)

    parser.set_defaults(**defaults)

    return parser
71
72
def _negLog10PValue(pval):
    """Return -log10(pval); a p-value of exactly zero is reported as 350."""
    if pval == 0.:
        return 350

    return -1. * math.log(pval, 10)


def regiontobed(factorlabel, regionFileName, outFileName, color="0,0,0",
                scoreField=None, doNarrow=False, doBroad=False, itemRGB=False,
                noLabel=False):
    """Convert a whitespace-delimited region file into a BED-style file.

    factorlabel    -- name placed in the track header line.
    regionFileName -- input path; lines starting with "#" and blank lines
                      are skipped.  Column 0 is written as the item name and
                      columns 1-3 as the first three output columns.
    outFileName    -- output path (overwritten).
    color          -- "r,g,b" string used in the track header, or per item
                      when itemRGB is set.
    scoreField     -- index of the input column holding a score; the value
                      is truncated to an int and capped at 1000.
    doNarrow       -- emit signal value (column 4), -log10 of the last
                      column as p-value, and the peak offset (column 9 minus
                      column 2).  Takes precedence over doBroad/scoreField.
    doBroad        -- like doNarrow but without the peak offset.
    itemRGB        -- append strand/thick/colour columns to every item
                      (ignored for the narrow and broad peak layouts).
    noLabel        -- when true, omit the "track name=..." header line.
    """
    regionfile = open(regionFileName)
    try:
        outfile = open(outFileName, "w")
        try:
            if itemRGB:
                print("assigning each item its color")

            # The header is written unless the caller asked for no label;
            # previously the test was inverted, so --nolabel was the only
            # way to get a header.
            if not noLabel:
                if itemRGB:
                    outfile.write('track name=%s visibility=4 itemRgb="on"\n' % factorlabel)
                else:
                    outfile.write("track name=%s visibility=4 color=%s\n" % (factorlabel, color))

            for line in regionfile:
                if line[0] == "#":
                    continue

                fields = line.strip().split()
                if not fields:
                    # Blank line: nothing to convert (would otherwise raise
                    # IndexError on fields[1]).
                    continue

                if doNarrow:
                    signalVal = float(fields[4])
                    pValue = _negLog10PValue(float(fields[-1]))
                    peakPos = int(fields[9]) - int(fields[2])
                    record = "%s\t%s\t%s\t%s\t%d\t.\t%.4f\t%.4f\t-1\t%d" % (fields[1], fields[2], fields[3], fields[0], 0, signalVal, pValue, peakPos)
                elif doBroad:
                    signalVal = float(fields[4])
                    pValue = _negLog10PValue(float(fields[-1]))
                    record = "%s\t%s\t%s\t%s\t%d\t.\t%.4f\t%.4f\t-1" % (fields[1], fields[2], fields[3], fields[0], 0, signalVal, pValue)
                elif scoreField is not None:
                    score = min(int(float(fields[scoreField])), 1000)
                    record = "%s\t%s\t%s\t%s\t%s" % (fields[1], fields[2], fields[3], fields[0], score)
                    if itemRGB:
                        record += "\t+\t-\t-\t%s" % color
                else:
                    record = "%s\t%s\t%s\t%s" % (fields[1], fields[2], fields[3], fields[0])
                    if itemRGB:
                        record += "\t1000\t+\t-\t-\t%s" % color

                outfile.write(record + "\n")
        finally:
            # Close the output even when a malformed line raises.
            outfile.close()
    finally:
        regionfile.close()
129
130
# Run the converter only when this file is executed as a script.
if __name__ == "__main__":
    main(argv=sys.argv)