snapshot of 4.0a development. initial git repo commit
[erange.git] / regiontobed.py
1 """
2     usage: python regiontobed label regionfile outbedfile [--color r,g,b] [--score field] [--narrowPeak] [--broadPeak] [--itemRgb] [--nolabel]
3            where color is in comma-delimited RGB without space
4            and field is a column with a score (first column is 0, second is 1,...)
5            --narrowPeak assumes that findall.py was run with -listPeak
6            --broadPeak assumes that findall.py was *NOT* run with -listPeak
7 """
8
# psyco is an optional JIT accelerator: use it when it is installed and works,
# otherwise run unaccelerated.  Catch Exception rather than using a bare
# except so that KeyboardInterrupt and SystemExit are never swallowed here.
try:
    import psyco
    psyco.full()
except Exception:
    pass
14
15 import sys, math, optparse
16
# Version banner printed whenever the module is loaded.  "%prog" is only
# expanded by optparse inside usage/version strings, so a plain print would
# emit it literally; name the script explicitly instead.  The single-argument
# call form of print behaves the same on Python 2 and Python 3.
print("regiontobed: version 3.1")
18
19
def usage():
    """Print the module docstring, which holds the command-line usage text."""
    # Single-argument call form: identical output on Python 2 and Python 3.
    print(__doc__)
22
23
def main(argv=None):
    """Parse the command line and run the region-to-BED conversion.

    argv is the full argument vector with the program name in argv[0]; when
    it is None or empty, sys.argv is used instead.

    Three positional arguments are required: the track label, the region
    file to read and the BED file to write.  If fewer are given, the usage
    text is printed and the process exits with status 2.
    """
    if not argv:
        argv = sys.argv

    # Deliberately not named "usage": a local with that name shadows the
    # module-level usage() function, and calling it then raises
    # "TypeError: 'str' object is not callable" instead of showing the help.
    usageText = __doc__

    parser = optparse.OptionParser(usage=usageText)
    parser.add_option("--color", dest="color")
    parser.add_option("--score", type="int", dest="scoreField")
    parser.add_option("--narrowPeak", action="store_true", dest="doNarrow")
    parser.add_option("--broadPeak", action="store_true", dest="doBroad")
    parser.add_option("--itemRgb", action="store_true", dest="itemRGB")
    parser.add_option("--nolabel", action="store_true", dest="noLabel")
    parser.set_defaults(color="0,0,0", scoreField=None, doNarrow=False,
                        doBroad=False, itemRGB=False, noLabel=False)
    (options, args) = parser.parse_args(argv[1:])

    if len(args) < 3:
        print(usageText)
        sys.exit(2)

    factorlabel = args[0]
    regionfile = args[1]
    outfile = args[2]

    regiontobed(factorlabel, regionfile, outfile, options.color,
                options.scoreField, options.doNarrow, options.doBroad,
                options.itemRGB, options.noLabel)
52
53
def _negLog10PValue(pval):
    """Return -log10(pval), or the fixed value 350 when pval is exactly 0."""
    if pval == 0.:
        # log(0) is undefined, so a zero p-value is reported as a constant.
        return 350

    return -1. * math.log(pval, 10)


def _formatRegion(fields, color, scoreField, doNarrow, doBroad, itemRGB):
    """Build one output record (without the trailing newline) from a region.

    fields is the whitespace-split region line.  Columns 1, 2 and 3 are
    emitted first, followed by column 0, in every output flavour.
    """
    leading = (fields[1], fields[2], fields[3], fields[0])

    if doNarrow or doBroad:
        # Peak flavours: signal comes from column 4, the p-value from the
        # last column; the score column is always written as 0.
        signalVal = float(fields[4])
        pValue = _negLog10PValue(float(fields[-1]))
        record = "%s\t%s\t%s\t%s\t%d\t.\t%.4f\t%.4f\t-1" % (leading + (0, signalVal, pValue))
        if doNarrow:
            # doNarrow wins when both flags are set; it adds the peak offset
            # measured from the region start (column 9 minus column 2).
            peakPos = int(fields[9]) - int(fields[2])
            record += "\t%d" % peakPos

        return record

    if scoreField is not None:
        # Scores are truncated to an integer and capped at 1000.
        score = min(int(float(fields[scoreField])), 1000)
        record = "%s\t%s\t%s\t%s\t%s" % (leading + (score,))
        if itemRGB:
            record += "\t+\t-\t-\t%s" % color

        return record

    record = "%s\t%s\t%s\t%s" % leading
    if itemRGB:
        record += "\t1000\t+\t-\t-\t%s" % color

    return record


def regiontobed(factorlabel, regionFileName, outFileName, color="0,0,0",
                scoreField=None, doNarrow=False, doBroad=False, itemRGB=False,
                noLabel=False):
    """Convert a whitespace-delimited region file into a BED-style file.

    factorlabel     -- track name written into the track header line
    regionFileName  -- input file; lines starting with "#" and blank lines
                       are skipped
    outFileName     -- output file, overwritten if it exists
    color           -- comma-delimited "r,g,b" string used in the track
                       header, or per item when itemRGB is set
    scoreField      -- zero-based column holding a score (capped at 1000),
                       or None for no score column
    doNarrow        -- write the narrow peak layout (takes precedence over
                       doBroad); reads columns 4, 9 and the last column
    doBroad         -- write the broad peak layout; reads column 4 and the
                       last column
    itemRGB         -- give each item its own color column and mark the
                       track header with itemRgb="on"
    noLabel         -- when true, omit the track header line

    Raises IndexError or ValueError if a data line lacks the columns the
    selected layout needs or holds non-numeric values in them.
    """
    # The with blocks guarantee both files are closed even when a malformed
    # line raises part-way through the conversion.
    with open(regionFileName) as regionfile:
        with open(outFileName, "w") as outfile:
            if itemRGB:
                print("assigning each item its color")

            # The header is written unless the caller asked for no label.
            if not noLabel:
                if itemRGB:
                    outfile.write('track name=%s visibility=4 itemRgb="on"\n' % factorlabel)
                else:
                    outfile.write("track name=%s visibility=4 color=%s\n" % (factorlabel, color))

            for line in regionfile:
                if line.startswith("#"):
                    continue

                fields = line.split()
                if not fields:
                    # Blank line: nothing to convert.
                    continue

                record = _formatRegion(fields, color, scoreField, doNarrow,
                                       doBroad, itemRGB)
                outfile.write(record + "\n")
110
111
# Script entry point: hand the real command line to main() only when this
# file is executed directly, never when it is imported as a module.
if __name__ == "__main__":
    main(argv=sys.argv)