mark the example submission rule files as being raw, so the escapes don't get confused
[htsworkflow.git] / scripts / htsw-eland2bed
1 #!/usr/bin/python
2 import optparse
3 import sys
4 import os
5
6 from htsworkflow.util.opener import autoopen
7 from htsworkflow.util.makebed import make_bed_from_eland_stream, make_bed_from_multi_eland_stream, make_description
8
def make_parser():
  """Build the command line parser for the eland-to-bed converter.

  Returns an optparse.OptionParser.  After parsing, the option values are
  available under these attribute names:

    inname       -e / --eland        input eland filename
    outname      -b / --bed          output bed filename
    name         -n / --name         track (short) name
    description  -d / --description  track description
    prefix       --chromosome        chromosome prefix, default 'chr'
    database     --database          location of the fctracker database
    flowcell     --flowcell          flowcell id
    lane         --lane              lane
    multi        -m / --multi        flag, default False
    reads        --reads             int, default 255
  """
  parser = optparse.OptionParser()
  parser.add_option('-e', '--eland', dest='inname',
                    help='specify input eland filename')
  parser.add_option('-b', '--bed', dest='outname',
                    help='specify output bed filename')
  parser.add_option('-n', '--name', dest='name',
                    help='specify the track (short) name.',
                    default=None)
  parser.add_option('-d', '--description', dest='description',
                    help='specify the track description',
                    default=None)
  parser.add_option('--chromosome', dest='prefix',
                    help='Set the chromosome prefix name. defaults to "chr"',
                    default='chr')
  parser.add_option('--database', dest='database',
                    help='specify location of fctracker database',
                    default=None)
  parser.add_option('--flowcell', dest='flowcell',
                    help='compute name and description from database using flowcell id',
                    default=None)
  parser.add_option('--lane', dest='lane',
                    help='specify which lane to use when retrieving description from database',
                    default=None)

  # The multi-read options get their own section in the --help output.
  multi = optparse.OptionGroup(parser, 'Multi-read ELAND support')

  # No explicit dest: optparse derives 'multi' and 'reads' from the long
  # option names.
  multi.add_option('-m', '--multi', action='store_true',
                   help='Enable parsing multi-read eland files',
                   default=False)
  # Adjacent string literals are concatenated, so each piece must carry its
  # own trailing space.
  multi.add_option('--reads', type='int',
                   help='limit reporting multi reads to this many reads '
                        '(most usefully --reads=1 will turn a multi-read '
                        'file into a single read file)',
                   default=255)
  parser.add_option_group(multi)

  return parser
47
def _close_unless_standard(stream):
  """Close stream unless it is None, sys.stdin or sys.stdout."""
  if stream is None or stream is sys.stdin or stream is sys.stdout:
    return
  stream.close()


def main(command_line=None):
  """Convert an eland file to a bed file according to the command line.

  command_line is the list of arguments to parse; when it is None,
  sys.argv[1:] is used.

  Input is read from the file named by --eland ('-' means stdin).  Output
  goes to the file named by --bed ('-' means stdout).  When --bed is not
  given, output goes to stdout if the input is stdin, otherwise to the
  input filename with its extension replaced by '.bed'.  An existing
  output file is never overwritten.

  Returns 0 on success.  Usage problems are reported through
  parser.error(), which normally prints a message and exits the process.
  """
  if command_line is None:
    command_line = sys.argv[1:]

  parser = make_parser()
  (options, args) = parser.parse_args(command_line)

  # optparse's parser.error() prints the usage message and exits with
  # status 2; the 'return 1' after each call is only a safeguard in case
  # error() ever returns.
  if options.inname is None:
    parser.error("Need eland input file name")
    return 1

  instream = None
  outstream = None
  try:
    if options.inname == '-':
      instream = sys.stdin
    elif os.path.exists(options.inname):
      instream = autoopen(options.inname, 'r')
    else:
      parser.error('%s was not found' % (options.inname))
      return 1

    # figure out name for output file
    if options.outname is None:
      # if outname wasn't defined, and we're reading from stdin
      if instream is sys.stdin:
        # write to stdout
        outstream = sys.stdout
      else:
        # if there's a name write to name.bed
        options.outname = os.path.splitext(options.inname)[0] + '.bed'
        # sys.stderr.write() instead of the print statement so this line
        # behaves the same on python 2 and python 3.
        sys.stderr.write("defaulting to outputname %s\n" % (options.outname,))
    elif options.outname == '-':
      outstream = sys.stdout

    if outstream is None:
      if os.path.exists(options.outname):
        parser.error("not overwriting %s" % (options.outname))
        return 1
      else:
        outstream = open(options.outname, 'w')

    # The database is only consulted when both flowcell and lane were
    # given; otherwise the --name / --description values are used as is.
    if options.flowcell is not None and options.lane is not None:
      # get our name/description out of the database
      name, description = make_description(
                             options.database, options.flowcell, options.lane
                          )
    else:
      name = options.name
      description = options.description

    if options.multi:
      make_bed_from_multi_eland_stream(instream, outstream,
                                       name, description,
                                       options.prefix,
                                       options.reads)
    else:
      make_bed_from_eland_stream(instream, outstream,
                                 name, description,
                                 options.prefix)
    return 0
  finally:
    # Release the files this function opened, including when parser.error()
    # exits or the conversion raises.  stdin/stdout are left open.
    # NOTE(review): assumes the object returned by autoopen() has close().
    _close_unless_standard(outstream)
    _close_unless_standard(instream)
110
if __name__ == "__main__":
  # Run as a script: pass the arguments after the program name to main()
  # and use its return value as the process exit status.
  exit_status = main(sys.argv[1:])
  sys.exit(exit_status)
113