#!/usr/bin/python
"""Command-line tool that writes a BED track file from an ELAND input file."""
import optparse
import sys
import os

from htsworkflow.util.opener import autoopen
from htsworkflow.util.makebed import (make_bed_from_eland_stream,
                                      make_bed_from_multi_eland_stream,
                                      make_description)


def make_parser():
    """Build the option parser for this script.

    Returns:
        An ``optparse.OptionParser`` with the input/output file options,
        the track name/description options, the database lookup options
        and the "Multi-read ELAND support" option group.
    """
    parser = optparse.OptionParser()
    parser.add_option('-e', '--eland', dest='inname',
                      help='specify input eland filename')
    parser.add_option('-b', '--bed', dest='outname',
                      help='specify output bed filename')
    parser.add_option('-n', '--name', dest='name',
                      help='specify the track (short) name.',
                      default=None)
    parser.add_option('-d', '--description', dest='description',
                      help='specify the track description',
                      default=None)
    parser.add_option('--chromosome', dest='prefix',
                      help='Set the chromosome prefix name. defaults to "chr"',
                      default='chr')
    parser.add_option("--database", dest='database',
                      help="specify location of fctracker database",
                      default=None)
    parser.add_option("--flowcell", dest='flowcell',
                      help="compute name and description from database using "
                           "flowcell id",
                      default=None)
    parser.add_option("--lane", dest='lane',
                      help='specify which lane to use when retrieving '
                           'description from database',
                      default=None)

    multi = optparse.OptionGroup(parser, 'Multi-read ELAND support')
    # No explicit dest: optparse derives options.multi / options.reads
    # from the long option names.
    multi.add_option('-m', '--multi', action='store_true',
                     help='Enable parsing multi-read eland files',
                     default=False)
    multi.add_option('--reads', type='int',
                     help='limit reporting multi reads to this many reads '
                          '(most usefully --reads=1 will turn a multi-read '
                          'file into a single read file)',
                     default=255)
    parser.add_option_group(multi)

    return parser


def main(command_line=None):
    """Parse the command line and convert the ELAND input to BED output.

    Args:
        command_line: list of argument strings to parse. When None,
            ``sys.argv[1:]`` is used.

    Returns:
        0 on success.

    Raises:
        SystemExit: via ``parser.error()`` when no input name is given, the
            input file does not exist, or the output file already exists.
            ``parser.error()`` never returns, so no error code is returned
            from this function for those cases.
    """
    if command_line is None:
        command_line = sys.argv[1:]

    parser = make_parser()
    options, _args = parser.parse_args(command_line)

    if options.inname is None:
        parser.error("Need eland input file name")

    read_stdin = options.inname == '-'
    if not read_stdin and not os.path.exists(options.inname):
        parser.error('%s was not found' % (options.inname,))

    # figure out name for output file
    write_stdout = False
    if options.outname is None:
        # if outname wasn't defined, and we're reading from stdin
        if read_stdin:
            # write to stdout
            write_stdout = True
        else:
            # if there's a name write to name.bed
            options.outname = os.path.splitext(options.inname)[0] + '.bed'
            # sys.stderr.write works on both Python 2 and Python 3, unlike
            # the "print >>stream" statement.
            sys.stderr.write(
                "defaulting to outputname %s\n" % (options.outname,))
    elif options.outname == '-':
        write_stdout = True

    if not write_stdout and os.path.exists(options.outname):
        parser.error("not overwriting %s" % (options.outname,))

    # Resolve the track name/description before creating the output file,
    # so a failed lookup does not leave behind an empty file that a later
    # run would then refuse to overwrite.
    if options.flowcell is not None and options.lane is not None:
        # get our name/description out of the database
        name, description = make_description(
            options.database, options.flowcell, options.lane
        )
    else:
        name = options.name
        description = options.description

    instream = None
    outstream = None
    try:
        if read_stdin:
            instream = sys.stdin
        else:
            instream = autoopen(options.inname, 'r')

        if write_stdout:
            outstream = sys.stdout
        else:
            outstream = open(options.outname, 'w')

        if options.multi:
            make_bed_from_multi_eland_stream(instream, outstream,
                                             name, description,
                                             options.prefix,
                                             options.reads)
        else:
            make_bed_from_eland_stream(instream, outstream,
                                       name, description,
                                       options.prefix)
    finally:
        # Close only the streams opened here; stdin/stdout belong to the
        # caller. NOTE(review): assumes autoopen() returns a file-like
        # object with close() -- confirm against htsworkflow.util.opener.
        if outstream is not None and outstream is not sys.stdout:
            outstream.close()
        if instream is not None and instream is not sys.stdin:
            instream.close()

    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))