From 5c7a3523a03ffdf304fed1d1100158b249b39021 Mon Sep 17 00:00:00 2001
From: Diane Trout
Date: Tue, 22 Apr 2008 00:02:16 +0000
Subject: [PATCH] Extend makebed to lookup metadata out of a copy of the fctracker database

---
Review notes:
 - scripts/makebed calls make_bed_from_eland_stream() again instead of only
   printing the name and description, so a bed file is written.
 - make_bed_from_eland_stream() skips lines that stop before the SENSE field
   and matches chromosome_prefix with startswith() instead of a 3 character
   slice.
 - make_description() checks the lane and the flowcell id before querying.
 - The blob ids on the index lines are carried over from the original patch.

 gaworkflow/util/makebed.py | 89 ++++++++++++++++++++++++++++++++++++++
 scripts/makebed            | 57 +++++++++----------------
 2 files changed, 109 insertions(+), 37 deletions(-)
 create mode 100755 gaworkflow/util/makebed.py

diff --git a/gaworkflow/util/makebed.py b/gaworkflow/util/makebed.py
new file mode 100755
index 0000000..d2389ba
--- /dev/null
+++ b/gaworkflow/util/makebed.py
@@ -0,0 +1,89 @@
+"""
+Utility functions to make bedfiles.
+"""
+import os
+
+def make_bed_from_eland_stream(instream, outstream, name, description, chromosome_prefix='chr'):
+    """
+    read an eland result file from instream and write a bedfile to outstream
+
+    instream is iterated line by line and each line is split on whitespace.
+    Lines with too few fields, or whose chromosome field does not start
+    with chromosome_prefix, are skipped.  name and description are used in
+    the track header; None selects "track" / "eland result file".
+
+    Raises ValueError when a matching chromosome field is not "<name>.fa".
+    """
+    # indexes into fields in eland_result.txt file
+    SEQ = 1
+    CHR = 6
+    START = 7
+    SENSE = 8
+    # map eland_result.txt sense
+    sense_map = { 'F': '+', 'R': '-'}
+    sense_color = { 'F': '0,0,255', 'R': '255,255,0' }
+    # provide default track names
+    if name is None: name = "track"
+    if description is None: description = "eland result file"
+    bed_header = 'track name="%s" description="%s" visibility=4 itemRgb="ON"'
+    bed_header += os.linesep
+    outstream.write(bed_header % (name, description))
+
+    for line in instream:
+        fields = line.split()
+        # every field up to SENSE is read below, and the chromosome field
+        # needs to start with the requested prefix
+        if len(fields) <= SENSE or not fields[CHR].startswith(chromosome_prefix):
+            continue
+        start = fields[START]
+        stop = int(start) + len(fields[SEQ])
+        chromosome, extension = fields[CHR].split('.')
+        if extension != "fa":
+            raise ValueError("unexpected chromosome file name %s" % (fields[CHR],))
+        outstream.write('%s %s %d read 0 %s - - %s%s' % (
+            chromosome,
+            start,
+            stop,
+            sense_map[fields[SENSE]],
+            sense_color[fields[SENSE]],
+            os.linesep
+        ))
+
+def make_description(database, flowcell_id, lane):
+    """
+    compute a bedfile name and description from the fctracker database
+
+    database is passed to fctracker(), flowcell_id selects the flowcell
+    and lane (anything int() accepts) must be in the range 1-8.
+
+    Returns a (name, description) tuple: name is "<flowcell_id>-<lane>"
+    and description is "<library name>-<library id>" for that lane.
+
+    Raises RuntimeError for an out of range lane, a flowcell id that
+    contains a quote, or a flowcell id that does not match exactly one
+    flowcell.
+    """
+    from gaworkflow.util.fctracker import fctracker
+
+    lane = int(lane)
+    if lane < 1 or lane > 8:
+        raise RuntimeError("flowcells only have lanes 1-8")
+    # the id is pasted into a where clause, so don't let it close the quote
+    if "'" in str(flowcell_id):
+        raise RuntimeError("invalid flowcell id %s" % (flowcell_id))
+
+    fc = fctracker(database)
+    cells = fc._get_flowcells("where flowcell_id='%s'" % (flowcell_id))
+    if len(cells) != 1:
+        raise RuntimeError("couldn't find flowcell id %s" % (flowcell_id))
+
+    name = "%s-%s" % (flowcell_id, lane)
+
+    # list() so this also works when items() returns a view
+    cell_id, cell = list(cells.items())[0]
+    assert cell_id == flowcell_id
+
+    cell_library_id = cell['lane_%d_library_id' %(lane,)]
+    cell_library = cell['lane_%d_library' %(lane,)]
+    description = "%s-%s" % (cell_library['library_name'], cell_library_id)
+    return name, description
diff --git a/scripts/makebed b/scripts/makebed
index 2f1289f..b093835 100755
--- a/scripts/makebed
+++ b/scripts/makebed
@@ -3,42 +3,7 @@ import optparse
 import sys
 import os
 
-def make_bed(instream, outstream, name, description, chromosome_prefix='chr'):
-    """
-    read an eland result file from instream and write a bedfile to outstream
-    """
-    # indexes into fields in eland_result.txt file
-    SEQ = 1
-    CHR = 6
-    START = 7
-    SENSE = 8
-    # map eland_result.txt sense
-    sense_map = { 'F': '+', 'R': '-'}
-    sense_color = { 'F': '0,0,255', 'R': '255,255,0' }
-    # provide default track names
-    if name is None: name = "track"
-    if description is None: description = "eland result file"
-    bed_header = 'track name="%s" description="%s" visibility=4 itemRgb="ON"'
-    bed_header += os.linesep
-    outstream.write(bed_header % (name, description))
-
-    for line in instream:
-        fields = line.split()
-        # we need more than the CHR field, and it needs to match a chromosome
-        if len(fields) <= CHR or fields[CHR][:3] != chromosome_prefix:
-            continue
-        start = fields[START]
-        stop = int(start) + len(fields[SEQ])
-        chromosome, extension = fields[CHR].split('.')
-        assert extension == "fa"
-        outstream.write('%s %s %d read 0 %s - - %s%s' % (
-            chromosome,
-            start,
-            stop,
-            sense_map[fields[SENSE]],
-            sense_color[fields[SENSE]],
-            os.linesep
-        ))
+from gaworkflow.util.makebed import make_bed_from_eland_stream, make_description
 
 def make_parser():
     parser = optparse.OptionParser()
@@ -55,6 +20,15 @@ def make_parser():
     parser.add_option('--chromosome', dest='prefix',
                       help='Set the chromosome prefix name. defaults to "chr"',
                       default='chr')
+    parser.add_option("--database", dest='database',
+                      help="specify location of fctracker database",
+                      default=None)
+    parser.add_option("--flowcell", dest='flowcell',
+                      help="compute name and description from database using flowcell id",
+                      default=None)
+    parser.add_option("--lane", dest='lane',
+                      help='specify which lane to use when retrieving description from database',
+                      default=None)
     return parser
 
 def main(command_line=None):
@@ -84,7 +58,16 @@ def main(command_line=None):
     else:
         outstream = open(options.outname, 'w')
 
-    make_bed(instream, outstream, options.name, options.description, options.prefix)
+    if options.flowcell is not None and options.lane is not None:
+        # get our name/description out of the database
+        name, description = make_description(
+            options.database, options.flowcell, options.lane
+        )
+    else:
+        name = options.name
+        description = options.description
+
+    make_bed_from_eland_stream(instream, outstream, name, description, options.prefix)
     return 0
 
 if __name__ == "__main__":
-- 
2.30.2