From acf8fead4739cfadbd829788841cbf959d9f5055 Mon Sep 17 00:00:00 2001 From: Brandon King Date: Wed, 28 May 2008 19:19:47 +0000 Subject: [PATCH] Bed file generation is here! * Added a new makebed function which returns a generator rather than writing to outstream. * Updated the make_description function in makebed.py to be able to handle cases like ' (deleted)'. * Added a bedfile generation view which uses the makebed generator function to prevent memory issues on the webserver. * Visit /library/ for trying out the new features! --- gaworkflow/frontend/fctracker/views.py | 40 ++++++++++++++++++++++ gaworkflow/frontend/urls.py | 3 +- gaworkflow/util/makebed.py | 47 ++++++++++++++++++++++++-- 3 files changed, 87 insertions(+), 3 deletions(-) diff --git a/gaworkflow/frontend/fctracker/views.py b/gaworkflow/frontend/fctracker/views.py index 7f117c9..05358e9 100644 --- a/gaworkflow/frontend/fctracker/views.py +++ b/gaworkflow/frontend/fctracker/views.py @@ -1,9 +1,13 @@ # Create your views here. from gaworkflow.frontend.fctracker.models import Library from gaworkflow.frontend.fctracker.results import get_flowcell_result_dict, flowcellIdStrip +from gaworkflow.frontend import settings +from gaworkflow.util import makebed from gaworkflow.util import opener from django.http import HttpResponse +import StringIO + #from django.db.models import base def library(request): @@ -80,8 +84,43 @@ def result_fc_cnm_eland_lane(request, fc_id, cnm, lane): f = opener.autoopen(filepath, 'r') return HttpResponse(f) + + +def bedfile_fc_cnm_eland_lane(request, fc_id, cnm, lane): + """ + returns a bed file for a given flowcell, CN-M (i.e. C1-33), and lane + """ + fc_id = flowcellIdStrip(fc_id) + d = get_flowcell_result_dict(fc_id) + + if d is None: + return HttpResponse('Results for Flowcell %s not found.' % (fc_id)) + + if cnm not in d: + return HttpResponse('Results for Flowcell %s; %s not found.' 
% (fc_id, cnm)) + erd = d[cnm]['eland_results'] + lane = int(lane) + + if lane not in erd: + return HttpResponse('Results for Flowcell %s; %s; lane %s not found.' % (fc_id, cnm, lane)) + + filepath = erd[lane] + + # Eland result file + fi = open(filepath, 'r') + # output memory file + + + name, description = makebed.make_description(settings.DATABASE_NAME, + fc_id, + lane) + + bedgen = makebed.make_bed_from_eland_stream_generator(fi, name, description) + + return HttpResponse(bedgen) + def _files(flowcell_id, lane): """ Sets up available files for download @@ -100,6 +139,7 @@ def _files(flowcell_id, lane): if int(lane) in erd: output.append('<a href="/results/%s/%s/eland_result/%s">eland_result(%s)</a>' % (flowcell_id, c_name, lane, c_name)) + output.append('<a href="/results/%s/%s/bedfile/%s">bedfile(%s)</a>' % (flowcell_id, c_name, lane, c_name)) if len(output) == 0: return '' diff --git a/gaworkflow/frontend/urls.py b/gaworkflow/frontend/urls.py index b359c33..add9279 100644 --- a/gaworkflow/frontend/urls.py +++ b/gaworkflow/frontend/urls.py @@ -15,5 +15,6 @@ urlpatterns = patterns('', #(r'^databrowse/(.*)', databrowse.site.root), (r'^library/$', 'gaworkflow.frontend.fctracker.views.library'), (r'^library/(?P\w+)/$', 'gaworkflow.frontend.fctracker.views.library_to_flowcells'), - (r'^results/(?P<fc_id>\w+)/(?P<cnm>C[1-9]-[0-9]+)/eland_result/(?P<lane>[1-8])','gaworkflow.frontend.fctracker.views.result_fc_cnm_eland_lane') + (r'^results/(?P<fc_id>\w+)/(?P<cnm>C[1-9]-[0-9]+)/eland_result/(?P<lane>[1-8])','gaworkflow.frontend.fctracker.views.result_fc_cnm_eland_lane'), + (r'^results/(?P<fc_id>\w+)/(?P<cnm>C[1-9]-[0-9]+)/bedfile/(?P<lane>[1-8])','gaworkflow.frontend.fctracker.views.bedfile_fc_cnm_eland_lane'), ) diff --git a/gaworkflow/util/makebed.py b/gaworkflow/util/makebed.py index 738b815..4d73cb6 100755 --- a/gaworkflow/util/makebed.py +++ b/gaworkflow/util/makebed.py @@ -41,6 +41,45 @@ def make_bed_from_eland_stream(instream, outstream, name, description, chromosom sense_color[fields[SENSE]], os.linesep )) + +def make_bed_from_eland_stream_generator(instream, name, description, 
chromosome_prefix='chr'): + """ + read an eland result file from instream and output it as a generator (iterator) + """ + # indexes into fields in eland_result.txt file + SEQ = 1 + CHR = 6 + START = 7 + SENSE = 8 + # map eland_result.txt sense + sense_map = { 'F': '+', 'R': '-'} + sense_color = { 'F': '0,0,255', 'R': '255,255,0' } + # provide default track names + if name is None: name = "track" + if description is None: description = "eland result file" + bed_header = 'track name="%s" description="%s" visibility=4 itemRgb="ON"' + bed_header += os.linesep + yield bed_header % (name, description) + + for line in instream: + fields = line.split() + # we need more than the CHR field, and it needs to match a chromosome + if len(fields) <= CHR or \ + (chromosome_prefix is not None and \ + fields[CHR][:3] != chromosome_prefix): + continue + start = fields[START] + stop = int(start) + len(fields[SEQ]) + chromosome, extension = fields[CHR].split('.') + assert extension == "fa" + yield '%s %s %d read 0 %s - - %s%s' % ( + chromosome, + start, + stop, + sense_map[fields[SENSE]], + sense_color[fields[SENSE]], + os.linesep + ) def make_description(database, flowcell_id, lane): """ @@ -49,7 +88,7 @@ def make_description(database, flowcell_id, lane): from gaworkflow.util.fctracker import fctracker fc = fctracker(database) - cells = fc._get_flowcells("where flowcell_id='%s'" % (flowcell_id)) + cells = fc._get_flowcells("where flowcell_id LIKE '%s%%'" % (flowcell_id)) if len(cells) != 1: raise RuntimeError("couldn't find flowcell id %s" % (flowcell_id)) lane = int(lane) @@ -59,7 +98,11 @@ def make_description(database, flowcell_id, lane): name = "%s-%s" % (flowcell_id, lane) cell_id, cell = cells.items()[0] - assert cell_id == flowcell_id + + #The assertion is no longer true after I changed + # the where statement to include the LIKE command. 
+ # because flowcells are being renamed to 'FC12269 (deleted)' + #assert cell_id == flowcell_id cell_library_id = cell['lane_%d_library_id' %(lane,)] cell_library = cell['lane_%d_library' %(lane,)] -- 2.30.2