From 29accc48b3a457aa4c5fd5a4b52aa9db677c15e8 Mon Sep 17 00:00:00 2001 From: Diane Trout Date: Thu, 29 Jan 2009 01:42:41 +0000 Subject: [PATCH] Update the library listing and summary reports to be prettier and easier to use. This also involved updating the pipelines.eland class to have some helpful read-only properties to do some math for our reporting. Also I added a library.html template instead of just dumping raw html to the django response object. --- gaworkflow/frontend/fctracker/views.py | 106 +++++++++++++++++++------ gaworkflow/pipeline/eland.py | 44 ++++++++++ templates/library.html | 46 +++++++++++ templates/summary_stats.html | 103 ++++++++++++++++++++---- 4 files changed, 258 insertions(+), 41 deletions(-) create mode 100644 templates/library.html diff --git a/gaworkflow/frontend/fctracker/views.py b/gaworkflow/frontend/fctracker/views.py index 0d2eecb..bfa9d4b 100644 --- a/gaworkflow/frontend/fctracker/views.py +++ b/gaworkflow/frontend/fctracker/views.py @@ -12,18 +12,35 @@ from django.template.loader import get_template from django.template import Context import StringIO +import logging +import os #from django.db.models import base +LANE_LIST = [1,2,3,4,5,6,7,8] + +def create_library_list(): + """ + Create a list of libraries that includes how many lanes were run + """ + library_list = [] + for lib in Library.objects.all(): + summary = {} + summary['library_id'] = lib.library_id + summary['library_name'] = lib.library_name + summary['species_name' ] = lib.library_species.scientific_name + lanes_run = 0 + for lane_id in LANE_LIST: + lane = getattr(lib, 'lane_%d_library' % (lane_id,)) + lanes_run += len( lane.all() ) + summary['lanes_run'] = lanes_run + library_list.append(summary) + return library_list def library(request): - library_list = Library.objects.all() #.order_by('-pub_date') - rep_string = '%s - %s (%s)' - output = '
\n'.join([rep_string \ - % (l.library_id, - l.library_id, - l.library_name, - l.library_species.scientific_name) for l in library_list]) - return HttpResponse(output) + library_list = create_library_list() + t = get_template('library.html') + c = Context({'library_list': library_list }) + return HttpResponse( t.render(c) ) def library_to_flowcells(request, lib_id): """ @@ -74,24 +91,18 @@ def library_to_flowcells(request, lib_id): flowcell_list.extend([ (fc.flowcell_id, 8) for fc in lib.lane_8_library.all() ]) flowcell_list.sort() - output.append('
') - - data_dict_list = [] + lane_summary_list = [] for fc, lane in flowcell_list: - dicts, err_list, summary_list = _summary_stats(fc, lane) + lane_summary, err_list = _summary_stats(fc, lane) - data_dict_list.extend(dicts) + lane_summary_list.extend(lane_summary) for err in err_list: output.append(err) - - for summary in summary_list: - output.append(summary.replace('\n', '
\n')) - html = t.render(Context({'data_dict_list': data_dict_list})) - output.append('
') - output.append('
') - output.append(html) + + logging.error("len lane summary %d" % (len(lane_summary_list))) output.append('
') + output.append(t.render(Context({'lane_summary_list': lane_summary_list}))) output.append('
') if record_count == 0: @@ -194,7 +205,47 @@ def bedfile_fc_cnm_eland_lane(request, fc_id, cnm, lane, ucsc_compatible=False): return HttpResponse(bedgen, mimetype="application/x-bedfile") -def _summary_stats(flowcell_id, lane): +def _summary_stats(flowcell_id, lane_id): + """ + Return (summary_list, err_list) for one lane of a flowcell: an eland lane summary per cycle width and read end, plus any error messages. + """ + fc_id = flowcellIdStrip(flowcell_id) + fc_result_dict = get_flowcell_result_dict(fc_id) + + summary_list = [] + err_list = [] + + if fc_result_dict is None: + err_list.append('Results for Flowcell %s not found.' % (fc_id)) + return (summary_list, err_list) + + for cycle_width in fc_result_dict: + xmlpath = fc_result_dict[cycle_width]['run_xml'] + + if xmlpath is None: + err_list.append('Run xml for Flowcell %s(%s) not found.' % (fc_id, cycle_width)) + continue + + tree = ElementTree.parse(xmlpath).getroot() + try: + runs = runfolder.PipelineRun(pathname='', xml=tree) + gerald_summary = runs.gerald.summary.lane_results + for end in range(len(gerald_summary)): + eland_summary = runs.gerald.eland_results.results[end][lane_id] + # add information to lane_summary + eland_summary.flowcell_id = flowcell_id + eland_summary.clusters = gerald_summary[end][lane_id].cluster + eland_summary.cycle_width = cycle_width + eland_summary.summarized_reads = runfolder.summarize_mapped_reads(eland_summary.mapped_reads) + summary_list.append(eland_summary) + + except Exception, e: + err_list.append("Summary report needs to be updated.") + logging.error("Exception: " + str(e)) + + return (summary_list, err_list) + +def _summary_stats_old(flowcell_id, lane): """ return a dictionary of summary stats for a given flowcell_id & lane. 
""" @@ -220,10 +271,15 @@ def _summary_stats(flowcell_id, lane): tree = ElementTree.parse(xmlpath).getroot() results = runfolder.PipelineRun(pathname='', xml=tree) try: - summary_list.append(runfolder.summary_report([results])) - except: + lane_report = runfolder.summarize_lane(results.gerald, lane) + summary_list.append(os.linesep.join(lane_report)) + except Exception, e: summary_list.append("Summary report needs to be updated.") - + logging.error("Exception: " + str(e)) + + print "----------------------------------" + print "-- DOES NOT SUPPORT PAIRED END ---" + print "----------------------------------" lane_results = results.gerald.summary[0][lane] lrs = lane_results @@ -280,4 +336,4 @@ def _files(flowcell_id, lane): return '' return '(' + '|'.join(output) + ')' - + diff --git a/gaworkflow/pipeline/eland.py b/gaworkflow/pipeline/eland.py index a946227..ef75160 100644 --- a/gaworkflow/pipeline/eland.py +++ b/gaworkflow/pipeline/eland.py @@ -194,6 +194,50 @@ class ElandLane(object): return self._match_codes match_codes = property(_get_match_codes) + def _get_no_match(self): + if self._mapped_reads is None: + self._update() + return self._match_codes['NM'] + no_match = property(_get_no_match, + doc="total reads that didn't match the target genome.") + + def _get_no_match_percent(self): + return float(self.no_match)/self.reads * 100 if self.reads else 0.0 + no_match_percent = property(_get_no_match_percent, + doc="no match reads as percent of total") + + def _get_qc_failed(self): + if self._mapped_reads is None: + self._update() + return self._match_codes['QC'] + qc_failed = property(_get_qc_failed, + doc="total reads that failed QC.") + + def _get_qc_failed_percent(self): + return float(self.qc_failed)/self.reads * 100 if self.reads else 0.0 + qc_failed_percent = property(_get_qc_failed_percent, + doc="QC failed reads as percent of total") + + def _get_unique_reads(self): + if self._mapped_reads is None: + self._update() + sum = 0 + for code in ['U0','U1','U2']: + sum += 
self._match_codes[code] + return sum + unique_reads = property(_get_unique_reads, + doc="total unique reads") + + def _get_repeat_reads(self): + if self._mapped_reads is None: + self._update() + sum = 0 + for code in ['R0','R1','R2']: + sum += self._match_codes[code] + return sum + repeat_reads = property(_get_repeat_reads, + doc="total repeat reads") + def get_elements(self): lane = ElementTree.Element(ElandLane.LANE, {'version': diff --git a/templates/library.html b/templates/library.html new file mode 100644 index 0000000..19e30c7 --- /dev/null +++ b/templates/library.html @@ -0,0 +1,46 @@ + + +{% block summary_stats %} + + + + + + + + + + + {% for lib in library_list%} + + + + + + + {% endfor %} + +
Library IDSpeciesLibrary NameTotal Lanes
{{ lib.library_id }}{{ lib.species_name }}{{ lib.library_name }}{{ lib.lanes_run }}
+{% endblock %} diff --git a/templates/summary_stats.html b/templates/summary_stats.html index 78a909e..89c6faa 100644 --- a/templates/summary_stats.html +++ b/templates/summary_stats.html @@ -1,23 +1,94 @@ +
+
+ {% block summary_stats %} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + -
No MatchQC FailedUniqueRepeat
CyclesFlowcellLaneEndClustersRaw Readstotal%total%Total0 mismatch1 mismatch2 mismatchTotal0 mismatch1 mismatch2 mismatch
-{% for d in data_dict_list %} - {# Header row for table #} - {% if forloop.first %} - - {% for key, value in d.items %} - - {% endfor %} - - {% endif %} - {# Main contents of table #} + {% for lane in lane_summary_list %} - {% for key, value in d.items %} - - {% endfor %} + + + + + + + + + + + + + + + + + + -{% endfor %} + {% endfor %} +
{{ key }}
{{ value }}{{ lane.cycle_width }}{{ lane.flowcell_id }}{{ lane.lane_id }}{% if lane.end %}{{ lane.end }}{% endif %}{{ lane.clusters.0 }} ± {{ lane.clusters.1 }}{{ lane.reads }}{{ lane.no_match }}{{ lane.no_match_percent|stringformat:".2f" }}{{ lane.qc_failed }}{{ lane.qc_failed_percent|stringformat:".2f" }}{{ lane.unique_reads }}{{ lane.match_codes.U0 }}{{ lane.match_codes.U1 }}{{ lane.match_codes.U2 }}{{ lane.repeat_reads }}{{ lane.match_codes.R0 }}{{ lane.match_codes.R1 }}{{ lane.match_codes.R2 }}
- +
+
+{% for lane in lane_summary_list %} +

{{lane.cycle_width}} {{ lane.flowcell_id }} {{ lane.lane_id }} {{ lane.end }}

+ +{% endfor %} {% endblock %} -- 2.30.2