Update the library listing and summary reports to be prettier and easier to use.
author Diane Trout <diane@caltech.edu>
Thu, 29 Jan 2009 01:42:41 +0000 (01:42 +0000)
committer Diane Trout <diane@caltech.edu>
Thu, 29 Jan 2009 01:42:41 +0000 (01:42 +0000)
This also involved updating the pipelines.eland class to have some helpful
read-only properties to do some math for our reporting.

Also I added a library.html template instead of just dumping raw
HTML to the Django response object.

gaworkflow/frontend/fctracker/views.py
gaworkflow/pipeline/eland.py
templates/library.html [new file with mode: 0644]
templates/summary_stats.html

index 0d2eecb4d998ce6944210b651be33ce9a338c9f5..bfa9d4b0f25022da8bba412531777feb73a1a893 100644 (file)
@@ -12,18 +12,35 @@ from django.template.loader import get_template
 from django.template import Context
 
 import StringIO
+import logging
+import os
 
 #from django.db.models import base 
+# Lane numbers; create_library_list() uses each one to build the
+# 'lane_<n>_library' attribute name it reads from a Library.
+LANE_LIST = [1,2,3,4,5,6,7,8]
+
+def create_library_list():
+    """
+    Create a list of libraries that includes how many lanes were run.
+
+    Returns a list of dictionaries, one per Library object, with the
+    keys 'library_id', 'library_name', 'species_name' and 'lanes_run'.
+    """
+    library_list = []
+    for lib in Library.objects.all():
+        lanes_run = 0
+        for lane_id in LANE_LIST:
+            lane = getattr(lib, 'lane_%d_library' % (lane_id,))
+            # count() lets the database do the counting instead of
+            # loading every related row just to take the list length.
+            lanes_run += lane.count()
+        library_list.append({
+            'library_id': lib.library_id,
+            'library_name': lib.library_name,
+            'species_name': lib.library_species.scientific_name,
+            'lanes_run': lanes_run,
+        })
+    return library_list
 
 def library(request):
-    library_list = Library.objects.all() #.order_by('-pub_date')
-    rep_string = '<a href="/library/%s/">%s - %s (%s)</a>'
-    output = '<br />\n'.join([rep_string \
-      % (l.library_id,
-         l.library_id,
-         l.library_name,
-         l.library_species.scientific_name) for l in library_list])
-    return HttpResponse(output)
+    """
+    Render the library.html template with the list built by
+    create_library_list() and return it as an HttpResponse.
+    """
+    library_list = create_library_list()
+    t = get_template('library.html')
+    c = Context({'library_list': library_list })
+    return HttpResponse( t.render(c) )
 
 def library_to_flowcells(request, lib_id):
     """
@@ -74,24 +91,18 @@ def library_to_flowcells(request, lib_id):
     flowcell_list.extend([ (fc.flowcell_id, 8) for fc in lib.lane_8_library.all() ])
     flowcell_list.sort()
     
-    output.append('<br />')
-    
-    data_dict_list = []
+    lane_summary_list = []
     for fc, lane in flowcell_list:
-        dicts, err_list, summary_list = _summary_stats(fc, lane)
+        lane_summary, err_list = _summary_stats(fc, lane)
         
-        data_dict_list.extend(dicts)
+        lane_summary_list.extend(lane_summary)
     
         for err in err_list:    
             output.append(err)
-    
-        for summary in summary_list:
-            output.append(summary.replace('\n', '<br />\n'))
-    html = t.render(Context({'data_dict_list': data_dict_list}))
-    output.append('<br />')
-    output.append('<br />')
-    output.append(html)
+   
+    logging.error("len lane summary %d" % (len(lane_summary_list)))
     output.append('<br />')
+    output.append(t.render(Context({'lane_summary_list': lane_summary_list})))
     output.append('<br />')
     
     if record_count == 0:
@@ -194,7 +205,47 @@ def bedfile_fc_cnm_eland_lane(request, fc_id, cnm, lane, ucsc_compatible=False):
         return HttpResponse(bedgen, mimetype="application/x-bedfile")
 
 
-def _summary_stats(flowcell_id, lane):
+def _summary_stats(flowcell_id, lane_id):
+    """
+    Return the summary statistics for a given flowcell and lane.
+
+    Returns a (summary_list, err_list) tuple.  summary_list holds one
+    eland lane result per cycle width and read end; each result is
+    annotated with flowcell_id, clusters, cycle_width and
+    summarized_reads attributes.  err_list holds messages describing
+    results that could not be loaded.
+    """
+    fc_id = flowcellIdStrip(flowcell_id)
+    fc_result_dict = get_flowcell_result_dict(fc_id)
+
+    summary_list = []
+    err_list = []
+
+    if fc_result_dict is None:
+        err_list.append('Results for Flowcell %s not found.' % (fc_id))
+        return (summary_list, err_list)
+
+    for cycle_width in fc_result_dict:
+        xmlpath = fc_result_dict[cycle_width]['run_xml']
+
+        if xmlpath is None:
+            # cycle_width is the loop key; referring to any other name
+            # here would raise NameError instead of reporting the problem.
+            err_list.append('Run xml for Flowcell %s(%s) not found.' % (fc_id, cycle_width))
+            continue
+
+        tree = ElementTree.parse(xmlpath).getroot()
+        try:
+            runs = runfolder.PipelineRun(pathname='', xml=tree)
+            gerald_summary = runs.gerald.summary.lane_results
+            # gerald_summary and eland_results.results are both indexed
+            # by read end, so walk them with a shared index.
+            for end in range(len(gerald_summary)):
+                eland_summary = runs.gerald.eland_results.results[end][lane_id]
+                # add information to lane_summary
+                eland_summary.flowcell_id = flowcell_id
+                eland_summary.clusters = gerald_summary[end][lane_id].cluster
+                eland_summary.cycle_width = cycle_width
+                eland_summary.summarized_reads = runfolder.summarize_mapped_reads(eland_summary.mapped_reads)
+                summary_list.append(eland_summary)
+
+        except Exception, e:
+            # summary_list only holds lane result objects; a bare string
+            # there has none of the attributes consumers read, so the
+            # message is reported through err_list instead.
+            err_list.append("Summary report needs to be updated.")
+            logging.error("Exception: " + str(e))
+
+    return (summary_list, err_list)
+
+def _summary_stats_old(flowcell_id, lane):
     """
     return a dictionary of summary stats for a given flowcell_id & lane.
     """
@@ -220,10 +271,15 @@ def _summary_stats(flowcell_id, lane):
         tree = ElementTree.parse(xmlpath).getroot()
         results = runfolder.PipelineRun(pathname='', xml=tree)
         try:
-            summary_list.append(runfolder.summary_report([results]))
-        except:
+            lane_report = runfolder.summarize_lane(results.gerald, lane)
+            summary_list.append(os.linesep.join(lane_report))
+        except Exception, e:
             summary_list.append("Summary report needs to be updated.")
-        
+            logging.error("Exception: " + str(e))
+       
+        print "----------------------------------"
+        print "-- DOES NOT SUPPORT PAIRED END ---"
+        print "----------------------------------"
         lane_results = results.gerald.summary[0][lane]
         lrs = lane_results
         
@@ -280,4 +336,4 @@ def _files(flowcell_id, lane):
         return ''
     
     return '(' + '|'.join(output) + ')'
-
+            
index a9462272ff72a717938fc5df61787b6777e4cdbc..ef751602819a78f28e1ff6cc728725a1db27d43f 100644 (file)
@@ -194,6 +194,50 @@ class ElandLane(object):
         return self._match_codes
     match_codes = property(_get_match_codes)
 
+    def _get_no_match(self):
+        """Return the count stored under the 'NM' match code."""
+        needs_update = self._mapped_reads is None
+        if needs_update:
+            # nothing has been loaded yet; call _update() before reading
+            self._update()
+        match_codes = self._match_codes
+        return match_codes['NM']
+    no_match = property(fget=_get_no_match,
+                        doc="total reads that didn't match the target genome.")
+
+    def _get_no_match_percent(self):
+        """
+        Return the no match reads as a percentage of self.reads.
+
+        Returns 0.0 when there are no reads instead of raising
+        ZeroDivisionError.
+        """
+        total_reads = self.reads
+        if not total_reads:
+            return 0.0
+        return float(self.no_match)/total_reads * 100
+    no_match_percent = property(_get_no_match_percent,
+                                doc="no match reads as percent of total")
+
+    def _get_qc_failed(self):
+        """Return the count stored under the 'QC' match code."""
+        if self._mapped_reads is None:
+            # nothing has been loaded yet; call _update() before reading
+            self._update()
+        return self._match_codes['QC']
+    qc_failed = property(_get_qc_failed,
+                        doc="total reads that failed QC.")
+
+    def _get_qc_failed_percent(self):
+        """
+        Return the QC failed reads as a percentage of self.reads.
+
+        Returns 0.0 when there are no reads instead of raising
+        ZeroDivisionError.
+        """
+        total_reads = self.reads
+        if not total_reads:
+            return 0.0
+        return float(self.qc_failed)/total_reads * 100
+    qc_failed_percent = property(_get_qc_failed_percent,
+                                 doc="QC failed reads as percent of total")
+
+    def _get_unique_reads(self):
+        """Return the combined count of the 'U0', 'U1' and 'U2' match codes."""
+        if self._mapped_reads is None:
+            # nothing has been loaded yet; call _update() before reading
+            self._update()
+        # the builtin sum() starts from 0, the same as a manual accumulator
+        return sum(self._match_codes[code] for code in ('U0', 'U1', 'U2'))
+    unique_reads = property(_get_unique_reads,
+                            doc="total unique reads")
+
+    def _get_repeat_reads(self):
+        """Return the combined count of the 'R0', 'R1' and 'R2' match codes."""
+        if self._mapped_reads is None:
+            # nothing has been loaded yet; call _update() before reading
+            self._update()
+        # the builtin sum() starts from 0, the same as a manual accumulator
+        return sum(self._match_codes[code] for code in ('R0', 'R1', 'R2'))
+    repeat_reads = property(_get_repeat_reads,
+                            doc="total repeat reads")
+    
     def get_elements(self):
         lane = ElementTree.Element(ElandLane.LANE,
                                    {'version':
diff --git a/templates/library.html b/templates/library.html
new file mode 100644 (file)
index 0000000..19e30c7
--- /dev/null
@@ -0,0 +1,46 @@
+<style type="text/css">
+  /* <![CDATA[ */
+  table, td {
+    border-style: solid;
+  }
+  table {
+    border-width: 0 0 1px 1px;
+    border-spacing: 0;
+    border-collapse: collapse;
+  }
+  thead {
+    text-align: center;
+  }
+  td {
+    margin: 0;
+    padding: 4px;
+    border-width: 1px 1px 0 0;
+  }
+  td a {
+    display: block;
+  }
+  /* ]]> */
+</style>
+
+{% block summary_stats %}
+<table>
+  <thead>
+    <tr>
+    <td>Library ID</td>
+    <td>Species</td>
+    <td>Library Name</td>
+    <td>Total Lanes</td>
+    </tr>
+  </thead>
+  <tbody>
+    {% for lib in library_list%}
+    <tr>
+      <td><a href="/library/{{ lib.library_id }}">{{ lib.library_id }}</a></td>
+      <td><a href="/library/{{ lib.library_id }}">{{ lib.species_name }}</a></td>
+      <td><a href="/library/{{ lib.library_id }}">{{ lib.library_name }}</a></td>
+      <td>{{ lib.lanes_run }}</td>
+    </tr>
+    {% endfor %}
+  </tbody>
+</table>
+{% endblock %}
index 78a909e19caccb08ee79899e19a0dea016eeafbf..89c6faa1b4499c01090582801cfa73b54fac13b1 100644 (file)
@@ -1,23 +1,94 @@
+<hr/>
+<br/>
+<style type="text/css">
+  /* <![CDATA[ */
+  table, td {
+    border-style: solid;
+  }
+  table {
+    border-width: 0 0 1px 1px;
+    border-spacing: 0;
+    border-collapse: collapse;
+  }
+  td {
+    margin: 0;
+    padding: 4px;
+    border-width: 1px 1px 0 0;
+  }
+  thead {
+    text-align: center;
+    }
+  tbody {
+    text-align: right;
+  }
+  /* ]]> */
+</style>
 
 {% block summary_stats %}
+<table>
+  <thead>
+    <tr>
+      <td colspan="6"></td>
+      <td colspan="2">No Match</td>
+      <td colspan="2">QC Failed</td>
+      <td colspan="4">Unique</td>
+      <td colspan="4">Repeat</td>
+    </tr>
+    <tr>
+    <td>Cycles</td>
+    <td>Flowcell</td>
+    <td>Lane</td>
+    <td>End</td>
+    <td>Clusters</td>
+    <td>Raw Reads</td>
+    <td>total</td>
+    <td>%</td>
+    <td>total</td>
+    <td>%</td>
+    <td>Total</td>
+    <td>0 mismatch</td>
+    <td>1 mismatch</td>
+    <td>2 mismatch</td>
+    <td>Total</td>
+    <td>0 mismatch</td>
+    <td>1 mismatch</td>
+    <td>2 mismatch</td>
+    </tr>
+  </thead>
+  <tbody>
 
-<table border="1">
-{% for d in data_dict_list %}
-    {# Header row for table #}
-    {% if forloop.first %}
-        <tr>
-        {% for key, value in d.items %}
-            <td><b>{{ key }}</b></td>
-        {% endfor %}
-        </tr>
-    {% endif %}
-    {# Main contents of table #}
+    {% for lane in lane_summary_list %}
     <tr>
-    {% for key, value in d.items %}
-        <td>{{ value }}</td>
-    {% endfor %}
+      <td>{{ lane.cycle_width }}</td>
+      <td>{{ lane.flowcell_id }}</td>
+      <td>{{ lane.lane_id }}</td>
+      <td>{% if lane.end %}{{ lane.end }}{% endif %}</td>
+      <td>{{ lane.clusters.0 }} &plusmn; {{ lane.clusters.1 }}</td>
+      <td>{{ lane.reads }}</td>
+      <td>{{ lane.no_match }}</td>
+      <td>{{ lane.no_match_percent|stringformat:".2f" }}</td>
+      <td>{{ lane.qc_failed }}</td>
+      <td>{{ lane.qc_failed_percent|stringformat:".2f" }}</td>
+      <td>{{ lane.unique_reads }}</td>
+      <td>{{ lane.match_codes.U0 }}</td>
+      <td>{{ lane.match_codes.U1 }}</td>
+      <td>{{ lane.match_codes.U2 }}</td>
+      <td>{{ lane.repeat_reads }}</td>
+      <td>{{ lane.match_codes.R0 }}</td>
+      <td>{{ lane.match_codes.R1 }}</td>
+      <td>{{ lane.match_codes.R2 }}</td>
     </tr>
-{% endfor %}
+    {% endfor %}
+  </tbody>
 </table>
-
+<br/>
+<hr/>
+{% for lane in lane_summary_list %}
+<h2>{{lane.cycle_width}} {{ lane.flowcell_id }} {{ lane.lane_id }} {{ lane.end }}</h2>
+  <ul>
+    {% for name, counts in lane.summarized_reads.items %}
+    <li><b>{{ name }}</b>: {{ counts }}</li>
+    {% endfor %}
+  </ul>
+{% endfor %}
 {% endblock %}