Merge in the library list, detail, and results downloading feature from
the Caltech live site.
authorDiane Trout <diane@caltech.edu>
Fri, 13 Feb 2009 01:51:58 +0000 (01:51 +0000)
committerDiane Trout <diane@caltech.edu>
Fri, 13 Feb 2009 01:51:58 +0000 (01:51 +0000)

There's several components in the frontend tree to render the pages,
in addition this adds in some helper functions in pipelines.eland
to simplify computing summary statistics for an eland lane.

I also needed to merge in a generator based makebed code for
returning the files to the django database.

To use this, the settings file in this branch will need a variable
RESULT_HOME_DIR to be set.

htsworkflow/frontend/samples/results.py [new file with mode: 0644]
htsworkflow/frontend/samples/views.py
htsworkflow/frontend/settings.py
htsworkflow/frontend/templates/samples/library_detail.html [new file with mode: 0644]
htsworkflow/frontend/templates/samples/library_index.html [new file with mode: 0644]
htsworkflow/frontend/urls.py
htsworkflow/pipelines/eland.py
htsworkflow/util/makebed.py
htsworkflow/util/test/test_makebed.py

diff --git a/htsworkflow/frontend/samples/results.py b/htsworkflow/frontend/samples/results.py
new file mode 100644 (file)
index 0000000..d7cb77e
--- /dev/null
@@ -0,0 +1,134 @@
+from htsworkflow.frontend import settings
+
+import glob
+import os
+import re
+
# Matches the leading run of word characters in a string.
# NOTE(review): currently unused in this module -- confirm before removing.
# Raw string avoids the invalid-escape warning for "\w" in later Pythons.
s_paren = re.compile(r"^\w+")
+
def get_flowcell_result_dict(flowcell_id):
    """
    returns a dictionary following the following pattern for
    a given flowcell_id:

    d['C1-33']['summary']           # Summary.htm file path
    d['C1-33']['eland_results'][5]  # C1-33 lane 5 file eland results file path
    d['C1-33']['run_xml']           # run_*.xml file path
    d['C1-33']['scores']            # scores.tar.gz file path

    Returns None when no directory for flowcell_id exists under
    settings.RESULT_HOME_DIR; returns an empty dict when the directory
    exists but contains no C*-* cycle directories.
    """
    flowcell_id = flowcell_id.strip()

    results = {}

    ################################
    # Flowcell Directory
    fc_matches = glob.glob(os.path.join(settings.RESULT_HOME_DIR, flowcell_id))
    if len(fc_matches) == 0:
        # flowcell directory not found
        return None
    # No duplicates!
    assert len(fc_matches) <= 1
    fc_dir = fc_matches[0]

    ################################
    # One entry per C#-## cycle directory
    for c_dir_path in glob.glob(os.path.join(fc_dir, 'C*')):
        c_dir = os.path.split(c_dir_path)[1]
        info = {}
        results[c_dir] = info

        ###############################
        # Summary.htm file (None when absent)
        summary_matches = glob.glob(os.path.join(c_dir_path, 'Summary.htm'))
        if summary_matches:
            # No duplicates!
            assert len(summary_matches) == 1
            info['summary'] = summary_matches[0]
        else:
            info['summary'] = None

        ###############################
        # eland result files, keyed by integer lane number
        info['eland_results'] = {}
        for filepath in glob.glob(os.path.join(c_dir_path, 's_*_eland_result.txt*')):
            result_name = os.path.split(filepath)[1]
            # lanes 1-8, single digit, therefore s_#; # == index 2
            lane = int(result_name[2])
            info['eland_results'][lane] = filepath

        ###############################
        # run*.xml file (None when absent)
        run_xml_matches = glob.glob(os.path.join(c_dir_path, 'run_*.xml'))
        if run_xml_matches:
            # No duplicates
            assert len(run_xml_matches) == 1
            info['run_xml'] = run_xml_matches[0]
        else:
            info['run_xml'] = None

        ###############################
        # scores.tar.gz (None when absent)
        score_matches = glob.glob(os.path.join(c_dir_path, 'scores*'))
        if score_matches:
            # No duplicates
            assert len(score_matches) == 1
            info['scores'] = score_matches[0]
        else:
            info['scores'] = None

    return results
+
+    
def cn_mTobp(cn_m):
    """
    Converts CN-M (i.e. C1-33, C1-26, C4-28) cycle information into
    number of base pairs.

    NOTE(review): not implemented -- currently always returns None.
    Presumably N-M names a cycle range whose length is the read length
    in bp; confirm the intended semantics before implementing.
    """
    pass
+
+
def parse_flowcell_id(flowcell_id):
    """
    Return flowcell id and any status encoded in the id

    We stored the status information in the flowcell id name.
    this was dumb, but database schemas are hard to update.
    """
    # whitespace separates the real id from the optional status suffix
    fields = flowcell_id.split()
    fcid = fields[0] if len(fields) > 0 else None
    status = fields[1] if len(fields) > 1 else None
    return fcid, status
+    
index 2299e4f9e2203cbc3052d0ad85d4fbfeeceded84..4763f391dcc0fbd7bc50c7a8dedffb5827f93776 100644 (file)
@@ -1 +1,333 @@
-# Create your views here.
\ No newline at end of file
+# Create your views here.
+from htsworkflow.frontend.samples.models import Library
+from htsworkflow.frontend.samples.results import get_flowcell_result_dict, parse_flowcell_id
+from htsworkflow.pipelines.runfolder import load_pipeline_run_xml
+from htsworkflow.pipelines import runfolder
+from htsworkflow.frontend import settings
+from htsworkflow.util import makebed
+from htsworkflow.util import opener
+
+from django.http import HttpResponse
+from django.template.loader import get_template
+from django.template import Context
+
+import StringIO
+import logging
+import os
+
+LANE_LIST = [1,2,3,4,5,6,7,8]
+
def create_library_list():
    """
    Create a list of libraries that includes how many lanes were run

    Returns a list of dicts with keys library_id, library_name,
    species_name and lanes_run.
    """
    library_list = []
    for lib in Library.objects.all():
        # count every flowcell lane (1-8) this library appears in
        lanes_run = sum(
            len(getattr(lib, 'lane_%d_library' % (lane_id,)).all())
            for lane_id in LANE_LIST)
        library_list.append({
            'library_id': lib.library_id,
            'library_name': lib.library_name,
            'species_name': lib.library_species.scientific_name,
            'lanes_run': lanes_run,
        })
    return library_list
+
def library(request):
    """
    Render the library index page listing all libraries.
    """
    template = get_template('samples/library_index.html')
    context = Context({'library_list': create_library_list()})
    return HttpResponse(template.render(context))
+
def library_to_flowcells(request, lib_id):
    """
    Display information about all the flowcells a library has been run on.

    Renders a plain <br/>-joined HTML page: library metadata, one line per
    flowcell/lane pair (with download links from _files), any errors from
    _summary_stats, and the summary table from library_detail.html.
    """
    t = get_template("samples/library_detail.html")

    # only a missing library is "not found"; any other error should
    # propagate instead of being swallowed by a bare except
    try:
        lib = Library.objects.get(library_id=lib_id)
    except Library.DoesNotExist:
        return HttpResponse("Library %s does not exist" % (lib_id))

    output = []

    output.append('<b>Library ID:</b> %s' % (lib.library_id))
    output.append('<b>Name:</b> %s' % (lib.library_name))
    output.append('<b>Species:</b> %s' % (lib.library_species.scientific_name))
    output.append('')

    output.append('<b>FLOWCELL - LANE:</b>')

    # walk lanes 1-8 via LANE_LIST instead of eight copy-pasted blocks
    record_count = 0
    flowcell_list = []
    for lane_id in LANE_LIST:
        lane_manager = getattr(lib, 'lane_%d_library' % (lane_id,))
        for fc in lane_manager.all():
            output.append('%s - Lane %d %s' % (
                fc.flowcell_id, lane_id, _files(fc.flowcell_id, lane_id)))
            flowcell_list.append((fc.flowcell_id, lane_id))
        record_count += lane_manager.count()
    flowcell_list.sort()

    lane_summary_list = []
    for fc, lane in flowcell_list:
        lane_summary, err_list = _summary_stats(fc, lane)
        lane_summary_list.extend(lane_summary)
        for err in err_list:
            output.append(err)

    output.append('<br />')
    output.append(t.render(Context({'lane_summary_list': lane_summary_list})))
    output.append('<br />')

    if record_count == 0:
        output.append("None Found")

    return HttpResponse('<br />\n'.join(output))
+
+
def summaryhtm_fc_cnm(request, fc_id, cnm):
    """
    returns a Summary.htm file if it exists.

    fc_id -- flowcell id (may carry a status suffix, which is stripped)
    cnm -- cycle-range directory name, e.g. 'C1-33'
    """
    fc_id, status = parse_flowcell_id(fc_id)
    d = get_flowcell_result_dict(fc_id)

    if d is None:
        return HttpResponse('<b>Results for Flowcell %s not found.</b>' % (fc_id))

    if cnm not in d:
        return HttpResponse('<b>Results for Flowcell %s; %s not found.</b>' % (fc_id, cnm))

    summary_filepath = d[cnm]['summary']

    if summary_filepath is None:
        return HttpResponse('<b>Summary.htm for Flowcell %s; %s not found.</b>' % (fc_id, cnm))

    # read and close the file instead of handing the open handle to
    # HttpResponse, which would leak it until garbage collection
    f = open(summary_filepath, 'r')
    try:
        content = f.read()
    finally:
        f.close()

    return HttpResponse(content)
+
+
def result_fc_cnm_eland_lane(request, fc_id, cnm, lane):
    """
    returns an eland_file upon calling.
    """
    fc_id, status = parse_flowcell_id(fc_id)
    result_dict = get_flowcell_result_dict(fc_id)

    # guard clauses: missing flowcell, missing cycle dir, missing lane
    if result_dict is None:
        return HttpResponse('<b>Results for Flowcell %s not found.</b>' % (fc_id))
    if cnm not in result_dict:
        return HttpResponse('<b>Results for Flowcell %s; %s not found.</b>' % (fc_id, cnm))

    eland_files = result_dict[cnm]['eland_results']
    lane = int(lane)
    if lane not in eland_files:
        return HttpResponse('<b>Results for Flowcell %s; %s; lane %s not found.</b>' % (fc_id, cnm, lane))

    # autoopen transparently handles compressed eland result files
    eland_file = opener.autoopen(eland_files[lane], 'r')
    return HttpResponse(eland_file, mimetype="application/x-elandresult")
+
+
def bedfile_fc_cnm_eland_lane_ucsc(request, fc_id, cnm, lane):
    """
    returns a bed file for a given flowcell, CN-M (i.e. C1-33), and lane (ucsc compatible)
    """
    # same as bedfile_fc_cnm_eland_lane but leaves the default mimetype
    # so the UCSC genome browser will accept the response
    return bedfile_fc_cnm_eland_lane(request, fc_id, cnm, lane,
                                     ucsc_compatible=True)
+
+
def bedfile_fc_cnm_eland_lane(request, fc_id, cnm, lane, ucsc_compatible=False):
    """
    returns a bed file for a given flowcell, CN-M (i.e. C1-33), and lane
    """
    fc_id, status = parse_flowcell_id(fc_id)
    result_dict = get_flowcell_result_dict(fc_id)

    # guard clauses: missing flowcell, missing cycle dir, missing lane
    if result_dict is None:
        return HttpResponse('<b>Results for Flowcell %s not found.</b>' % (fc_id))
    if cnm not in result_dict:
        return HttpResponse('<b>Results for Flowcell %s; %s not found.</b>' % (fc_id, cnm))

    eland_files = result_dict[cnm]['eland_results']
    lane = int(lane)
    if lane not in eland_files:
        return HttpResponse('<b>Results for Flowcell %s; %s; lane %s not found.</b>' % (fc_id, cnm, lane))

    # stream the (possibly compressed) eland result through the bed
    # generator so the whole bed file is never held in memory
    eland_stream = opener.autoopen(eland_files[lane], 'r')
    name, description = makebed.make_description(fc_id, lane)
    bedgen = makebed.make_bed_from_eland_generator(eland_stream, name, description)

    if ucsc_compatible:
        return HttpResponse(bedgen)
    return HttpResponse(bedgen, mimetype="application/x-bedfile")
+
+
def _summary_stats(flowcell_id, lane_id):
    """
    Return the summary statistics for a given flowcell and lane.

    Returns (summary_list, err_list): summary_list holds one eland lane
    result object per cycle-directory/end combination, annotated with
    flowcell_id, clusters, cycle_width and summarized_reads; err_list
    holds human-readable error strings for missing results.
    """
    fc_id, status = parse_flowcell_id(flowcell_id)
    fc_result_dict = get_flowcell_result_dict(fc_id)

    summary_list = []
    err_list = []
    
    if fc_result_dict is None:
        err_list.append('Results for Flowcell %s not found.' % (fc_id))
        return (summary_list, err_list)

    # keys of fc_result_dict are cycle directory names like 'C1-33'
    for cycle_width in fc_result_dict:
        xmlpath = fc_result_dict[cycle_width]['run_xml']
        
        if xmlpath is None:
            err_list.append('Run xml for Flowcell %s(%s) not found.' % (fc_id, cycle_width))
            continue
        
        try:
            run = load_pipeline_run_xml(xmlpath)
            gerald_summary = run.gerald.summary.lane_results
            # one entry per sequencing end (paired-end runs have two)
            for end in range(len(gerald_summary)):
                eland_summary = run.gerald.eland_results.results[end][lane_id]
                # add information to lane_summary
                eland_summary.flowcell_id = flowcell_id
                eland_summary.clusters = gerald_summary[end][lane_id].cluster
                eland_summary.cycle_width = cycle_width
                eland_summary.summarized_reads = runfolder.summarize_mapped_reads(eland_summary.mapped_reads)
                summary_list.append(eland_summary)

        # NOTE(review): on failure a plain string is appended to
        # summary_list, which otherwise holds result objects -- the
        # consuming template must tolerate both; confirm intended.
        except Exception, e:
            summary_list.append("Summary report needs to be updated.")
            logging.error("Exception: " + str(e))
    
    return (summary_list, err_list)
+
+def _summary_stats_old(flowcell_id, lane):
+    """
+    return a dictionary of summary stats for a given flowcell_id & lane.
+    """
+    fc_id, status = parse_flowcell_id(flowcell_id)
+    fc_result_dict = get_flowcell_result_dict(fc_id)
+    
+    dict_list = []
+    err_list = []
+    summary_list = []
+    
+    if fc_result_dict is None:
+        err_list.append('Results for Flowcell %s not found.' % (fc_id))
+        return (dict_list, err_list, summary_list)
+    
+    for cnm in fc_result_dict:
+    
+        xmlpath = fc_result_dict[cnm]['run_xml']
+        
+        if xmlpath is None:
+            err_list.append('Run xml for Flowcell %s(%s) not found.' % (fc_id, cnm))
+            continue
+        
+        tree = ElementTree.parse(xmlpath).getroot()
+        results = runfolder.PipelineRun(pathname='', xml=tree)
+        try:
+            lane_report = runfolder.summarize_lane(results.gerald, lane)
+            summary_list.append(os.linesep.join(lane_report))
+        except Exception, e:
+            summary_list.append("Summary report needs to be updated.")
+            logging.error("Exception: " + str(e))
+       
+        print "----------------------------------"
+        print "-- DOES NOT SUPPORT PAIRED END ---"
+        print "----------------------------------"
+        lane_results = results.gerald.summary[0][lane]
+        lrs = lane_results
+        
+        d = {}
+        
+        d['average_alignment_score'] = lrs.average_alignment_score
+        d['average_first_cycle_intensity'] = lrs.average_first_cycle_intensity
+        d['cluster'] = lrs.cluster
+        d['lane'] = lrs.lane
+        d['flowcell'] = flowcell_id
+        d['cnm'] = cnm
+        d['percent_error_rate'] = lrs.percent_error_rate
+        d['percent_intensity_after_20_cycles'] = lrs.percent_intensity_after_20_cycles
+        d['percent_pass_filter_align'] = lrs.percent_pass_filter_align
+        d['percent_pass_filter_clusters'] = lrs.percent_pass_filter_clusters
+        
+        #FIXME: function finished, but need to take advantage of
+        #   may need to take in a list of lanes so we only have to
+        #   load the xml file once per flowcell rather than once
+        #   per lane.
+        dict_list.append(d)
+    
+    return (dict_list, err_list, summary_list)
+    
+    
+
+    
def _files(flowcell_id, lane):
    """
    Sets up available files for download

    Returns '' when nothing is available, otherwise a
    '(link|link|...)' string of HTML anchors.
    """
    # avoid shadowing the builtin 'id' with the status component
    flowcell_id, status = parse_flowcell_id(flowcell_id)
    result_dict = get_flowcell_result_dict(flowcell_id)

    if result_dict is None:
        return ''

    lane = int(lane)
    links = []

    # c_name == 'CN-M' (i.e. C1-33)
    for c_name in result_dict:
        cycle_info = result_dict[c_name]

        if cycle_info['summary'] is not None:
            links.append('<a href="/results/%s/%s/summary/">summary(%s)</a>' \
                          % (flowcell_id, c_name, c_name))

        if lane in cycle_info['eland_results']:
            links.append('<a href="/results/%s/%s/eland_result/%s">eland_result(%s)</a>' % (flowcell_id, c_name, lane, c_name))
            links.append('<a href="/results/%s/%s/bedfile/%s">bedfile(%s)</a>' % (flowcell_id, c_name, lane, c_name))

    if not links:
        return ''

    return '(' + '|'.join(links) + ')'
+            
index 76aa1f356fa2ec5697ac0c06c14d3f683c389ec3..a988694d5d503e0073c71ef83744d9e31954819b 100644 (file)
@@ -27,6 +27,9 @@ The options understood by this module are (with their defaults):
 import ConfigParser
 import os
 
+# make epydoc happy
+__docformat__ = "restructuredtext en"
+
 def options_to_list(dest, section_name):
   """
   Load a options from section_name and store in a dictionary
@@ -140,6 +143,7 @@ INSTALLED_APPS = (
     'django.contrib.admin',
     'django.contrib.auth',
     'django.contrib.contenttypes',
+    'django.contrib.humanize',
     'django.contrib.sessions',
     'django.contrib.sites',
     'htsworkflow.frontend.eland_config',
diff --git a/htsworkflow/frontend/templates/samples/library_detail.html b/htsworkflow/frontend/templates/samples/library_detail.html
new file mode 100644 (file)
index 0000000..3533477
--- /dev/null
@@ -0,0 +1,99 @@
+{% load humanize %}
+
+<hr/>
+<br/>
+<style type="text/css">
+  /* <![CDATA[ */
+  table, td {
+    border-style: solid;
+  }
+  table {
+    border-width: 0 0 1px 1px;
+    border-spacing: 0;
+    border-collapse: collapse;
+  }
+  td {
+    margin: 0;
+    padding: 4px;
+    border-width: 1px 1px 0 0;
+  }
+  thead {
+    text-align: center;
+    }
+  tbody {
+    text-align: right;
+  }
+  /* ]]> */
+</style>
+
+{% block summary_stats %}
+<table>
+  <thead>
+    <tr>
+      <td colspan="6"></td>
+      <td colspan="2">No Match</td>
+      <td colspan="2">QC Failed</td>
+      <td colspan="4">Unique</td>
+      <td colspan="4">Repeat</td>
+    </tr>
+    <tr>
+    <td>Cycles</td>
+    <td>Flowcell</td>
+    <td>Lane</td>
+    <td>End</td>
+    <td>Cluster / Tile</td>
+    <td>Raw Reads</td>
+    <td>total</td>
+    <td>%</td>
+    <td>total</td>
+    <td>%</td>
+    <td>0 mismatch</td>
+    <td>1 mismatch</td>
+    <td>2 mismatch</td>
+    <td>Total</td>
+    <td>0 mismatch</td>
+    <td>1 mismatch</td>
+    <td>2 mismatch</td>
+    <td>Total</td>
+    </tr>
+  </thead>
+  <tbody>
+
+    {% for lane in lane_summary_list %}
+    <tr>
+      <td>{{ lane.cycle_width }}</td>
+      <td>{{ lane.flowcell_id }}</td>
+      <td>{{ lane.lane_id }}</td>
+      <td>{% if lane.end %}{{ lane.end }}{% endif %}</td>
+      <td>{{ lane.clusters.0|intcomma }}</td>
+      <td>{{ lane.reads|intcomma }}</td>
+      <td>{{ lane.no_match|intcomma }}</td>
+      <td>{{ lane.no_match_percent|stringformat:".2f" }}</td>
+      <td>{{ lane.qc_failed|intcomma }}</td>
+      <td>{{ lane.qc_failed_percent|stringformat:".2f" }}</td>
+      <td>{{ lane.match_codes.U0|intcomma }}</td>
+      <td>{{ lane.match_codes.U1|intcomma }}</td>
+      <td>{{ lane.match_codes.U2|intcomma }}</td>
+      <td>{{ lane.unique_reads|intcomma }}</td>
+      <td>{{ lane.match_codes.R0|intcomma }}</td>
+      <td>{{ lane.match_codes.R1|intcomma }}</td>
+      <td>{{ lane.match_codes.R2|intcomma }}</td>
+      <td>{{ lane.repeat_reads|intcomma }}</td>
+    </tr>
+    {% endfor %}
+  </tbody>
+</table>
+<br/>
+<hr/>
+{% for lane in lane_summary_list %}
+<h2>
+  {{lane.cycle_width}} {{ lane.flowcell_id }} lane {{ lane.lane_id }} 
+  {% if lane.end %} end {{ lane.end }}{% endif %}
+</h2>
+  <ul>
+    {% for name, counts in lane.summarized_reads.items %}
+    <li><b>{{ name }}</b>: {{ counts|intcomma }}</li>
+    {% endfor %}
+  </ul>
+{% endfor %}
+{% endblock %}
diff --git a/htsworkflow/frontend/templates/samples/library_index.html b/htsworkflow/frontend/templates/samples/library_index.html
new file mode 100644 (file)
index 0000000..19e30c7
--- /dev/null
@@ -0,0 +1,46 @@
+<style type="text/css">
+  /* <![CDATA[ */
+  table, td {
+    border-style: solid;
+  }
+  table {
+    border-width: 0 0 1px 1px;
+    border-spacing: 0;
+    border-collapse: collapse;
+  }
+  thead {
+    text-align: center;
+  }
+  td {
+    margin: 0;
+    padding: 4px;
+    border-width: 1px 1px 0 0;
+  }
+  td a {
+    display: block;
+  }
+  /* ]]> */
+</style>
+
+{% block summary_stats %}
+<table>
+  <thead>
+    <tr>
+    <td>Library ID</td>
+    <td>Species</td>
+    <td>Library Name</td>
+    <td>Total Lanes</td>
+    </tr>
+  </thead>
+  <tbody>
+    {% for lib in library_list %}
+    <tr>
+      <td><a href="/library/{{ lib.library_id }}">{{ lib.library_id }}</a></td>
+      <td><a href="/library/{{ lib.library_id }}">{{ lib.species_name }}</a></td>
+      <td><a href="/library/{{ lib.library_id }}">{{ lib.library_name }}</a></td>
+      <td>{{ lib.lanes_run }}</td>
+    </tr>
+    {% endfor %}
+  </tbody>
+</table>
+{% endblock %}
index b8b9e5ec348e2a19e5466db8366028dca89847e1..ccc31b35be2d5f07587dc1db4fe75be62fc8ad9a 100644 (file)
@@ -19,6 +19,19 @@ urlpatterns = patterns('',
     #(r'^analysis/', include('htsworkflow.frontend.analysis.urls')),
     # Report Views:
     (r'^reports/', include('htsworkflow.frontend.reports.urls')),
+    # Library browser
+    (r'^library/$', 'htsworkflow.frontend.samples.views.library'),
+    (r'^library/(?P<lib_id>\w+)/$', 
+      'htsworkflow.frontend.samples.views.library_to_flowcells'),
+    # Raw result files
+    (r'^results/(?P<fc_id>\w+)/(?P<cnm>C[1-9]-[0-9]+)/summary/',
+      'htsworkflow.frontend.samples.views.summaryhtm_fc_cnm'),
+    (r'^results/(?P<fc_id>\w+)/(?P<cnm>C[1-9]-[0-9]+)/eland_result/(?P<lane>[1-8])',
+      'htsworkflow.frontend.samples.views.result_fc_cnm_eland_lane'),
+    (r'^results/(?P<fc_id>\w+)/(?P<cnm>C[1-9]-[0-9]+)/bedfile/(?P<lane>[1-8])/ucsc',
+      'htsworkflow.frontend.samples.views.bedfile_fc_cnm_eland_lane_ucsc'),
+    (r'^results/(?P<fc_id>\w+)/(?P<cnm>C[1-9]-[0-9]+)/bedfile/(?P<lane>[1-8])',
+      'htsworkflow.frontend.samples.views.bedfile_fc_cnm_eland_lane'),
     
     # databrowser
     #(r'^databrowse/(.*)', databrowse.site.root)
index 05563a1a2cb5ba455c7a4b360c955c60eb630738..40507e63611136e1f10e300bb6f02b7812b02e23 100644 (file)
@@ -194,6 +194,50 @@ class ElandLane(object):
         return self._match_codes
     match_codes = property(_get_match_codes)
 
    def _get_no_match(self):
        # lazily parse the eland result file on first access
        if self._mapped_reads is None:
            self._update()  
        return self._match_codes['NM']
    no_match = property(_get_no_match, 
                        doc="total reads that didn't match the target genome.")
+
+    def _get_no_match_percent(self):
+        return float(self.no_match)/self.reads * 100 
+    no_match_percent = property(_get_no_match_percent,
+                                doc="no match reads as percent of total")
+
+    def _get_qc_failed(self):
+        if self._mapped_reads is None:
+            self._update()  
+        return self._match_codes['QC']
+    qc_failed = property(_get_qc_failed,
+                        doc="total reads that didn't match the target genome.")
+
+    def _get_qc_failed_percent(self):
+        return float(self.qc_failed)/self.reads * 100 
+    qc_failed_percent = property(_get_qc_failed_percent,
+                                 doc="QC failed reads as percent of total")
+
+    def _get_unique_reads(self):
+        if self._mapped_reads is None:
+           self._update()
+        sum = 0
+        for code in ['U0','U1','U2']:
+            sum += self._match_codes[code]
+        return sum
+    unique_reads = property(_get_unique_reads,
+                            doc="total unique reads")
+
+    def _get_repeat_reads(self):
+        if self._mapped_reads is None:
+           self._update()
+        sum = 0
+        for code in ['R0','R1','R2']:
+            sum += self._match_codes[code]
+        return sum
+    repeat_reads = property(_get_repeat_reads,
+                            doc="total repeat reads")
+    
     def get_elements(self):
         lane = ElementTree.Element(ElandLane.LANE,
                                    {'version':
index eeb4e88ebc685e867153830ec234bb38b55f5def..cb93163661978c7c36e0b1463591238ce79f9488 100755 (executable)
@@ -4,11 +4,13 @@ Utility functions to make bedfiles.
 import os
 import re
 
+__docformat__ = "restructuredtext en"
+
 # map eland_result.txt sense 
 sense_map = { 'F': '+', 'R': '-'}
 sense_color = { 'F': '0,0,255', 'R': '255,255,0' }
 
-def write_bed_header(outstream, name, description):
+def create_bed_header(name, description):
   """
   Produce the headerline for a bedfile
   """
@@ -17,11 +19,33 @@ def write_bed_header(outstream, name, description):
   if description is None: description = "eland result file"
   bed_header = 'track name="%s" description="%s" visibility=4 itemRgb="ON"'
   bed_header += os.linesep
-  outstream.write(bed_header % (name, description))
+  return bed_header % (name, description)
 
def make_bed_from_eland_stream(instream, outstream, name, description, chromosome_prefix='chr'):
  """
  read an eland result file from instream and write a bedfile to outstream

  :Parameters:
    - `instream`: stream containing the output from eland 
    - `outstream`: stream to write the bed file too
    - `name`: name of bed-file (must be unique)
    - `description`: longer description of the bed file
    - `chromosome_prefix`: restrict output lines to fasta records that start with this pattern
  """
  # delegate to the generator form and write out each yielded line
  bed_lines = make_bed_from_eland_generator(instream, name, description,
                                            chromosome_prefix)
  for bed_line in bed_lines:
      outstream.write(bed_line)
+
+def make_bed_from_eland_generator(instream, name, description, chromosome_prefix='chr'):
+  """
+  read an eland result file from instream and write a bedfile to outstream
+
+  :Parameters:
+    - `instream`: stream containing the output from eland 
+    - `name`: name of bed-file (must be unique)
+    - `description`: longer description of the bed file
+    - `chromosome_prefix`: restrict output lines to fasta records that start with this pattern
+
+  :Return: generator which yields lines of bedfile
   """
   # indexes into fields in eland_result.txt file
   SEQ = 1
@@ -29,7 +53,7 @@ def make_bed_from_eland_stream(instream, outstream, name, description, chromosom
   START = 7
   SENSE = 8
 
-  write_bed_header(outstream, name, description)
+  yield create_bed_header(name, description)
   prefix_len = len(chromosome_prefix)
 
   for line in instream:
@@ -42,15 +66,14 @@ def make_bed_from_eland_stream(instream, outstream, name, description, chromosom
     # strip off filename extension
     chromosome = fields[CHR].split('.')[0]
 
-    outstream.write('%s %s %d read 0 %s - - %s%s' % (
+    yield '%s %s %d read 0 %s - - %s%s' % (
       chromosome,
       start,
       stop,
       sense_map[fields[SENSE]], 
       sense_color[fields[SENSE]],
       os.linesep  
-    ))
-
+    )
 
 def make_bed_from_multi_eland_stream(
   instream, 
@@ -61,17 +84,25 @@ def make_bed_from_multi_eland_stream(
   max_reads=255
   ):
   """
-  read a multi eland stream and write a bedfile
+  read a multi eland result file from instream and write the bedfile to outstream
+
+  :Parameters:
+    - `instream`: stream containing the output from eland 
+    - `outstream`: stream to write the bed file too
+    - `name`: name of bed-file (must be unique)
+    - `description`: longer description of the bed file
+    - `chromosome_prefix`: restrict output lines to fasta records that start with this pattern
+    - `max_reads`: maximum number of reads to write to bed stream
   """
-  write_bed_header(outstream, name, description)
-  parse_multi_eland(instream, outstream, chr_prefix, max_reads)
-
-def parse_multi_eland(instream, outstream, chr_prefix, max_reads=255):
+  for lane in make_bed_from_multi_eland_generator(instream, name, description, chr_prefix, max_reads):
+      outstream.write(lane)
 
+def make_bed_from_multi_eland_generator(instream, name, description, chr_prefix, max_reads=255):
   loc_pattern = '(?P<fullloc>(?P<start>[0-9]+)(?P<dir>[FR])(?P<count>[0-9]+))'
   other_pattern = '(?P<chr>[^:,]+)'
   split_re = re.compile('(%s|%s)' % (loc_pattern, other_pattern))
 
+  yield create_bed_header(name, description)
   for line in instream:
     rec = line.split()
     if len(rec) > 3:
@@ -110,35 +141,30 @@ def parse_multi_eland(instream, outstream, chr_prefix, max_reads=255):
         if reported_reads <= max_reads:
           for cur_chr, start, stop, strand, color in read_list:
             reported_reads += 1
-            outstream.write('%s %d %d read 0 %s - - %s%s' % (
+            yield '%s %d %d read 0 %s - - %s%s' % (
                 cur_chr,
                 start,
                 stop,
                 sense_map[orientation],
                 sense_color[orientation],
                 os.linesep
-            ))
+            )
 
-def make_description(database, flowcell_id, lane):
def make_description(flowcell_id, lane):
    """
    compute a bedfile name and description from the django database
    """
    # imported locally so non-frontend users of makebed don't need django
    from htsworkflow.frontend.experiments import models as experiments

    lane = int(lane)
    if not (1 <= lane <= 8):
      raise RuntimeError("flowcells only have lanes 1-8")

    cell = experiments.FlowCell.objects.get(flowcell_id=flowcell_id)

    name = "%s-%s" % (flowcell_id, lane)

    cell_library = getattr(cell, 'lane_%d_library' %(lane,))
    description = "%s-%s" % (cell_library.library_name,
                             cell_library.library_id)
    return name, description
index 03c79192fd6e27ea79818244cab88253ed68b5ae..b5d3026578fe967cee0fbf0d9fe5dd48042b1a7c 100644 (file)
@@ -9,38 +9,42 @@ class testMakeBed(unittest.TestCase):
       instream = StringIO('>HWI-EAS229_26_209LVAAXX:7:3:112:383    TCAAATCTTATGCTANGAATCNCAAATTTTCT 1:0:0   mm9_chr13_random.fa:1240R0')
       out = StringIO()
 
-      makebed.parse_multi_eland(instream, out, 'mm9_chr', 1)
-      self.failUnlessEqual(out.getvalue(), 'mm9_chr13_random 1240 1272 read 0 - - - 255,255,0\n')
+      out = list(makebed.make_bed_from_multi_eland_generator(instream, 'name', 'description', 'mm9_chr', 1))
+      self.failUnlessEqual(out[1], 'mm9_chr13_random 1240 1272 read 0 - - - 255,255,0\n')
 
     def test_multi_1_0_0_limit_255(self):
       instream = StringIO('>HWI-EAS229_26_209LVAAXX:7:3:112:383    TCAAATCTTATGCTANGAATCNCAAATTTTCT 1:0:0   mm9_chr13_random.fa:1240R0')
       out = StringIO()
 
-      makebed.parse_multi_eland(instream, out, 'mm9_chr', 255)
-      self.failUnlessEqual(out.getvalue(), 'mm9_chr13_random 1240 1272 read 0 - - - 255,255,0\n')
+      out = list(makebed.make_bed_from_multi_eland_generator(instream, 'name', 'desc', 'mm9_chr', 255))
+      self.failUnlessEqual(out[1], 'mm9_chr13_random 1240 1272 read 0 - - - 255,255,0\n')
 
 
     def test_multi_2_0_0_limit_1(self):
       instream = StringIO('>HWI-EAS229_26_209LVAAXX:7:3:104:586    GTTCTCGCATAAACTNACTCTNAATAGATTCA 2:0:0   mm9_chr4.fa:42995432F0,mm9_chrX.fa:101541458F0')
       out = StringIO()
 
-      makebed.parse_multi_eland(instream, out, 'mm9_chr', 1)
-      self.failUnlessEqual(out.len, 0)
+      out = list(makebed.make_bed_from_multi_eland_generator(instream, 'name', 'desc', 'mm9_chr', 1))
+      self.failUnlessEqual(len(out), 1)
 
     def test_multi_2_0_0_limit_255(self):
       instream = StringIO('>HWI-EAS229_26_209LVAAXX:7:3:104:586    GTTCTCGCATAAACTNACTCTNAATAGATTCA 2:0:0   mm9_chr4.fa:42995432F0,mm9_chrX.fa:101541458F0')
       out = StringIO()
 
-      makebed.parse_multi_eland(instream, out, 'mm9_chr', 255)
-      self.failUnlessEqual(out.len, 98)
+      out = list(makebed.make_bed_from_multi_eland_generator(instream, 'name', 'desc', 'mm9_chr', 255))
+      self.failUnlessEqual(len(out), 3)
+      self.failUnlessEqual(out[1], 
+        'mm9_chr4 42995432 42995464 read 0 + - - 0,0,255\n')
+      self.failUnlessEqual(out[2], 
+        'mm9_chrX 101541458 101541490 read 0 + - - 0,0,255\n')
 
     def test_multi_0_2_0_limit_1(self):
       instream = StringIO('>HWI-EAS229_26_209LVAAXX:7:3:115:495    TCTCCCTGAAAAATANAAGTGNTGTTGGTGAG        0:2:1   mm9_chr14.fa:104434729F2,mm9_chr16.fa:63263818R1,mm9_chr2.fa:52265438R1')
       out = StringIO()
 
-      makebed.parse_multi_eland(instream, out, 'mm9_chr', 1)
-      print out.getvalue()
-      self.failUnlessEqual(out.len, 0)
+      out = list(makebed.make_bed_from_multi_eland_generator(instream, 'name', 'desc', 'mm9_chr', 1))
+      print out
+      self.failUnlessEqual(len(out), 1)
 
def suite():
    # collect all test* methods of testMakeBed into a unittest suite
    return unittest.makeSuite(testMakeBed, 'test')