Merge in the library list, detail, and results downloading feature from
the Caltech live site.
authorDiane Trout <diane@caltech.edu>
Fri, 13 Feb 2009 01:51:58 +0000 (01:51 +0000)
committerDiane Trout <diane@caltech.edu>
Fri, 13 Feb 2009 01:51:58 +0000 (01:51 +0000)

There's several components in the frontend tree to render the pages,
in addition this adds in some helper functions in pipelines.eland
to simplify computing summary statistics for an eland lane.

I also needed to merge in a generator based makebed code for
returning the files to the django database.

To use this, the settings file in this branch will need a variable
RESULT_HOME_DIR to be set.

htsworkflow/frontend/samples/results.py [new file with mode: 0644]
htsworkflow/frontend/samples/views.py
htsworkflow/frontend/settings.py
htsworkflow/frontend/templates/samples/library_detail.html [new file with mode: 0644]
htsworkflow/frontend/templates/samples/library_index.html [new file with mode: 0644]
htsworkflow/frontend/urls.py
htsworkflow/pipelines/eland.py
htsworkflow/util/makebed.py
htsworkflow/util/test/test_makebed.py

diff --git a/htsworkflow/frontend/samples/results.py b/htsworkflow/frontend/samples/results.py
new file mode 100644 (file)
index 0000000..d7cb77e
--- /dev/null
@@ -0,0 +1,134 @@
+from htsworkflow.frontend import settings
+
+import glob
+import os
+import re
+
# Matches the leading run of word characters in a string.
# NOTE(review): currently unused in this module -- confirm before removing.
# Raw string avoids the invalid-escape warning for "\w" in later Pythons.
s_paren = re.compile(r"^\w+")
+
def get_flowcell_result_dict(flowcell_id):
    """
    returns a dictionary following the following pattern for
    a given flowcell_id:

    d['C1-33']['summary']           # Summary.htm file path
    d['C1-33']['eland_results'][5]  # C1-33 lane 5 file eland results file path
    d['C1-33']['run_xml']           # run_*.xml file path
    d['C1-33']['scores']            # scores.tar.gz file path

    Returns None when no directory for flowcell_id exists under
    settings.RESULT_HOME_DIR; returns an empty dict when the directory
    exists but contains no C*-* cycle directories.
    """
    flowcell_id = flowcell_id.strip()

    results = {}

    ################################
    # Flowcell Directory
    fc_matches = glob.glob(os.path.join(settings.RESULT_HOME_DIR, flowcell_id))
    if len(fc_matches) == 0:
        # flowcell directory not found
        return None
    # No duplicates!
    assert len(fc_matches) <= 1
    fc_dir = fc_matches[0]

    ################################
    # One entry per C#-## cycle directory
    for c_dir_path in glob.glob(os.path.join(fc_dir, 'C*')):
        c_dir = os.path.split(c_dir_path)[1]
        info = {}
        results[c_dir] = info

        ###############################
        # Summary.htm file (None when absent)
        summary_matches = glob.glob(os.path.join(c_dir_path, 'Summary.htm'))
        if summary_matches:
            # No duplicates!
            assert len(summary_matches) == 1
            info['summary'] = summary_matches[0]
        else:
            info['summary'] = None

        ###############################
        # eland result files, keyed by integer lane number
        info['eland_results'] = {}
        for filepath in glob.glob(os.path.join(c_dir_path, 's_*_eland_result.txt*')):
            result_name = os.path.split(filepath)[1]
            # lanes 1-8, single digit, therefore s_#; # == index 2
            lane = int(result_name[2])
            info['eland_results'][lane] = filepath

        ###############################
        # run*.xml file (None when absent)
        run_xml_matches = glob.glob(os.path.join(c_dir_path, 'run_*.xml'))
        if run_xml_matches:
            # No duplicates
            assert len(run_xml_matches) == 1
            info['run_xml'] = run_xml_matches[0]
        else:
            info['run_xml'] = None

        ###############################
        # scores.tar.gz (None when absent)
        score_matches = glob.glob(os.path.join(c_dir_path, 'scores*'))
        if score_matches:
            # No duplicates
            assert len(score_matches) == 1
            info['scores'] = score_matches[0]
        else:
            info['scores'] = None

    return results
+
+    
def cn_mTobp(cn_m):
    """
    Converts CN-M (i.e. C1-33, C1-26, C4-28) cycle information into
    number of base pairs.

    NOTE(review): not implemented -- currently always returns None.
    Presumably N-M names a cycle range whose length is the read length
    in bp; confirm the intended semantics before implementing.
    """
    pass
+
+
def parse_flowcell_id(flowcell_id):
    """
    Return flowcell id and any status encoded in the id

    We stored the status information in the flowcell id name.
    this was dumb, but database schemas are hard to update.
    """
    # whitespace separates the real id from the optional status suffix
    fields = flowcell_id.split()
    fcid = fields[0] if len(fields) > 0 else None
    status = fields[1] if len(fields) > 1 else None
    return fcid, status
+    
index 2299e4f9e2203cbc3052d0ad85d4fbfeeceded84..4763f391dcc0fbd7bc50c7a8dedffb5827f93776 100644 (file)
@@ -1 +1,333 @@
-# Create your views here.
\ No newline at end of file
+# Create your views here.
+from htsworkflow.frontend.samples.models import Library
+from htsworkflow.frontend.samples.results import get_flowcell_result_dict, parse_flowcell_id
+from htsworkflow.pipelines.runfolder import load_pipeline_run_xml
+from htsworkflow.pipelines import runfolder
+from htsworkflow.frontend import settings
+from htsworkflow.util import makebed
+from htsworkflow.util import opener
+
+from django.http import HttpResponse
+from django.template.loader import get_template
+from django.template import Context
+
+import StringIO
+import logging
+import os
+
+LANE_LIST = [1,2,3,4,5,6,7,8]
+
def create_library_list():
    """
    Create a list of libraries that includes how many lanes were run

    Returns a list of dicts with keys library_id, library_name,
    species_name and lanes_run.
    """
    library_list = []
    for lib in Library.objects.all():
        # count every flowcell lane (1-8) this library appears in
        lanes_run = sum(
            len(getattr(lib, 'lane_%d_library' % (lane_id,)).all())
            for lane_id in LANE_LIST)
        library_list.append({
            'library_id': lib.library_id,
            'library_name': lib.library_name,
            'species_name': lib.library_species.scientific_name,
            'lanes_run': lanes_run,
        })
    return library_list
+
def library(request):
    """
    Render the library index page listing all libraries.
    """
    template = get_template('samples/library_index.html')
    context = Context({'library_list': create_library_list()})
    return HttpResponse(template.render(context))
+
def library_to_flowcells(request, lib_id):
    """
    Display information about all the flowcells a library has been run on.

    Renders a plain <br/>-joined HTML page: library metadata, one line per
    flowcell/lane pair (with download links from _files), any errors from
    _summary_stats, and the summary table from library_detail.html.
    """
    t = get_template("samples/library_detail.html")

    # only a missing library is "not found"; any other error should
    # propagate instead of being swallowed by a bare except
    try:
        lib = Library.objects.get(library_id=lib_id)
    except Library.DoesNotExist:
        return HttpResponse("Library %s does not exist" % (lib_id))

    output = []

    output.append('<b>Library ID:</b> %s' % (lib.library_id))
    output.append('<b>Name:</b> %s' % (lib.library_name))
    output.append('<b>Species:</b> %s' % (lib.library_species.scientific_name))
    output.append('')

    output.append('<b>FLOWCELL - LANE:</b>')

    # walk lanes 1-8 via LANE_LIST instead of eight copy-pasted blocks
    record_count = 0
    flowcell_list = []
    for lane_id in LANE_LIST:
        lane_manager = getattr(lib, 'lane_%d_library' % (lane_id,))
        for fc in lane_manager.all():
            output.append('%s - Lane %d %s' % (
                fc.flowcell_id, lane_id, _files(fc.flowcell_id, lane_id)))
            flowcell_list.append((fc.flowcell_id, lane_id))
        record_count += lane_manager.count()
    flowcell_list.sort()

    lane_summary_list = []
    for fc, lane in flowcell_list:
        lane_summary, err_list = _summary_stats(fc, lane)
        lane_summary_list.extend(lane_summary)
        for err in err_list:
            output.append(err)

    output.append('<br />')
    output.append(t.render(Context({'lane_summary_list': lane_summary_list})))
    output.append('<br />')

    if record_count == 0:
        output.append("None Found")

    return HttpResponse('<br />\n'.join(output))
+
+
def summaryhtm_fc_cnm(request, fc_id, cnm):
    """
    returns a Summary.htm file if it exists.

    fc_id -- flowcell id (may carry a status suffix, which is stripped)
    cnm -- cycle-range directory name, e.g. 'C1-33'
    """
    fc_id, status = parse_flowcell_id(fc_id)
    d = get_flowcell_result_dict(fc_id)

    if d is None:
        return HttpResponse('<b>Results for Flowcell %s not found.</b>' % (fc_id))

    if cnm not in d:
        return HttpResponse('<b>Results for Flowcell %s; %s not found.</b>' % (fc_id, cnm))

    summary_filepath = d[cnm]['summary']

    if summary_filepath is None:
        return HttpResponse('<b>Summary.htm for Flowcell %s; %s not found.</b>' % (fc_id, cnm))

    # read and close the file instead of handing the open handle to
    # HttpResponse, which would leak it until garbage collection
    f = open(summary_filepath, 'r')
    try:
        content = f.read()
    finally:
        f.close()

    return HttpResponse(content)
+
+
def result_fc_cnm_eland_lane(request, fc_id, cnm, lane):
    """
    returns an eland_file upon calling.
    """
    fc_id, status = parse_flowcell_id(fc_id)
    result_dict = get_flowcell_result_dict(fc_id)

    # guard clauses: missing flowcell, missing cycle dir, missing lane
    if result_dict is None:
        return HttpResponse('<b>Results for Flowcell %s not found.</b>' % (fc_id))
    if cnm not in result_dict:
        return HttpResponse('<b>Results for Flowcell %s; %s not found.</b>' % (fc_id, cnm))

    eland_files = result_dict[cnm]['eland_results']
    lane = int(lane)
    if lane not in eland_files:
        return HttpResponse('<b>Results for Flowcell %s; %s; lane %s not found.</b>' % (fc_id, cnm, lane))

    # autoopen transparently handles compressed eland result files
    eland_file = opener.autoopen(eland_files[lane], 'r')
    return HttpResponse(eland_file, mimetype="application/x-elandresult")
+
+
def bedfile_fc_cnm_eland_lane_ucsc(request, fc_id, cnm, lane):
    """
    returns a bed file for a given flowcell, CN-M (i.e. C1-33), and lane (ucsc compatible)
    """
    # same as bedfile_fc_cnm_eland_lane but leaves the default mimetype
    # so the UCSC genome browser will accept the response
    return bedfile_fc_cnm_eland_lane(request, fc_id, cnm, lane,
                                     ucsc_compatible=True)
+
+
def bedfile_fc_cnm_eland_lane(request, fc_id, cnm, lane, ucsc_compatible=False):
    """
    returns a bed file for a given flowcell, CN-M (i.e. C1-33), and lane
    """
    fc_id, status = parse_flowcell_id(fc_id)
    result_dict = get_flowcell_result_dict(fc_id)

    # guard clauses: missing flowcell, missing cycle dir, missing lane
    if result_dict is None:
        return HttpResponse('<b>Results for Flowcell %s not found.</b>' % (fc_id))
    if cnm not in result_dict:
        return HttpResponse('<b>Results for Flowcell %s; %s not found.</b>' % (fc_id, cnm))

    eland_files = result_dict[cnm]['eland_results']
    lane = int(lane)
    if lane not in eland_files:
        return HttpResponse('<b>Results for Flowcell %s; %s; lane %s not found.</b>' % (fc_id, cnm, lane))

    # stream the (possibly compressed) eland result through the bed
    # generator so the whole bed file is never held in memory
    eland_stream = opener.autoopen(eland_files[lane], 'r')
    name, description = makebed.make_description(fc_id, lane)
    bedgen = makebed.make_bed_from_eland_generator(eland_stream, name, description)

    if ucsc_compatible:
        return HttpResponse(bedgen)
    return HttpResponse(bedgen, mimetype="application/x-bedfile")
+
+
def _summary_stats(flowcell_id, lane_id):
    """
    Return the summary statistics for a given flowcell and lane.

    Returns (summary_list, err_list): summary_list holds one eland lane
    result object per cycle-directory/end combination, annotated with
    flowcell_id, clusters, cycle_width and summarized_reads; err_list
    holds human-readable error strings for missing results.
    """
    fc_id, status = parse_flowcell_id(flowcell_id)
    fc_result_dict = get_flowcell_result_dict(fc_id)

    summary_list = []
    err_list = []
    
    if fc_result_dict is None:
        err_list.append('Results for Flowcell %s not found.' % (fc_id))
        return (summary_list, err_list)

    # keys of fc_result_dict are cycle directory names like 'C1-33'
    for cycle_width in fc_result_dict:
        xmlpath = fc_result_dict[cycle_width]['run_xml']
        
        if xmlpath is None:
            err_list.append('Run xml for Flowcell %s(%s) not found.' % (fc_id, cycle_width))
            continue
        
        try:
            run = load_pipeline_run_xml(xmlpath)
            gerald_summary = run.gerald.summary.lane_results
            # one entry per sequencing end (paired-end runs have two)
            for end in range(len(gerald_summary)):
                eland_summary = run.gerald.eland_results.results[end][lane_id]
                # add information to lane_summary
                eland_summary.flowcell_id = flowcell_id
                eland_summary.clusters = gerald_summary[end][lane_id].cluster
                eland_summary.cycle_width = cycle_width
                eland_summary.summarized_reads = runfolder.summarize_mapped_reads(eland_summary.mapped_reads)
                summary_list.append(eland_summary)

        # NOTE(review): on failure a plain string is appended to
        # summary_list, which otherwise holds result objects -- the
        # consuming template must tolerate both; confirm intended.
        except Exception, e:
            summary_list.append("Summary report needs to be updated.")
            logging.error("Exception: " + str(e))
    
    return (summary_list, err_list)
+
+def _summary_stats_old(flowcell_id, lane):
+    """
+    return a dictionary of summary stats for a given flowcell_id & lane.
+    """
+    fc_id, status = parse_flowcell_id(flowcell_id)
+    fc_result_dict = get_flowcell_result_dict(fc_id)
+    
+    dict_list = []
+    err_list = []
+    summary_list = []
+    
+    if fc_result_dict is None:
+        err_list.append('Results for Flowcell %s not found.' % (fc_id))
+        return (dict_list, err_list, summary_list)
+    
+    for cnm in fc_result_dict:
+    
+        xmlpath = fc_result_dict[cnm]['run_xml']
+        
+        if xmlpath is None:
+            err_list.append('Run xml for Flowcell %s(%s) not found.' % (fc_id, cnm))
+            continue
+        
+        tree = ElementTree.parse(xmlpath).getroot()
+        results = runfolder.PipelineRun(pathname='', xml=tree)
+        try:
+            lane_report = runfolder.summarize_lane(results.gerald, lane)
+            summary_list.append(os.linesep.join(lane_report))
+        except Exception, e:
+            summary_list.append("Summary report needs to be updated.")
+            logging.error("Exception: " + str(e))
+       
+        print "----------------------------------"
+        print "-- DOES NOT SUPPORT PAIRED END ---"
+        print "----------------------------------"
+        lane_results = results.gerald.summary[0][lane]
+        lrs = lane_results
+        
+        d = {}
+        
+        d['average_alignment_score'] = lrs.average_alignment_score
+        d['average_first_cycle_intensity'] = lrs.average_first_cycle_intensity
+        d['cluster'] = lrs.cluster
+        d['lane'] = lrs.lane
+        d['flowcell'] = flowcell_id
+        d['cnm'] = cnm
+        d['percent_error_rate'] = lrs.percent_error_rate
+        d['percent_intensity_after_20_cycles'] = lrs.percent_intensity_after_20_cycles
+        d['percent_pass_filter_align'] = lrs.percent_pass_filter_align
+        d['percent_pass_filter_clusters'] = lrs.percent_pass_filter_clusters
+        
+        #FIXME: function finished, but need to take advantage of
+        #   may need to take in a list of lanes so we only have to
+        #   load the xml file once per flowcell rather than once
+        #   per lane.
+        dict_list.append(d)
+    
+    return (dict_list, err_list, summary_list)
+    
+    
+
+    
def _files(flowcell_id, lane):
    """
    Sets up available files for download

    Returns '' when nothing is available, otherwise a
    '(link|link|...)' string of HTML anchors.
    """
    # avoid shadowing the builtin 'id' with the status component
    flowcell_id, status = parse_flowcell_id(flowcell_id)
    result_dict = get_flowcell_result_dict(flowcell_id)

    if result_dict is None:
        return ''

    lane = int(lane)
    links = []

    # c_name == 'CN-M' (i.e. C1-33)
    for c_name in result_dict:
        cycle_info = result_dict[c_name]

        if cycle_info['summary'] is not None:
            links.append('<a href="/results/%s/%s/summary/">summary(%s)</a>' \
                          % (flowcell_id, c_name, c_name))

        if lane in cycle_info['eland_results']:
            links.append('<a href="/results/%s/%s/eland_result/%s">eland_result(%s)</a>' % (flowcell_id, c_name, lane, c_name))
            links.append('<a href="/results/%s/%s/bedfile/%s">bedfile(%s)</a>' % (flowcell_id, c_name, lane, c_name))

    if not links:
        return ''

    return '(' + '|'.join(links) + ')'
+            
index 76aa1f356fa2ec5697ac0c06c14d3f683c389ec3..a988694d5d503e0073c71ef83744d9e31954819b 100644 (file)
@@ -27,6 +27,9 @@ The options understood by this module are (with their defaults):
 import ConfigParser
 import os
 
+# make epydoc happy
+__docformat__ = "restructuredtext en"
+
 def options_to_list(dest, section_name):
   """
   Load a options from section_name and store in a dictionary
@@ -140,6 +143,7 @@ INSTALLED_APPS = (
     'django.contrib.admin',
     'django.contrib.auth',
     'django.contrib.contenttypes',
+    'django.contrib.humanize',
     'django.contrib.sessions',
     'django.contrib.sites',
     'htsworkflow.frontend.eland_config',
diff --git a/htsworkflow/frontend/templates/samples/library_detail.html b/htsworkflow/frontend/templates/samples/library_detail.html
new file mode 100644 (file)
index 0000000..3533477
--- /dev/null
@@ -0,0 +1,99 @@
+{% load humanize %}
+
+<hr/>
+<br/>
+<style type="text/css">
+  /* <![CDATA[ */
+  table, td {
+    border-style: solid;
+  }
+  table {
+    border-width: 0 0 1px 1px;
+    border-spacing: 0;
+    border-collapse: collapse;
+  }
+  td {
+    margin: 0;
+    padding: 4px;
+    border-width: 1px 1px 0 0;
+  }
+  thead {
+    text-align: center;
+    }
+  tbody {
+    text-align: right;
+  }
+  /* ]]> */
+</style>
+
+{% block summary_stats %}
+<table>
+  <thead>
+    <tr>
+      <td colspan="6"></td>
+      <td colspan="2">No Match</td>
+      <td colspan="2">QC Failed</td>
+      <td colspan="4">Unique</td>
+      <td colspan="4">Repeat</td>
+    </tr>
+    <tr>
+    <td>Cycles</td>
+    <td>Flowcell</td>
+    <td>Lane</td>
+    <td>End</td>
+    <td>Cluster / Tile</td>
+    <td>Raw Reads</td>
+    <td>total</td>
+    <td>%</td>
+    <td>total</td>
+    <td>%</td>
+    <td>0 mismatch</td>
+    <td>1 mismatch</td>
+    <td>2 mismatch</td>
+    <td>Total</td>
+    <td>0 mismatch</td>
+    <td>1 mismatch</td>
+    <td>2 mismatch</td>
+    <td>Total</td>
+    </tr>
+  </thead>
+  <tbody>
+
+    {% for lane in lane_summary_list %}
+    <tr>
+      <td>{{ lane.cycle_width }}</td>
+      <td>{{ lane.flowcell_id }}</td>
+      <td>{{ lane.lane_id }}</td>
+      <td>{% if lane.end %}{{ lane.end }}{% endif %}</td>
+      <td>{{ lane.clusters.0|intcomma }}</td>
+      <td>{{ lane.reads|intcomma }}</td>
+      <td>{{ lane.no_match|intcomma }}</td>
+      <td>{{ lane.no_match_percent|stringformat:".2f" }}</td>
+      <td>{{ lane.qc_failed|intcomma }}</td>
+      <td>{{ lane.qc_failed_percent|stringformat:".2f" }}</td>
+      <td>{{ lane.match_codes.U0|intcomma }}</td>
+      <td>{{ lane.match_codes.U1|intcomma }}</td>
+      <td>{{ lane.match_codes.U2|intcomma }}</td>
+      <td>{{ lane.unique_reads|intcomma }}</td>
+      <td>{{ lane.match_codes.R0|intcomma }}</td>
+      <td>{{ lane.match_codes.R1|intcomma }}</td>
+      <td>{{ lane.match_codes.R2|intcomma }}</td>
+      <td>{{ lane.repeat_reads|intcomma }}</td>
+    </tr>
+    {% endfor %}
+  </tbody>
+</table>
+<br/>
+<hr/>
+{% for lane in lane_summary_list %}
+<h2>
+  {{lane.cycle_width}} {{ lane.flowcell_id }} lane {{ lane.lane_id }} 
+  {% if lane.end %} end {{ lane.end }}{% endif %}
+</h2>
+  <ul>
+    {% for name, counts in lane.summarized_reads.items %}
+    <li><b>{{ name }}</b>: {{ counts|intcomma }}</li>
+    {% endfor %}
+  </ul>
+{% endfor %}
+{% endblock %}
diff --git a/htsworkflow/frontend/templates/samples/library_index.html b/htsworkflow/frontend/templates/samples/library_index.html
new file mode 100644 (file)
index 0000000..19e30c7
--- /dev/null
@@ -0,0 +1,46 @@
+<style type="text/css">
+  /* <![CDATA[ */
+  table, td {
+    border-style: solid;
+  }
+  table {
+    border-width: 0 0 1px 1px;
+    border-spacing: 0;
+    border-collapse: collapse;
+  }
+  thead {
+    text-align: center;
+  }
+  td {
+    margin: 0;
+    padding: 4px;
+    border-width: 1px 1px 0 0;
+  }
+  td a {
+    display: block;
+  }
+  /* ]]> */
+</style>
+
+{% block summary_stats %}
+<table>
+  <thead>
+    <tr>
+    <td>Library ID</td>
+    <td>Species</td>
+    <td>Library Name</td>
+    <td>Total Lanes</td>
+    </tr>
+  </thead>
+  <tbody>
+    {% for lib in library_list %}
+    <tr>
+      <td><a href="/library/{{ lib.library_id }}">{{ lib.library_id }}</a></td>
+      <td><a href="/library/{{ lib.library_id }}">{{ lib.species_name }}</a></td>
+      <td><a href="/library/{{ lib.library_id }}">{{ lib.library_name }}</a></td>
+      <td>{{ lib.lanes_run }}</td>
+    </tr>
+    {% endfor %}
+  </tbody>
+</table>
+{% endblock %}
index b8b9e5ec348e2a19e5466db8366028dca89847e1..ccc31b35be2d5f07587dc1db4fe75be62fc8ad9a 100644 (file)
@@ -19,6 +19,19 @@ urlpatterns = patterns('',
     #(r'^analysis/', include('htsworkflow.frontend.analysis.urls')),
     # Report Views:
     (r'^reports/', include('htsworkflow.frontend.reports.urls')),
+    # Library browser
+    (r'^library/$', 'htsworkflow.frontend.samples.views.library'),
+    (r'^library/(?P<lib_id>\w+)/$', 
+      'htsworkflow.frontend.samples.views.library_to_flowcells'),
+    # Raw result files
+    (r'^results/(?P<fc_id>\w+)/(?P<cnm>C[1-9]-[0-9]+)/summary/',
+      'htsworkflow.frontend.samples.views.summaryhtm_fc_cnm'),
+    (r'^results/(?P<fc_id>\w+)/(?P<cnm>C[1-9]-[0-9]+)/eland_result/(?P<lane>[1-8])',
+      'htsworkflow.frontend.samples.views.result_fc_cnm_eland_lane'),
+    (r'^results/(?P<fc_id>\w+)/(?P<cnm>C[1-9]-[0-9]+)/bedfile/(?P<lane>[1-8])/ucsc',
+      'htsworkflow.frontend.samples.views.bedfile_fc_cnm_eland_lane_ucsc'),
+    (r'^results/(?P<fc_id>\w+)/(?P<cnm>C[1-9]-[0-9]+)/bedfile/(?P<lane>[1-8])',
+      'htsworkflow.frontend.samples.views.bedfile_fc_cnm_eland_lane'),
     
     # databrowser
     #(r'^databrowse/(.*)', databrowse.site.root)
index 05563a1a2cb5ba455c7a4b360c955c60eb630738..40507e63611136e1f10e300bb6f02b7812b02e23 100644 (file)
@@ -194,6 +194,50 @@ class ElandLane(object):
         return self._match_codes
     match_codes = property(_get_match_codes)
 
    def _get_no_match(self):
        # lazily parse the eland result file on first access
        if self._mapped_reads is None:
            self._update()  
        return self._match_codes['NM']
    no_match = property(_get_no_match, 
                        doc="total reads that didn't match the target genome.")
+
+    def _get_no_match_percent(self):
+        return float(self.no_match)/self.reads * 100 
+    no_match_percent = property(_get_no_match_percent,
+                                doc="no match reads as percent of total")
+
+    def _get_qc_failed(self):
+        if self._mapped_reads is None:
+            self._update()  
+        return self._match_codes['QC']
+    qc_failed = property(_get_qc_failed,
+                        doc="total reads that didn't match the target genome.")
+
+    def _get_qc_failed_percent(self):
+        return float(self.qc_failed)/self.reads * 100 
+    qc_failed_percent = property(_get_qc_failed_percent,
+                                 doc="QC failed reads as percent of total")
+
+    def _get_unique_reads(self):
+        if self._mapped_reads is None:
+           self._update()
+        sum = 0
+        for code in ['U0','U1','U2']:
+            sum += self._match_codes[code]
+        return sum
+    unique_reads = property(_get_unique_reads,
+                            doc="total unique reads")
+
+    def _get_repeat_reads(self):
+        if self._mapped_reads is None:
+           self._update()
+        sum = 0
+        for code in ['R0','R1','R2']:
+            sum += self._match_codes[code]
+        return sum
+    repeat_reads = property(_get_repeat_reads,
+                            doc="total repeat reads")
+    
     def get_elements(self):
         lane = ElementTree.Element(ElandLane.LANE,
                                    {'version':
index eeb4e88ebc685e867153830ec234bb38b55f5def..cb93163661978c7c36e0b1463591238ce79f9488 100755 (executable)
@@ -4,11 +4,13 @@ Utility functions to make bedfiles.
 import os
 import re
 
+__docformat__ = "restructuredtext en"
+
 # map eland_result.txt sense 
 sense_map = { 'F': '+', 'R': '-'}
 sense_color = { 'F': '0,0,255', 'R': '255,255,0' }
 
-def write_bed_header(outstream, name, description):
+def create_bed_header(name, description):
   """
   Produce the headerline for a bedfile
   """
@@ -17,11 +19,33 @@ def write_bed_header(outstream, name, description):
   if description is None: description = "eland result file"
   bed_header = 'track name="%s" description="%s" visibility=4 itemRgb="ON"'
   bed_header += os.linesep
-  outstream.write(bed_header % (name, description))
+  return bed_header % (name, description)
 
def make_bed_from_eland_stream(instream, outstream, name, description, chromosome_prefix='chr'):
  """
  read an eland result file from instream and write a bedfile to outstream

  :Parameters:
    - `instream`: stream containing the output from eland 
    - `outstream`: stream to write the bed file too
    - `name`: name of bed-file (must be unique)
    - `description`: longer description of the bed file
    - `chromosome_prefix`: restrict output lines to fasta records that start with this pattern
  """
  # delegate to the generator form and write out each yielded line
  bed_lines = make_bed_from_eland_generator(instream, name, description,
                                            chromosome_prefix)
  for bed_line in bed_lines:
      outstream.write(bed_line)
+
+def make_bed_from_eland_generator(instream, name, description, chromosome_prefix='chr'):
+  """
+  read an eland result file from instream and write a bedfile to outstream
+
+  :Parameters:
+    - `instream`: stream containing the output from eland 
+    - `name`: name of bed-file (must be unique)
+    - `description`: longer description of the bed file
+    - `chromosome_prefix`: restrict output lines to fasta records that start with this pattern
+
+  :Return: generator which yields lines of bedfile
   """
   # indexes into fields in eland_result.txt file
   SEQ = 1
@@ -29,7 +53,7 @@ def make_bed_from_eland_stream(instream, outstream, name, description, chromosom
   START = 7
   SENSE = 8
 
-  write_bed_header(outstream, name, description)
+  yield create_bed_header(name, description)
   prefix_len = len(chromosome_prefix)
 
   for line in instream:
@@ -42,15 +66,14 @@ def make_bed_from_eland_stream(instream, outstream, name, description, chromosom
     # strip off filename extension
     chromosome = fields[CHR].split('.')[0]
 
-    outstream.write('%s %s %d read 0 %s - - %s%s' % (
+    yield '%s %s %d read 0 %s - - %s%s' % (
       chromosome,
       start,
       stop,
       sense_map[fields[SENSE]], 
       sense_color[fields[SENSE]],
       os.linesep  
-    ))
-
+    )
 
 def make_bed_from_multi_eland_stream(
   instream, 
@@ -61,17 +84,25 @@ def make_bed_from_multi_eland_stream(
   max_reads=255
   ):
   """
-  read a multi eland stream and write a bedfile
+  read a multi eland result file from instream and write the bedfile to outstream
+
+  :Parameters:
+    - `instream`: stream containing the output from eland 
+    - `outstream`: stream to write the bed file too
+    - `name`: name of bed-file (must be unique)
+    - `description`: longer description of the bed file
+    - `chromosome_prefix`: restrict output lines to fasta records that start with this pattern
+    - `max_reads`: maximum number of reads to write to bed stream
   """
-  write_bed_header(outstream, name, description)
-  parse_multi_eland(instream, outstream, chr_prefix, max_reads)
-
-def parse_multi_eland(instream, outstream, chr_prefix, max_reads=255):
+  for lane in make_bed_from_multi_eland_generator(instream, name, description, chr_prefix, max_reads):
+      outstream.write(lane)
 
+def make_bed_from_multi_eland_generator(instream, name, description, chr_prefix, max_reads=255):
   loc_pattern = '(?P<fullloc>(?P<start>[0-9]+)(?P<dir>[FR])(?P<count>[0-9]+))'
   other_pattern = '(?P<chr>[^:,]+)'
   split_re = re.compile('(%s|%s)' % (loc_pattern, other_pattern))
 
+  yield create_bed_header(name, description)
   for line in instream:
     rec = line.split()
     if len(rec) > 3:
@@ -110,35 +141,30 @@ def parse_multi_eland(instream, outstream, chr_prefix, max_reads=255):
         if reported_reads <= max_reads:
           for cur_chr, start, stop, strand, color in read_list:
             reported_reads += 1
-            outstream.write('%s %d %d read 0 %s - - %s%s' % (
+            yield '%s %d %d read 0 %s - - %s%s' % (
                 cur_chr,
                 start,
                 stop,
                 sense_map[orientation],
                 sense_color[orientation],
                 os.linesep
-            ))
+            )
 
-def make_description(database, flowcell_id, lane):
def make_description(flowcell_id, lane):
    """
    compute a bedfile name and description from the django database
    """
    # imported locally so non-frontend users of makebed don't need django
    from htsworkflow.frontend.experiments import models as experiments

    lane = int(lane)
    if not (1 <= lane <= 8):
      raise RuntimeError("flowcells only have lanes 1-8")

    cell = experiments.FlowCell.objects.get(flowcell_id=flowcell_id)

    name = "%s-%s" % (flowcell_id, lane)

    cell_library = getattr(cell, 'lane_%d_library' %(lane,))
    description = "%s-%s" % (cell_library.library_name,
                             cell_library.library_id)
    return name, description
index 03c79192fd6e27ea79818244cab88253ed68b5ae..b5d3026578fe967cee0fbf0d9fe5dd48042b1a7c 100644 (file)
@@ -9,38 +9,42 @@ class testMakeBed(unittest.TestCase):
       instream = StringIO('>HWI-EAS229_26_209LVAAXX:7:3:112:383    TCAAATCTTATGCTANGAATCNCAAATTTTCT 1:0:0   mm9_chr13_random.fa:1240R0')
       out = StringIO()
 
-      makebed.parse_multi_eland(instream, out, 'mm9_chr', 1)
-      self.failUnlessEqual(out.getvalue(), 'mm9_chr13_random 1240 1272 read 0 - - - 255,255,0\n')
+      out = list(makebed.make_bed_from_multi_eland_generator(instream, 'name', 'description', 'mm9_chr', 1))
+      self.failUnlessEqual(out[1], 'mm9_chr13_random 1240 1272 read 0 - - - 255,255,0\n')
 
     def test_multi_1_0_0_limit_255(self):
       instream = StringIO('>HWI-EAS229_26_209LVAAXX:7:3:112:383    TCAAATCTTATGCTANGAATCNCAAATTTTCT 1:0:0   mm9_chr13_random.fa:1240R0')
       out = StringIO()
 
-      makebed.parse_multi_eland(instream, out, 'mm9_chr', 255)
-      self.failUnlessEqual(out.getvalue(), 'mm9_chr13_random 1240 1272 read 0 - - - 255,255,0\n')
+      out = list(makebed.make_bed_from_multi_eland_generator(instream, 'name', 'desc', 'mm9_chr', 255))
+      self.failUnlessEqual(out[1], 'mm9_chr13_random 1240 1272 read 0 - - - 255,255,0\n')
 
 
     def test_multi_2_0_0_limit_1(self):
       instream = StringIO('>HWI-EAS229_26_209LVAAXX:7:3:104:586    GTTCTCGCATAAACTNACTCTNAATAGATTCA 2:0:0   mm9_chr4.fa:42995432F0,mm9_chrX.fa:101541458F0')
       out = StringIO()
 
-      makebed.parse_multi_eland(instream, out, 'mm9_chr', 1)
-      self.failUnlessEqual(out.len, 0)
+      out = list(makebed.make_bed_from_multi_eland_generator(instream, 'name', 'desc', 'mm9_chr', 1))
+      self.failUnlessEqual(len(out), 1)
 
     def test_multi_2_0_0_limit_255(self):
       instream = StringIO('>HWI-EAS229_26_209LVAAXX:7:3:104:586    GTTCTCGCATAAACTNACTCTNAATAGATTCA 2:0:0   mm9_chr4.fa:42995432F0,mm9_chrX.fa:101541458F0')
       out = StringIO()
 
-      makebed.parse_multi_eland(instream, out, 'mm9_chr', 255)
-      self.failUnlessEqual(out.len, 98)
+      out = list(makebed.make_bed_from_multi_eland_generator(instream, 'name', 'desc', 'mm9_chr', 255))
+      self.failUnlessEqual(len(out), 3)
+      self.failUnlessEqual(out[1], 
+        'mm9_chr4 42995432 42995464 read 0 + - - 0,0,255\n')
+      self.failUnlessEqual(out[2], 
+        'mm9_chrX 101541458 101541490 read 0 + - - 0,0,255\n')
 
     def test_multi_0_2_0_limit_1(self):
       instream = StringIO('>HWI-EAS229_26_209LVAAXX:7:3:115:495    TCTCCCTGAAAAATANAAGTGNTGTTGGTGAG        0:2:1   mm9_chr14.fa:104434729F2,mm9_chr16.fa:63263818R1,mm9_chr2.fa:52265438R1')
       out = StringIO()
 
-      makebed.parse_multi_eland(instream, out, 'mm9_chr', 1)
-      print out.getvalue()
-      self.failUnlessEqual(out.len, 0)
+      out = list(makebed.make_bed_from_multi_eland_generator(instream, 'name', 'desc', 'mm9_chr', 1))
+      print out
+      self.failUnlessEqual(len(out), 1)
 
def suite():
    # collect all test* methods of testMakeBed into a unittest suite
    return unittest.makeSuite(testMakeBed, 'test')