Attempt to be robust to not having an alignment in our RunXml file
[htsworkflow.git] / htsworkflow / frontend / samples / views.py
index 9456712da11f3c1998b0591a0bca87e8bec90bf3..e50b5b357c2caa1b88d73cb2f2884cf0124f6158 100644 (file)
@@ -9,25 +9,29 @@ try:
 except ImportError, e:
     import simplejson as json
 
+from django.contrib.csrf.middleware import csrf_exempt
 from htsworkflow.frontend.auth import require_api_key
-from htsworkflow.frontend.experiments.models import FlowCell
+from htsworkflow.frontend.experiments.models import FlowCell, Lane, LANE_STATUS_MAP
 from htsworkflow.frontend.samples.changelist import ChangeList
-from htsworkflow.frontend.samples.models import Library
-from htsworkflow.frontend.samples.results import get_flowcell_result_dict, parse_flowcell_id
+from htsworkflow.frontend.samples.models import Antibody, Library, Species, HTSUser
+from htsworkflow.frontend.samples.results import get_flowcell_result_dict
 from htsworkflow.frontend.bcmagic.forms import BarcodeMagicForm
 from htsworkflow.pipelines.runfolder import load_pipeline_run_xml
 from htsworkflow.pipelines import runfolder
 from htsworkflow.pipelines.eland import ResultLane
-from htsworkflow.frontend import settings
+from htsworkflow.pipelines.samplekey import SampleKey
+from htsworkflow.util.conversion import unicode_or_none, parse_flowcell_id
 from htsworkflow.util import makebed
 from htsworkflow.util import opener
 
+
 from django.core.exceptions import ObjectDoesNotExist
 from django.http import HttpResponse, HttpResponseRedirect, Http404
-from django.shortcuts import render_to_response
+from django.shortcuts import render_to_response, get_object_or_404
 from django.template import RequestContext
 from django.template.loader import get_template
 from django.contrib.auth.decorators import login_required
+from django.conf import settings
 
 LANE_LIST = [1,2,3,4,5,6,7,8]
 SAMPLES_CONTEXT_DEFAULTS = {
@@ -35,14 +39,40 @@ SAMPLES_CONTEXT_DEFAULTS = {
     'bcmagic': BarcodeMagicForm()
 }
 
+LOGGER = logging.getLogger(__name__)
+
+def count_lanes(lane_set):
+    single = 0
+    paired = 1
+    short_read = 0
+    medium_read = 1
+    long_read = 2
+    counts = [[0,0,0,],[0,0,0]]
+
+    for lane in lane_set.all():
+        if lane.flowcell.paired_end:
+            lane_type = paired
+        else:
+            lane_type = single
+        if lane.flowcell.read_length < 40:
+            read_type = short_read
+        elif lane.flowcell.read_length < 100:
+            read_type = medium_read
+        else:
+            read_type = long_read
+        counts[lane_type][read_type] += 1
+
+    return counts
+
 def create_library_context(cl):
     """
-    Create a list of libraries that includes how many lanes were run
+     Create a list of libraries that includes how many lanes were run
     """
     records = []
     #for lib in library_items.object_list:
     for lib in cl.result_list:
        summary = {}
+       summary['library'] = lib
        summary['library_id'] = lib.id
        summary['library_name'] = lib.library_name
        summary['species_name' ] = lib.library_species.scientific_name
@@ -50,52 +80,51 @@ def create_library_context(cl):
            summary['amplified_from'] = lib.amplified_from_sample.id
        else:
            summary['amplified_from'] = ''
-       lanes_run = 0
-       #for lane_id in LANE_LIST:
-       #    lane = getattr(lib, 'lane_%d_library' % (lane_id,))
-       #    lanes_run += len( lane.all() )
-       lanes_run = lib.lane_set.count()
+       lanes_run = count_lanes(lib.lane_set)
+       # suppress zeros
+       for row in xrange(len(lanes_run)):
+           for col in xrange(len(lanes_run[row])):
+               if lanes_run[row][col] == 0:
+                   lanes_run[row][col] = ''
        summary['lanes_run'] = lanes_run
        summary['is_archived'] = lib.is_archived()
        records.append(summary)
-    cl.result_count = unicode(cl.paginator._count) + u" libraries"
+    cl.result_count = unicode(cl.paginator._count)
     return {'library_list': records }
 
-def library(request):
-   # build changelist
+
+def library(request, todo_only=False):
+    queryset = Library.objects.filter(hidden__exact=0)
+    if todo_only:
+        queryset = queryset.filter(lane=None)
+    # build changelist
     fcl = ChangeList(request, Library,
         list_filter=['affiliations', 'library_species'],
         search_fields=['id', 'library_name', 'amplified_from_sample__id'],
         list_per_page=200,
-        queryset=Library.objects.filter(hidden__exact=0)
+        queryset=queryset
     )
 
-    context = { 'cl': fcl, 'title': 'Library Index'}
+    context = { 'cl': fcl, 'title': 'Library Index', 'todo_only': todo_only}
     context.update(create_library_context(fcl))
     t = get_template('samples/library_index.html')
     c = RequestContext(request, context)
-    
-    app_context = {
-        'page_name': 'Library Index',
-        'east_region_config_div': 'changelist-filter',
-        'body': t.render(c)
-    }
-    app_context.update(SAMPLES_CONTEXT_DEFAULTS)
-    
-    app_t = get_template('flowcell_libraries_app.html')
-    app_c = RequestContext(request, app_context)
-    return HttpResponse( app_t.render(app_c) )
+    return HttpResponse( t.render(c) )
+
+
+def library_not_run(request):
+    return library(request, todo_only=True)
+
 
 def library_to_flowcells(request, lib_id):
     """
     Display information about all the flowcells a library has been run on.
     """
-    
     try:
-      lib = Library.objects.get(id=lib_id)
+        lib = Library.objects.get(id=lib_id)
     except:
-      return HttpResponse("Library %s does not exist" % (lib_id))
-   
+        raise Http404('Library %s does not exist' % (lib_id,))
+
     flowcell_list = []
     flowcell_run_results = {} # aka flowcells we're looking at
     for lane in lib.lane_set.all():
@@ -109,10 +138,10 @@ def library_to_flowcells(request, lib_id):
     lane_summary_list = []
     eland_results = []
     for fc, lane_number in flowcell_list:
-        lane_summary, err_list = _summary_stats(fc, lane_number)
+        lane_summary, err_list = _summary_stats(fc, lane_number, lib_id)
+        lane_summary_list.extend(lane_summary)
 
         eland_results.extend(_make_eland_results(fc, lane_number, flowcell_run_results))
-        lane_summary_list.extend(lane_summary)
 
     context = {
         'page_name': 'Library Details',
@@ -127,26 +156,50 @@ def library_to_flowcells(request, lib_id):
         context,
         context_instance = RequestContext(request))
 
+def lanes_for(request, username=None):
+    """
+    Render a paginated lane index, optionally limited to lanes whose library is affiliated with `username` (404 if no such user).
+    """
+    query = {}
+    if username is not None:
+        user = get_object_or_404(HTSUser, username=username)  # unknown user -> 404, not an uncaught DoesNotExist
+        query.update({'library__affiliations__users__id':user.id})
+    fcl = ChangeList(request, Lane,
+        list_filter=[],
+        search_fields=['flowcell__flowcell_id', 'library__id', 'library__library_name'],
+        list_per_page=200,
+        queryset=Lane.objects.filter(**query)
+    )
+
+    context = { 'lanes': fcl, 'title': 'Lane Index'}
+
+    return render_to_response(
+        'samples/lanes_for.html',
+        context,
+        context_instance = RequestContext(request)
+    )
+
+
 def summaryhtm_fc_cnm(request, flowcell_id, cnm):
     """
     returns a Summary.htm file if it exists.
     """
     fc_id, status = parse_flowcell_id(flowcell_id)
     d = get_flowcell_result_dict(fc_id)
-    
+
     if d is None:
         return HttpResponse('<b>Results for Flowcell %s not found.</b>' % (fc_id))
-    
+
     if cnm not in d:
         return HttpResponse('<b>Results for Flowcell %s; %s not found.</b>' % (fc_id, cnm))
-    
+
     summary_filepath = d[cnm]['summary']
-    
+
     if summary_filepath is None:
         return HttpResponse('<b>Summary.htm for Flowcell %s; %s not found.</b>' % (fc_id, cnm))
-    
+
     f = open(summary_filepath, 'r')
-    
+
     return HttpResponse(f)
 
 
@@ -156,27 +209,27 @@ def result_fc_cnm_eland_lane(request, flowcell_id, cnm, lane):
     """
     fc_id, status = parse_flowcell_id(flowcell_id)
     d = get_flowcell_result_dict(fc_id)
-    
+
     if d is None:
         return HttpResponse('<b>Results for Flowcell %s not found.</b>' % (fc_id))
-    
+
     if cnm not in d:
         return HttpResponse('<b>Results for Flowcell %s; %s not found.</b>' % (fc_id, cnm))
-    
+
     erd = d[cnm]['eland_results']
     lane = int(lane)
-    
+
     if lane not in erd:
         return HttpResponse('<b>Results for Flowcell %s; %s; lane %s not found.</b>' % (fc_id, cnm, lane))
-    
+
     filepath = erd[lane]
-    
+
     #f = opener.autoopen(filepath, 'r')
     # return HttpResponse(f, mimetype="application/x-elandresult")
 
     f = open(filepath, 'r')
     return HttpResponse(f, mimetype='application/x-bzip2')
-    
+
 
 
 def bedfile_fc_cnm_eland_lane_ucsc(request, fc_id, cnm, lane):
@@ -192,36 +245,36 @@ def bedfile_fc_cnm_eland_lane(request, flowcell_id, cnm, lane, ucsc_compatible=F
     """
     fc_id, status = parse_flowcell_id(flowcell_id)
     d = get_flowcell_result_dict(fc_id)
-    
+
     if d is None:
         return HttpResponse('<b>Results for Flowcell %s not found.</b>' % (fc_id))
-    
+
     if cnm not in d:
         return HttpResponse('<b>Results for Flowcell %s; %s not found.</b>' % (fc_id, cnm))
-    
+
     erd = d[cnm]['eland_results']
     lane = int(lane)
-    
+
     if lane not in erd:
         return HttpResponse('<b>Results for Flowcell %s; %s; lane %s not found.</b>' % (fc_id, cnm, lane))
-    
+
     filepath = erd[lane]
-    
+
     # Eland result file
     fi = opener.autoopen(filepath, 'r')
     # output memory file
-    
+
     name, description = makebed.make_description( fc_id, lane )
-    
+
     bedgen = makebed.make_bed_from_eland_generator(fi, name, description)
-    
+
     if ucsc_compatible:
         return HttpResponse(bedgen)
     else:
         return HttpResponse(bedgen, mimetype="application/x-bedfile")
 
 
-def _summary_stats(flowcell_id, lane_id):
+def _summary_stats(flowcell_id, lane_id, library_id):
     """
     Return the summary statistics for a given flowcell, lane, and end.
     """
@@ -230,112 +283,60 @@ def _summary_stats(flowcell_id, lane_id):
 
     summary_list = []
     err_list = []
-    
+
     if fc_result_dict is None:
         err_list.append('Results for Flowcell %s not found.' % (fc_id))
         return (summary_list, err_list)
 
     for cycle_width in fc_result_dict:
         xmlpath = fc_result_dict[cycle_width]['run_xml']
-        
+
         if xmlpath is None:
             err_list.append('Run xml for Flowcell %s(%s) not found.' % (fc_id, cycle_width))
             continue
-        
+
         run = load_pipeline_run_xml(xmlpath)
+        # skip if we don't have available metadata.
+        if run.gerald is None or run.gerald.summary is None:
+            continue
+        
         gerald_summary = run.gerald.summary.lane_results
-        for end in range(len(gerald_summary)):
-            end_summary = run.gerald.eland_results.results[end]
-            if end_summary.has_key(lane_id):
-                eland_summary = run.gerald.eland_results.results[end][lane_id]
-            else:
-                eland_summary = ResultLane(lane_id=lane_id, end=end)
+        key = SampleKey(lane=lane_id, sample='s')
+        eland_results = list(run.gerald.eland_results.find_keys(key))
+        key = SampleKey(lane=lane_id, sample=library_id)
+        eland_results.extend(run.gerald.eland_results.find_keys(key))
+        for key in eland_results:
+            eland_summary = run.gerald.eland_results.results[key]
             # add information to lane_summary
             eland_summary.flowcell_id = flowcell_id
-            if len(gerald_summary) > end and gerald_summary[end].has_key(lane_id):
-                eland_summary.clusters = gerald_summary[end][lane_id].cluster
-            else:
-                eland_summary.clusters = None
+
+            read = key.read-1 if key.read is not None else 0
+            try:
+                eland_summary.clusters = gerald_summary[read][key.lane].cluster
+            except (IndexError, KeyError) as e:
+                eland_summary.clusters = None  # no summary entry for this read/lane
             eland_summary.cycle_width = cycle_width
             if hasattr(eland_summary, 'genome_map'):
-                eland_summary.summarized_reads = runfolder.summarize_mapped_reads( 
-                                                   eland_summary.genome_map, 
+                eland_summary.summarized_reads = runfolder.summarize_mapped_reads(
+                                                   eland_summary.genome_map,
                                                    eland_summary.mapped_reads)
 
             # grab some more information out of the flowcell db
             flowcell = FlowCell.objects.get(flowcell_id=flowcell_id)
             #pm_field = 'lane_%d_pM' % (lane_id)
-            lane_obj = flowcell.lane_set.get(lane_number=lane_id)
-            eland_summary.successful_pm = lane_obj.pM
+            lanes = flowcell.lane_set.filter(lane_number=lane_id)
+            eland_summary.flowcell = flowcell
+            eland_summary.lanes = lanes
 
             summary_list.append(eland_summary)
 
         #except Exception, e:
         #    summary_list.append("Summary report needs to be updated.")
-        #    logging.error("Exception: " + str(e))
-    
+        #    LOGGER.error("Exception: " + str(e))
+
     return (summary_list, err_list)
 
-def _summary_stats_old(flowcell_id, lane):
-    """
-    return a dictionary of summary stats for a given flowcell_id & lane.
-    """
-    fc_id, status = parse_flowcell_id(flowcell_id)
-    fc_result_dict = get_flowcell_result_dict(fc_id)
-    
-    dict_list = []
-    err_list = []
-    summary_list = []
-    
-    if fc_result_dict is None:
-        err_list.append('Results for Flowcell %s not found.' % (fc_id))
-        return (dict_list, err_list, summary_list)
-    
-    for cnm in fc_result_dict:
-    
-        xmlpath = fc_result_dict[cnm]['run_xml']
-        
-        if xmlpath is None:
-            err_list.append('Run xml for Flowcell %s(%s) not found.' % (fc_id, cnm))
-            continue
-        
-        tree = ElementTree.parse(xmlpath).getroot()
-        results = runfolder.PipelineRun(pathname='', xml=tree)
-        try:
-            lane_report = runfolder.summarize_lane(results.gerald, lane)
-            summary_list.append(os.linesep.join(lane_report))
-        except Exception, e:
-            summary_list.append("Summary report needs to be updated.")
-            logging.error("Exception: " + str(e))
-       
-        print >>sys.stderr, "----------------------------------"
-        print >>sys.stderr, "-- DOES NOT SUPPORT PAIRED END ---"
-        print >>sys.stderr, "----------------------------------"
-        lane_results = results.gerald.summary[0][lane]
-        lrs = lane_results
-        
-        d = {}
-        
-        d['average_alignment_score'] = lrs.average_alignment_score
-        d['average_first_cycle_intensity'] = lrs.average_first_cycle_intensity
-        d['cluster'] = lrs.cluster
-        d['lane'] = lrs.lane
-        d['flowcell'] = flowcell_id
-        d['cnm'] = cnm
-        d['percent_error_rate'] = lrs.percent_error_rate
-        d['percent_intensity_after_20_cycles'] = lrs.percent_intensity_after_20_cycles
-        d['percent_pass_filter_align'] = lrs.percent_pass_filter_align
-        d['percent_pass_filter_clusters'] = lrs.percent_pass_filter_clusters
-        
-        #FIXME: function finished, but need to take advantage of
-        #   may need to take in a list of lanes so we only have to
-        #   load the xml file once per flowcell rather than once
-        #   per lane.
-        dict_list.append(d)
-    
-    return (dict_list, err_list, summary_list)
-    
-    
+
 def get_eland_result_type(pathname):
     """
     Guess the eland result file type from the filename
@@ -350,13 +351,14 @@ def get_eland_result_type(pathname):
     else:
         return 'unknown'
 
-def _make_eland_results(flowcell_id, lane, interesting_flowcells):
+def _make_eland_results(flowcell_id, lane_number, interesting_flowcells):
     fc_id, status = parse_flowcell_id(flowcell_id)
     cur_fc = interesting_flowcells.get(fc_id, None)
     if cur_fc is None:
       return []
 
     flowcell = FlowCell.objects.get(flowcell_id=flowcell_id)
+    lanes = flowcell.lane_set.filter(lane_number=lane_number)
     # Loop throw storage devices if a result has been archived
     storage_id_list = []
     if cur_fc is not None:
@@ -368,7 +370,7 @@ def _make_eland_results(flowcell_id, lane, interesting_flowcells):
                 # Otherwise use UUID
                 else:
                     storage_id_list.append(sd.uuid)
-        
+
     # Formatting for template use
     if len(storage_id_list) == 0:
         storage_ids = None
@@ -377,12 +379,13 @@ def _make_eland_results(flowcell_id, lane, interesting_flowcells):
 
     results = []
     for cycle in cur_fc.keys():
-        result_path = cur_fc[cycle]['eland_results'].get(lane, None)
-        result_link = make_result_link(fc_id, cycle, lane, result_path)
+        result_path = cur_fc[cycle]['eland_results'].get(lanes[0], None)
+        result_link = make_result_link(fc_id, cycle, lanes[0], result_path)
         results.append({'flowcell_id': fc_id,
+                        'flowcell': flowcell,
                         'run_date': flowcell.run_date,
-                        'cycle': cycle, 
-                        'lane': lane
+                        'cycle': cycle,
+                        'lane': lanes[0],
                         'summary_url': make_summary_url(flowcell_id, cycle),
                         'result_url': result_link[0],
                         'result_label': result_link[1],
@@ -406,7 +409,7 @@ def make_result_link(flowcell_id, cycle_name, lane, eland_result_path):
     if result_type == 'result':
        bed_url_pattern = '/results/%s/%s/bedfile/%s'
        bed_url = bed_url_pattern % (flowcell_id, cycle_name, lane)
-    
+
     return (result_url, result_label, bed_url)
 
 def _files(flowcell_id, lane):
@@ -417,19 +420,19 @@ def _files(flowcell_id, lane):
 
     flowcell_id, id = parse_flowcell_id(flowcell_id)
     d = get_flowcell_result_dict(flowcell_id)
-    
+
     if d is None:
         return ''
-    
+
     output = []
-    
+
     # c_name == 'CN-M' (i.e. C1-33)
     for c_name in d:
-        
+
         if d[c_name]['summary'] is not None:
             output.append('<a href="/results/%s/%s/summary/">summary(%s)</a>' \
                           % (flowcell_id, c_name, c_name))
-        
+
         erd = d[c_name]['eland_results']
         if lane in erd:
             result_type = get_eland_result_type(erd[lane])
@@ -438,10 +441,10 @@ def _files(flowcell_id, lane):
             if result_type == 'result':
                 bed_url_pattern = '<a href="/results/%s/%s/bedfile/%s">bedfile(%s)</a>'
                 output.append(bed_url_pattern % (flowcell_id, c_name, lane, c_name))
-    
+
     if len(output) == 0:
         return ''
-    
+
     return '(' + '|'.join(output) + ')'
 
 def library_id_to_admin_url(request, lib_id):
@@ -462,21 +465,28 @@ def library_dict(library_id):
     lane_info = []
     for lane in lib.lane_set.all():
         lane_info.append( {'flowcell':lane.flowcell.flowcell_id,
-                           'lane_number': lane.lane_number} )
-        
+                           'lane_number': lane.lane_number,
+                           'lane_id': lane.id,
+                           'paired_end': lane.flowcell.paired_end,
+                           'read_length': lane.flowcell.read_length,
+                           'status_code': lane.status,
+                           'status': LANE_STATUS_MAP[lane.status]} )
+
     info = {
         # 'affiliations'?
         # 'aligned_reads': lib.aligned_reads,
         #'amplified_into_sample': lib.amplified_into_sample, # into is a colleciton...
-        #'amplified_from_sample_id': lib.amplified_from_sample, 
+        #'amplified_from_sample_id': lib.amplified_from_sample,
         #'antibody_name': lib.antibody_name(), # we have no antibodies.
         'antibody_id': lib.antibody_id,
-        'avg_lib_size': lib.avg_lib_size,
-        'cell_line': lib.cell_line.cellline_name,
         'cell_line_id': lib.cell_line_id,
+        'cell_line': unicode_or_none(lib.cell_line),
         'experiment_type': lib.experiment_type.name,
         'experiment_type_id': lib.experiment_type_id,
+        'gel_cut_size': lib.gel_cut_size,
+        'hidden': lib.hidden,
         'id': lib.id,
+        'insert_size': lib.insert_size,
         'lane_set': lane_info,
         'library_id': lib.id,
         'library_name': lib.library_name,
@@ -489,8 +499,8 @@ def library_dict(library_id):
         'notes': lib.notes,
         'replicate': lib.replicate,
         'stopping_point': lib.stopping_point,
-        'successful_pM': unicode(lib.successful_pM),
-        'undiluted_concentration': unicode(lib.undiluted_concentration)
+        'successful_pM': unicode_or_none(lib.successful_pM),
+        'undiluted_concentration': unicode_or_none(lib.undiluted_concentration)
         }
     if lib.library_type_id is None:
         info['library_type'] = None
@@ -498,13 +508,14 @@ def library_dict(library_id):
         info['library_type'] = lib.library_type.name
     return info
 
+@csrf_exempt
 def library_json(request, library_id):
     """
     Return a json formatted library dictionary
     """
     require_api_key(request)
     # what validation should we do on library_id?
-    
+
     lib = library_dict(library_id)
     if lib is None:
         raise Http404
@@ -512,12 +523,26 @@ def library_json(request, library_id):
     lib_json = json.dumps(lib)
     return HttpResponse(lib_json, mimetype='application/json')
 
+@csrf_exempt
 def species_json(request, species_id):
     """
     Return information about a species.
     """
     raise Http404
-    
+
+def species(request, species_id):
+    species = get_object_or_404(Species, id=species_id)
+
+    context = RequestContext(request,
+                             { 'species': species })
+
+    return render_to_response("samples/species_detail.html", context)
+
+def antibodies(request):
+    context = RequestContext(request,
+                             {'antibodies': Antibody.objects.order_by('antigene')})
+    return render_to_response("samples/antibody_index.html", context)
+
 @login_required
 def user_profile(request):
     """
@@ -533,3 +558,4 @@ def user_profile(request):
     return render_to_response('registration/profile.html', context,
                               context_instance=RequestContext(request))
 
+