Attempt to be robust when our RunXml file has no alignment

[htsworkflow.git] / htsworkflow / frontend / samples / views.py
diff --git a/htsworkflow/frontend/samples/views.py b/htsworkflow/frontend/samples/views.py

index 9456712da11f3c1998b0591a0bca87e8bec90bf3..e50b5b357c2caa1b88d73cb2f2884cf0124f6158 100644 (file)
--- a/htsworkflow/frontend/samples/views.py
+++ b/htsworkflow/frontend/samples/views.py
@@ -9,25 +9,29 @@ try:
  except ImportError, e:
      import simplejson as json
  
+from django.contrib.csrf.middleware import csrf_exempt
  from htsworkflow.frontend.auth import require_api_key
-from htsworkflow.frontend.experiments.models import FlowCell
+from htsworkflow.frontend.experiments.models import FlowCell, Lane, LANE_STATUS_MAP
  from htsworkflow.frontend.samples.changelist import ChangeList
-from htsworkflow.frontend.samples.models import Library
-from htsworkflow.frontend.samples.results import get_flowcell_result_dict, parse_flowcell_id
+from htsworkflow.frontend.samples.models import Antibody, Library, Species, HTSUser
+from htsworkflow.frontend.samples.results import get_flowcell_result_dict
  from htsworkflow.frontend.bcmagic.forms import BarcodeMagicForm
  from htsworkflow.pipelines.runfolder import load_pipeline_run_xml
  from htsworkflow.pipelines import runfolder
  from htsworkflow.pipelines.eland import ResultLane
-from htsworkflow.frontend import settings
+from htsworkflow.pipelines.samplekey import SampleKey
+from htsworkflow.util.conversion import unicode_or_none, parse_flowcell_id
  from htsworkflow.util import makebed
  from htsworkflow.util import opener
  
+
  from django.core.exceptions import ObjectDoesNotExist
  from django.http import HttpResponse, HttpResponseRedirect, Http404
-from django.shortcuts import render_to_response
+from django.shortcuts import render_to_response, get_object_or_404
  from django.template import RequestContext
  from django.template.loader import get_template
  from django.contrib.auth.decorators import login_required
+from django.conf import settings
  
  LANE_LIST = [1,2,3,4,5,6,7,8]
  SAMPLES_CONTEXT_DEFAULTS = {
@@ -35,14 +39,40 @@ SAMPLES_CONTEXT_DEFAULTS = {
      'bcmagic': BarcodeMagicForm()
  }
  
+LOGGER = logging.getLogger(__name__)
+
+def count_lanes(lane_set):
+    single = 0
+    paired = 1
+    short_read = 0
+    medium_read = 1
+    long_read = 2
+    counts = [[0,0,0,],[0,0,0]]
+
+    for lane in lane_set.all():
+        if lane.flowcell.paired_end:
+            lane_type = paired
+        else:
+            lane_type = single
+        if lane.flowcell.read_length < 40:
+            read_type = short_read
+        elif lane.flowcell.read_length < 100:
+            read_type = medium_read
+        else:
+            read_type = long_read
+        counts[lane_type][read_type] += 1
+
+    return counts
+
  def create_library_context(cl):
      """
-    Create a list of libraries that includes how many lanes were run
+     Create a list of libraries that includes how many lanes were run
      """
      records = []
      #for lib in library_items.object_list:
      for lib in cl.result_list:
         summary = {}
+       summary['library'] = lib
         summary['library_id'] = lib.id
         summary['library_name'] = lib.library_name
         summary['species_name' ] = lib.library_species.scientific_name
@@ -50,52 +80,51 @@ def create_library_context(cl):
             summary['amplified_from'] = lib.amplified_from_sample.id
         else:
             summary['amplified_from'] = ''
-       lanes_run = 0
-       #for lane_id in LANE_LIST:
-       #    lane = getattr(lib, 'lane_%d_library' % (lane_id,))
-       #    lanes_run += len( lane.all() )
-       lanes_run = lib.lane_set.count()
+       lanes_run = count_lanes(lib.lane_set)
+       # suppress zeros
+       for row in xrange(len(lanes_run)):
+           for col in xrange(len(lanes_run[row])):
+               if lanes_run[row][col] == 0:
+                   lanes_run[row][col] = ''
         summary['lanes_run'] = lanes_run
         summary['is_archived'] = lib.is_archived()
         records.append(summary)
-    cl.result_count = unicode(cl.paginator._count) + u" libraries"
+    cl.result_count = unicode(cl.paginator._count)
      return {'library_list': records }
  
-def library(request):
-   # build changelist
+
+def library(request, todo_only=False):
+    queryset = Library.objects.filter(hidden__exact=0)
+    if todo_only:
+        queryset = queryset.filter(lane=None)
+    # build changelist
      fcl = ChangeList(request, Library,
          list_filter=['affiliations', 'library_species'],
          search_fields=['id', 'library_name', 'amplified_from_sample__id'],
          list_per_page=200,
-        queryset=Library.objects.filter(hidden__exact=0)
+        queryset=queryset
      )
  
-    context = { 'cl': fcl, 'title': 'Library Index'}
+    context = { 'cl': fcl, 'title': 'Library Index', 'todo_only': todo_only}
      context.update(create_library_context(fcl))
      t = get_template('samples/library_index.html')
      c = RequestContext(request, context)
-    
-    app_context = {
-        'page_name': 'Library Index',
-        'east_region_config_div': 'changelist-filter',
-        'body': t.render(c)
-    }
-    app_context.update(SAMPLES_CONTEXT_DEFAULTS)
-    
-    app_t = get_template('flowcell_libraries_app.html')
-    app_c = RequestContext(request, app_context)
-    return HttpResponse( app_t.render(app_c) )
+    return HttpResponse( t.render(c) )
+
+
+def library_not_run(request):
+    return library(request, todo_only=True)
+
  
  def library_to_flowcells(request, lib_id):
      """
      Display information about all the flowcells a library has been run on.
      """
-    
      try:
-      lib = Library.objects.get(id=lib_id)
+        lib = Library.objects.get(id=lib_id)
      except:
-      return HttpResponse("Library %s does not exist" % (lib_id))
-   
+        raise Http404('Library %s does not exist' % (lib_id,))
+
      flowcell_list = []
      flowcell_run_results = {} # aka flowcells we're looking at
      for lane in lib.lane_set.all():
@@ -109,10 +138,10 @@ def library_to_flowcells(request, lib_id):
      lane_summary_list = []
      eland_results = []
      for fc, lane_number in flowcell_list:
-        lane_summary, err_list = _summary_stats(fc, lane_number)
+        lane_summary, err_list = _summary_stats(fc, lane_number, lib_id)
+        lane_summary_list.extend(lane_summary)
  
          eland_results.extend(_make_eland_results(fc, lane_number, flowcell_run_results))
-        lane_summary_list.extend(lane_summary)
  
      context = {
          'page_name': 'Library Details',
@@ -127,26 +156,50 @@ def library_to_flowcells(request, lib_id):
          context,
          context_instance = RequestContext(request))
  
+def lanes_for(request, username=None):
+    """
+    Generate a report of recent activity for a user
+    """
+    query = {}
+    if username is not None:
+        user = HTSUser.objects.get(username=username)
+        query.update({'library__affiliations__users__id':user.id})
+    fcl = ChangeList(request, Lane,
+        list_filter=[],
+        search_fields=['flowcell__flowcell_id', 'library__id', 'library__library_name'],
+        list_per_page=200,
+        queryset=Lane.objects.filter(**query)
+    )
+
+    context = { 'lanes': fcl, 'title': 'Lane Index'}
+
+    return render_to_response(
+        'samples/lanes_for.html',
+        context,
+        context_instance = RequestContext(request)
+    )
+
+
  def summaryhtm_fc_cnm(request, flowcell_id, cnm):
      """
      returns a Summary.htm file if it exists.
      """
      fc_id, status = parse_flowcell_id(flowcell_id)
      d = get_flowcell_result_dict(fc_id)
-    
+
      if d is None:
          return HttpResponse('<b>Results for Flowcell %s not found.</b>' % (fc_id))
-    
+
      if cnm not in d:
          return HttpResponse('<b>Results for Flowcell %s; %s not found.</b>' % (fc_id, cnm))
-    
+
      summary_filepath = d[cnm]['summary']
-    
+
      if summary_filepath is None:
          return HttpResponse('<b>Summary.htm for Flowcell %s; %s not found.</b>' % (fc_id, cnm))
-    
+
      f = open(summary_filepath, 'r')
-    
+
      return HttpResponse(f)
  
  
@@ -156,27 +209,27 @@ def result_fc_cnm_eland_lane(request, flowcell_id, cnm, lane):
      """
      fc_id, status = parse_flowcell_id(flowcell_id)
      d = get_flowcell_result_dict(fc_id)
-    
+
      if d is None:
          return HttpResponse('<b>Results for Flowcell %s not found.</b>' % (fc_id))
-    
+
      if cnm not in d:
          return HttpResponse('<b>Results for Flowcell %s; %s not found.</b>' % (fc_id, cnm))
-    
+
      erd = d[cnm]['eland_results']
      lane = int(lane)
-    
+
      if lane not in erd:
          return HttpResponse('<b>Results for Flowcell %s; %s; lane %s not found.</b>' % (fc_id, cnm, lane))
-    
+
      filepath = erd[lane]
-    
+
      #f = opener.autoopen(filepath, 'r')
      # return HttpResponse(f, mimetype="application/x-elandresult")
  
      f = open(filepath, 'r')
      return HttpResponse(f, mimetype='application/x-bzip2')
-    
+
  
  
  def bedfile_fc_cnm_eland_lane_ucsc(request, fc_id, cnm, lane):
@@ -192,36 +245,36 @@ def bedfile_fc_cnm_eland_lane(request, flowcell_id, cnm, lane, ucsc_compatible=F
      """
      fc_id, status = parse_flowcell_id(flowcell_id)
      d = get_flowcell_result_dict(fc_id)
-    
+
      if d is None:
          return HttpResponse('<b>Results for Flowcell %s not found.</b>' % (fc_id))
-    
+
      if cnm not in d:
          return HttpResponse('<b>Results for Flowcell %s; %s not found.</b>' % (fc_id, cnm))
-    
+
      erd = d[cnm]['eland_results']
      lane = int(lane)
-    
+
      if lane not in erd:
          return HttpResponse('<b>Results for Flowcell %s; %s; lane %s not found.</b>' % (fc_id, cnm, lane))
-    
+
      filepath = erd[lane]
-    
+
      # Eland result file
      fi = opener.autoopen(filepath, 'r')
      # output memory file
-    
+
      name, description = makebed.make_description( fc_id, lane )
-    
+
      bedgen = makebed.make_bed_from_eland_generator(fi, name, description)
-    
+
      if ucsc_compatible:
          return HttpResponse(bedgen)
      else:
          return HttpResponse(bedgen, mimetype="application/x-bedfile")
  
  
-def _summary_stats(flowcell_id, lane_id):
+def _summary_stats(flowcell_id, lane_id, library_id):
      """
      Return the summary statistics for a given flowcell, lane, and end.
      """
@@ -230,112 +283,60 @@ def _summary_stats(flowcell_id, lane_id):
  
      summary_list = []
      err_list = []
-    
+
      if fc_result_dict is None:
          err_list.append('Results for Flowcell %s not found.' % (fc_id))
          return (summary_list, err_list)
  
      for cycle_width in fc_result_dict:
          xmlpath = fc_result_dict[cycle_width]['run_xml']
-        
+
          if xmlpath is None:
              err_list.append('Run xml for Flowcell %s(%s) not found.' % (fc_id, cycle_width))
              continue
-        
+
          run = load_pipeline_run_xml(xmlpath)
+        # skip if we don't have available metadata.
+        if run.gerald is None or run.gerald.summary is None:
+            continue
+        
          gerald_summary = run.gerald.summary.lane_results
-        for end in range(len(gerald_summary)):
-            end_summary = run.gerald.eland_results.results[end]
-            if end_summary.has_key(lane_id):
-                eland_summary = run.gerald.eland_results.results[end][lane_id]
-            else:
-                eland_summary = ResultLane(lane_id=lane_id, end=end)
+        key = SampleKey(lane=lane_id, sample='s')
+        eland_results = list(run.gerald.eland_results.find_keys(key))
+        key = SampleKey(lane=lane_id, sample=library_id)
+        eland_results.extend(run.gerald.eland_results.find_keys(key))
+        for key in eland_results:
+            eland_summary = run.gerald.eland_results.results[key]
              # add information to lane_summary
              eland_summary.flowcell_id = flowcell_id
-            if len(gerald_summary) > end and gerald_summary[end].has_key(lane_id):
-                eland_summary.clusters = gerald_summary[end][lane_id].cluster
-            else:
-                eland_summary.clusters = None
+
+            read = key.read-1 if key.read is not None else 0
+            try:
+                eland_summary.clusters = gerald_summary[read][key.lane].cluster
+            except (IndexError, KeyError) as e:
+                eland_summary.clusters = None
              eland_summary.cycle_width = cycle_width
              if hasattr(eland_summary, 'genome_map'):
-                eland_summary.summarized_reads = runfolder.summarize_mapped_reads( 
-                                                   eland_summary.genome_map, 
+                eland_summary.summarized_reads = runfolder.summarize_mapped_reads(
+                                                   eland_summary.genome_map,
                                                     eland_summary.mapped_reads)
  
              # grab some more information out of the flowcell db
              flowcell = FlowCell.objects.get(flowcell_id=flowcell_id)
              #pm_field = 'lane_%d_pM' % (lane_id)
-            lane_obj = flowcell.lane_set.get(lane_number=lane_id)
-            eland_summary.successful_pm = lane_obj.pM
+            lanes = flowcell.lane_set.filter(lane_number=lane_id)
+            eland_summary.flowcell = flowcell
+            eland_summary.lanes = lanes
  
              summary_list.append(eland_summary)
  
          #except Exception, e:
          #    summary_list.append("Summary report needs to be updated.")
-        #    logging.error("Exception: " + str(e))
-    
+        #    LOGGER.error("Exception: " + str(e))
+
      return (summary_list, err_list)
  
-def _summary_stats_old(flowcell_id, lane):
-    """
-    return a dictionary of summary stats for a given flowcell_id & lane.
-    """
-    fc_id, status = parse_flowcell_id(flowcell_id)
-    fc_result_dict = get_flowcell_result_dict(fc_id)
-    
-    dict_list = []
-    err_list = []
-    summary_list = []
-    
-    if fc_result_dict is None:
-        err_list.append('Results for Flowcell %s not found.' % (fc_id))
-        return (dict_list, err_list, summary_list)
-    
-    for cnm in fc_result_dict:
-    
-        xmlpath = fc_result_dict[cnm]['run_xml']
-        
-        if xmlpath is None:
-            err_list.append('Run xml for Flowcell %s(%s) not found.' % (fc_id, cnm))
-            continue
-        
-        tree = ElementTree.parse(xmlpath).getroot()
-        results = runfolder.PipelineRun(pathname='', xml=tree)
-        try:
-            lane_report = runfolder.summarize_lane(results.gerald, lane)
-            summary_list.append(os.linesep.join(lane_report))
-        except Exception, e:
-            summary_list.append("Summary report needs to be updated.")
-            logging.error("Exception: " + str(e))
-       
-        print >>sys.stderr, "----------------------------------"
-        print >>sys.stderr, "-- DOES NOT SUPPORT PAIRED END ---"
-        print >>sys.stderr, "----------------------------------"
-        lane_results = results.gerald.summary[0][lane]
-        lrs = lane_results
-        
-        d = {}
-        
-        d['average_alignment_score'] = lrs.average_alignment_score
-        d['average_first_cycle_intensity'] = lrs.average_first_cycle_intensity
-        d['cluster'] = lrs.cluster
-        d['lane'] = lrs.lane
-        d['flowcell'] = flowcell_id
-        d['cnm'] = cnm
-        d['percent_error_rate'] = lrs.percent_error_rate
-        d['percent_intensity_after_20_cycles'] = lrs.percent_intensity_after_20_cycles
-        d['percent_pass_filter_align'] = lrs.percent_pass_filter_align
-        d['percent_pass_filter_clusters'] = lrs.percent_pass_filter_clusters
-        
-        #FIXME: function finished, but need to take advantage of
-        #   may need to take in a list of lanes so we only have to
-        #   load the xml file once per flowcell rather than once
-        #   per lane.
-        dict_list.append(d)
-    
-    return (dict_list, err_list, summary_list)
-    
-    
+
  def get_eland_result_type(pathname):
      """
      Guess the eland result file type from the filename
@@ -350,13 +351,14 @@ def get_eland_result_type(pathname):
      else:
          return 'unknown'
  
-def _make_eland_results(flowcell_id, lane, interesting_flowcells):
+def _make_eland_results(flowcell_id, lane_number, interesting_flowcells):
      fc_id, status = parse_flowcell_id(flowcell_id)
      cur_fc = interesting_flowcells.get(fc_id, None)
      if cur_fc is None:
        return []
  
      flowcell = FlowCell.objects.get(flowcell_id=flowcell_id)
+    lanes = flowcell.lane_set.filter(lane_number=lane_number)
      # Loop throw storage devices if a result has been archived
      storage_id_list = []
      if cur_fc is not None:
@@ -368,7 +370,7 @@ def _make_eland_results(flowcell_id, lane, interesting_flowcells):
                  # Otherwise use UUID
                  else:
                      storage_id_list.append(sd.uuid)
-        
+
      # Formatting for template use
      if len(storage_id_list) == 0:
          storage_ids = None
@@ -377,12 +379,13 @@ def _make_eland_results(flowcell_id, lane, interesting_flowcells):
  
      results = []
      for cycle in cur_fc.keys():
-        result_path = cur_fc[cycle]['eland_results'].get(lane, None)
-        result_link = make_result_link(fc_id, cycle, lane, result_path)
+        result_path = cur_fc[cycle]['eland_results'].get(lanes[0], None)
+        result_link = make_result_link(fc_id, cycle, lanes[0], result_path)
          results.append({'flowcell_id': fc_id,
+                        'flowcell': flowcell,
                          'run_date': flowcell.run_date,
-                        'cycle': cycle, 
-                        'lane': lane, 
+                        'cycle': cycle,
+                        'lane': lanes[0],
                          'summary_url': make_summary_url(flowcell_id, cycle),
                          'result_url': result_link[0],
                          'result_label': result_link[1],
@@ -406,7 +409,7 @@ def make_result_link(flowcell_id, cycle_name, lane, eland_result_path):
      if result_type == 'result':
         bed_url_pattern = '/results/%s/%s/bedfile/%s'
         bed_url = bed_url_pattern % (flowcell_id, cycle_name, lane)
-    
+
      return (result_url, result_label, bed_url)
  
  def _files(flowcell_id, lane):
@@ -417,19 +420,19 @@ def _files(flowcell_id, lane):
  
      flowcell_id, id = parse_flowcell_id(flowcell_id)
      d = get_flowcell_result_dict(flowcell_id)
-    
+
      if d is None:
          return ''
-    
+
      output = []
-    
+
      # c_name == 'CN-M' (i.e. C1-33)
      for c_name in d:
-        
+
          if d[c_name]['summary'] is not None:
              output.append('<a href="/results/%s/%s/summary/">summary(%s)</a>' \
                            % (flowcell_id, c_name, c_name))
-        
+
          erd = d[c_name]['eland_results']
          if lane in erd:
              result_type = get_eland_result_type(erd[lane])
@@ -438,10 +441,10 @@ def _files(flowcell_id, lane):
              if result_type == 'result':
                  bed_url_pattern = '<a href="/results/%s/%s/bedfile/%s">bedfile(%s)</a>'
                  output.append(bed_url_pattern % (flowcell_id, c_name, lane, c_name))
-    
+
      if len(output) == 0:
          return ''
-    
+
      return '(' + '|'.join(output) + ')'
  
  def library_id_to_admin_url(request, lib_id):
@@ -462,21 +465,28 @@ def library_dict(library_id):
      lane_info = []
      for lane in lib.lane_set.all():
          lane_info.append( {'flowcell':lane.flowcell.flowcell_id,
-                           'lane_number': lane.lane_number} )
-        
+                           'lane_number': lane.lane_number,
+                           'lane_id': lane.id,
+                           'paired_end': lane.flowcell.paired_end,
+                           'read_length': lane.flowcell.read_length,
+                           'status_code': lane.status,
+                           'status': LANE_STATUS_MAP[lane.status]} )
+
      info = {
          # 'affiliations'?
          # 'aligned_reads': lib.aligned_reads,
          #'amplified_into_sample': lib.amplified_into_sample, # into is a colleciton...
-        #'amplified_from_sample_id': lib.amplified_from_sample, 
+        #'amplified_from_sample_id': lib.amplified_from_sample,
          #'antibody_name': lib.antibody_name(), # we have no antibodies.
          'antibody_id': lib.antibody_id,
-        'avg_lib_size': lib.avg_lib_size,
-        'cell_line': lib.cell_line.cellline_name,
          'cell_line_id': lib.cell_line_id,
+        'cell_line': unicode_or_none(lib.cell_line),
          'experiment_type': lib.experiment_type.name,
          'experiment_type_id': lib.experiment_type_id,
+        'gel_cut_size': lib.gel_cut_size,
+        'hidden': lib.hidden,
          'id': lib.id,
+        'insert_size': lib.insert_size,
          'lane_set': lane_info,
          'library_id': lib.id,
          'library_name': lib.library_name,
@@ -489,8 +499,8 @@ def library_dict(library_id):
          'notes': lib.notes,
          'replicate': lib.replicate,
          'stopping_point': lib.stopping_point,
-        'successful_pM': unicode(lib.successful_pM),
-        'undiluted_concentration': unicode(lib.undiluted_concentration)
+        'successful_pM': unicode_or_none(lib.successful_pM),
+        'undiluted_concentration': unicode_or_none(lib.undiluted_concentration)
          }
      if lib.library_type_id is None:
          info['library_type'] = None
@@ -498,13 +508,14 @@ def library_dict(library_id):
          info['library_type'] = lib.library_type.name
      return info
  
+@csrf_exempt
  def library_json(request, library_id):
      """
      Return a json formatted library dictionary
      """
      require_api_key(request)
      # what validation should we do on library_id?
-    
+
      lib = library_dict(library_id)
      if lib is None:
          raise Http404
@@ -512,12 +523,26 @@ def library_json(request, library_id):
      lib_json = json.dumps(lib)
      return HttpResponse(lib_json, mimetype='application/json')
  
+@csrf_exempt
  def species_json(request, species_id):
      """
      Return information about a species.
      """
      raise Http404
-    
+
+def species(request, species_id):
+    species = get_object_or_404(Species, id=species_id)
+
+    context = RequestContext(request,
+                             { 'species': species })
+
+    return render_to_response("samples/species_detail.html", context)
+
+def antibodies(request):
+    context = RequestContext(request,
+                             {'antibodies': Antibody.objects.order_by('antigene')})
+    return render_to_response("samples/antibody_index.html", context)
+
  @login_required
  def user_profile(request):
      """
@@ -533,3 +558,4 @@ def user_profile(request):
      return render_to_response('registration/profile.html', context,
                                context_instance=RequestContext(request))
  
+