Attempt to be robust to not having an alignment in our RunXml file

[htsworkflow.git] / htsworkflow / frontend / samples / views.py
diff --git a/htsworkflow/frontend/samples/views.py b/htsworkflow/frontend/samples/views.py

index 12bb79288b7b978f3bf65a84bb0fd4b3108ceaee..e50b5b357c2caa1b88d73cb2f2884cf0124f6158 100644 (file)
--- a/htsworkflow/frontend/samples/views.py
+++ b/htsworkflow/frontend/samples/views.py
@@ -9,6 +9,7 @@ try:
  except ImportError, e:
      import simplejson as json
  
+from django.contrib.csrf.middleware import csrf_exempt
  from htsworkflow.frontend.auth import require_api_key
  from htsworkflow.frontend.experiments.models import FlowCell, Lane, LANE_STATUS_MAP
  from htsworkflow.frontend.samples.changelist import ChangeList
@@ -18,6 +19,7 @@ from htsworkflow.frontend.bcmagic.forms import BarcodeMagicForm
  from htsworkflow.pipelines.runfolder import load_pipeline_run_xml
  from htsworkflow.pipelines import runfolder
  from htsworkflow.pipelines.eland import ResultLane
+from htsworkflow.pipelines.samplekey import SampleKey
  from htsworkflow.util.conversion import unicode_or_none, parse_flowcell_id
  from htsworkflow.util import makebed
  from htsworkflow.util import opener
@@ -37,6 +39,8 @@ SAMPLES_CONTEXT_DEFAULTS = {
      'bcmagic': BarcodeMagicForm()
  }
  
+LOGGER = logging.getLogger(__name__)
+
  def count_lanes(lane_set):
      single = 0
      paired = 1
@@ -44,7 +48,7 @@ def count_lanes(lane_set):
      medium_read = 1
      long_read = 2
      counts = [[0,0,0,],[0,0,0]]
-    
+
      for lane in lane_set.all():
          if lane.flowcell.paired_end:
              lane_type = paired
@@ -57,7 +61,7 @@ def count_lanes(lane_set):
          else:
              read_type = long_read
          counts[lane_type][read_type] += 1
-        
+
      return counts
  
  def create_library_context(cl):
@@ -88,32 +92,39 @@ def create_library_context(cl):
      cl.result_count = unicode(cl.paginator._count)
      return {'library_list': records }
  
-def library(request):
+
+def library(request, todo_only=False):
+    queryset = Library.objects.filter(hidden__exact=0)
+    if todo_only:
+        queryset = queryset.filter(lane=None)
      # build changelist
      fcl = ChangeList(request, Library,
          list_filter=['affiliations', 'library_species'],
          search_fields=['id', 'library_name', 'amplified_from_sample__id'],
          list_per_page=200,
-        queryset=Library.objects.filter(hidden__exact=0)
+        queryset=queryset
      )
  
-    context = { 'cl': fcl, 'title': 'Library Index'}
+    context = { 'cl': fcl, 'title': 'Library Index', 'todo_only': todo_only}
      context.update(create_library_context(fcl))
      t = get_template('samples/library_index.html')
      c = RequestContext(request, context)
      return HttpResponse( t.render(c) )
-    
+
+
+def library_not_run(request):
+    return library(request, todo_only=True)
+
  
  def library_to_flowcells(request, lib_id):
      """
      Display information about all the flowcells a library has been run on.
      """
-    
      try:
-      lib = Library.objects.get(id=lib_id)
+        lib = Library.objects.get(id=lib_id)
      except:
-      return HttpResponse("Library %s does not exist" % (lib_id))
-   
+        raise Http404('Library %s does not exist' % (lib_id,))
+
      flowcell_list = []
      flowcell_run_results = {} # aka flowcells we're looking at
      for lane in lib.lane_set.all():
@@ -127,9 +138,9 @@ def library_to_flowcells(request, lib_id):
      lane_summary_list = []
      eland_results = []
      for fc, lane_number in flowcell_list:
-        lane_summary, err_list = _summary_stats(fc, lane_number)
+        lane_summary, err_list = _summary_stats(fc, lane_number, lib_id)
          lane_summary_list.extend(lane_summary)
-        
+
          eland_results.extend(_make_eland_results(fc, lane_number, flowcell_run_results))
  
      context = {
@@ -167,28 +178,28 @@ def lanes_for(request, username=None):
          context,
          context_instance = RequestContext(request)
      )
-          
-    
+
+
  def summaryhtm_fc_cnm(request, flowcell_id, cnm):
      """
      returns a Summary.htm file if it exists.
      """
      fc_id, status = parse_flowcell_id(flowcell_id)
      d = get_flowcell_result_dict(fc_id)
-    
+
      if d is None:
          return HttpResponse('<b>Results for Flowcell %s not found.</b>' % (fc_id))
-    
+
      if cnm not in d:
          return HttpResponse('<b>Results for Flowcell %s; %s not found.</b>' % (fc_id, cnm))
-    
+
      summary_filepath = d[cnm]['summary']
-    
+
      if summary_filepath is None:
          return HttpResponse('<b>Summary.htm for Flowcell %s; %s not found.</b>' % (fc_id, cnm))
-    
+
      f = open(summary_filepath, 'r')
-    
+
      return HttpResponse(f)
  
  
@@ -198,27 +209,27 @@ def result_fc_cnm_eland_lane(request, flowcell_id, cnm, lane):
      """
      fc_id, status = parse_flowcell_id(flowcell_id)
      d = get_flowcell_result_dict(fc_id)
-    
+
      if d is None:
          return HttpResponse('<b>Results for Flowcell %s not found.</b>' % (fc_id))
-    
+
      if cnm not in d:
          return HttpResponse('<b>Results for Flowcell %s; %s not found.</b>' % (fc_id, cnm))
-    
+
      erd = d[cnm]['eland_results']
      lane = int(lane)
-    
+
      if lane not in erd:
          return HttpResponse('<b>Results for Flowcell %s; %s; lane %s not found.</b>' % (fc_id, cnm, lane))
-    
+
      filepath = erd[lane]
-    
+
      #f = opener.autoopen(filepath, 'r')
      # return HttpResponse(f, mimetype="application/x-elandresult")
  
      f = open(filepath, 'r')
      return HttpResponse(f, mimetype='application/x-bzip2')
-    
+
  
  
  def bedfile_fc_cnm_eland_lane_ucsc(request, fc_id, cnm, lane):
@@ -234,36 +245,36 @@ def bedfile_fc_cnm_eland_lane(request, flowcell_id, cnm, lane, ucsc_compatible=F
      """
      fc_id, status = parse_flowcell_id(flowcell_id)
      d = get_flowcell_result_dict(fc_id)
-    
+
      if d is None:
          return HttpResponse('<b>Results for Flowcell %s not found.</b>' % (fc_id))
-    
+
      if cnm not in d:
          return HttpResponse('<b>Results for Flowcell %s; %s not found.</b>' % (fc_id, cnm))
-    
+
      erd = d[cnm]['eland_results']
      lane = int(lane)
-    
+
      if lane not in erd:
          return HttpResponse('<b>Results for Flowcell %s; %s; lane %s not found.</b>' % (fc_id, cnm, lane))
-    
+
      filepath = erd[lane]
-    
+
      # Eland result file
      fi = opener.autoopen(filepath, 'r')
      # output memory file
-    
+
      name, description = makebed.make_description( fc_id, lane )
-    
+
      bedgen = makebed.make_bed_from_eland_generator(fi, name, description)
-    
+
      if ucsc_compatible:
          return HttpResponse(bedgen)
      else:
          return HttpResponse(bedgen, mimetype="application/x-bedfile")
  
  
-def _summary_stats(flowcell_id, lane_id):
+def _summary_stats(flowcell_id, lane_id, library_id):
      """
      Return the summary statistics for a given flowcell, lane, and end.
      """
@@ -272,54 +283,60 @@ def _summary_stats(flowcell_id, lane_id):
  
      summary_list = []
      err_list = []
-    
+
      if fc_result_dict is None:
          err_list.append('Results for Flowcell %s not found.' % (fc_id))
          return (summary_list, err_list)
  
      for cycle_width in fc_result_dict:
          xmlpath = fc_result_dict[cycle_width]['run_xml']
-        
+
          if xmlpath is None:
              err_list.append('Run xml for Flowcell %s(%s) not found.' % (fc_id, cycle_width))
              continue
-        
+
          run = load_pipeline_run_xml(xmlpath)
+        # skip if we don't have available metadata.
+        if run.gerald is None or run.gerald.summary is None:
+            continue
+        
          gerald_summary = run.gerald.summary.lane_results
-        for end in range(len(gerald_summary)):
-            end_summary = run.gerald.eland_results.results[end]
-            if end_summary.has_key(lane_id):
-                eland_summary = run.gerald.eland_results.results[end][lane_id]
-            else:
-                eland_summary = ResultLane(lane_id=lane_id, end=end)
+        key = SampleKey(lane=lane_id, sample='s')
+        eland_results = list(run.gerald.eland_results.find_keys(key))
+        key = SampleKey(lane=lane_id, sample=library_id)
+        eland_results.extend(run.gerald.eland_results.find_keys(key))
+        for key in eland_results:
+            eland_summary = run.gerald.eland_results.results[key]
              # add information to lane_summary
              eland_summary.flowcell_id = flowcell_id
-            if len(gerald_summary) > end and gerald_summary[end].has_key(lane_id):
-                eland_summary.clusters = gerald_summary[end][lane_id].cluster
-            else:
-                eland_summary.clusters = None
+
+            read = key.read-1 if key.read is not None else 0
+            try:
+                eland_summary.clusters = gerald_summary[read][key.lane].cluster
+            except (IndexError, KeyError) as e:
+                eland_summary.clusters = None
              eland_summary.cycle_width = cycle_width
              if hasattr(eland_summary, 'genome_map'):
-                eland_summary.summarized_reads = runfolder.summarize_mapped_reads( 
-                                                   eland_summary.genome_map, 
+                eland_summary.summarized_reads = runfolder.summarize_mapped_reads(
+                                                   eland_summary.genome_map,
                                                     eland_summary.mapped_reads)
  
              # grab some more information out of the flowcell db
              flowcell = FlowCell.objects.get(flowcell_id=flowcell_id)
              #pm_field = 'lane_%d_pM' % (lane_id)
-            lane_obj = flowcell.lane_set.get(lane_number=lane_id)
+            lanes = flowcell.lane_set.filter(lane_number=lane_id)
              eland_summary.flowcell = flowcell
-            eland_summary.lane = lane_obj
+            eland_summary.lanes = lanes
  
              summary_list.append(eland_summary)
  
          #except Exception, e:
          #    summary_list.append("Summary report needs to be updated.")
-        #    logging.error("Exception: " + str(e))
-    
+        #    LOGGER.error("Exception: " + str(e))
+
      return (summary_list, err_list)
  
-    
+
  def get_eland_result_type(pathname):
      """
      Guess the eland result file type from the filename
@@ -341,7 +358,7 @@ def _make_eland_results(flowcell_id, lane_number, interesting_flowcells):
        return []
  
      flowcell = FlowCell.objects.get(flowcell_id=flowcell_id)
-    lane = flowcell.lane_set.get(lane_number=lane_number)
+    lanes = flowcell.lane_set.filter(lane_number=lane_number)
      # Loop throw storage devices if a result has been archived
      storage_id_list = []
      if cur_fc is not None:
@@ -353,7 +370,7 @@ def _make_eland_results(flowcell_id, lane_number, interesting_flowcells):
                  # Otherwise use UUID
                  else:
                      storage_id_list.append(sd.uuid)
-        
+
      # Formatting for template use
      if len(storage_id_list) == 0:
          storage_ids = None
@@ -362,13 +379,13 @@ def _make_eland_results(flowcell_id, lane_number, interesting_flowcells):
  
      results = []
      for cycle in cur_fc.keys():
-        result_path = cur_fc[cycle]['eland_results'].get(lane, None)
-        result_link = make_result_link(fc_id, cycle, lane, result_path)
+        result_path = cur_fc[cycle]['eland_results'].get(lanes[0], None)
+        result_link = make_result_link(fc_id, cycle, lanes[0], result_path)
          results.append({'flowcell_id': fc_id,
                          'flowcell': flowcell,
                          'run_date': flowcell.run_date,
-                        'cycle': cycle, 
-                        'lane': lane, 
+                        'cycle': cycle,
+                        'lane': lanes[0],
                          'summary_url': make_summary_url(flowcell_id, cycle),
                          'result_url': result_link[0],
                          'result_label': result_link[1],
@@ -392,7 +409,7 @@ def make_result_link(flowcell_id, cycle_name, lane, eland_result_path):
      if result_type == 'result':
         bed_url_pattern = '/results/%s/%s/bedfile/%s'
         bed_url = bed_url_pattern % (flowcell_id, cycle_name, lane)
-    
+
      return (result_url, result_label, bed_url)
  
  def _files(flowcell_id, lane):
@@ -403,19 +420,19 @@ def _files(flowcell_id, lane):
  
      flowcell_id, id = parse_flowcell_id(flowcell_id)
      d = get_flowcell_result_dict(flowcell_id)
-    
+
      if d is None:
          return ''
-    
+
      output = []
-    
+
      # c_name == 'CN-M' (i.e. C1-33)
      for c_name in d:
-        
+
          if d[c_name]['summary'] is not None:
              output.append('<a href="/results/%s/%s/summary/">summary(%s)</a>' \
                            % (flowcell_id, c_name, c_name))
-        
+
          erd = d[c_name]['eland_results']
          if lane in erd:
              result_type = get_eland_result_type(erd[lane])
@@ -424,10 +441,10 @@ def _files(flowcell_id, lane):
              if result_type == 'result':
                  bed_url_pattern = '<a href="/results/%s/%s/bedfile/%s">bedfile(%s)</a>'
                  output.append(bed_url_pattern % (flowcell_id, c_name, lane, c_name))
-    
+
      if len(output) == 0:
          return ''
-    
+
      return '(' + '|'.join(output) + ')'
  
  def library_id_to_admin_url(request, lib_id):
@@ -449,16 +466,17 @@ def library_dict(library_id):
      for lane in lib.lane_set.all():
          lane_info.append( {'flowcell':lane.flowcell.flowcell_id,
                             'lane_number': lane.lane_number,
+                           'lane_id': lane.id,
                             'paired_end': lane.flowcell.paired_end,
                             'read_length': lane.flowcell.read_length,
                             'status_code': lane.status,
                             'status': LANE_STATUS_MAP[lane.status]} )
-        
+
      info = {
          # 'affiliations'?
          # 'aligned_reads': lib.aligned_reads,
          #'amplified_into_sample': lib.amplified_into_sample, # into is a colleciton...
-        #'amplified_from_sample_id': lib.amplified_from_sample, 
+        #'amplified_from_sample_id': lib.amplified_from_sample,
          #'antibody_name': lib.antibody_name(), # we have no antibodies.
          'antibody_id': lib.antibody_id,
          'cell_line_id': lib.cell_line_id,
@@ -490,13 +508,14 @@ def library_dict(library_id):
          info['library_type'] = lib.library_type.name
      return info
  
+@csrf_exempt
  def library_json(request, library_id):
      """
      Return a json formatted library dictionary
      """
      require_api_key(request)
      # what validation should we do on library_id?
-    
+
      lib = library_dict(library_id)
      if lib is None:
          raise Http404
@@ -504,6 +523,7 @@ def library_json(request, library_id):
      lib_json = json.dumps(lib)
      return HttpResponse(lib_json, mimetype='application/json')
  
+@csrf_exempt
  def species_json(request, species_id):
      """
      Return information about a species.
@@ -512,7 +532,7 @@ def species_json(request, species_id):
  
  def species(request, species_id):
      species = get_object_or_404(Species, id=species_id)
-    
+
      context = RequestContext(request,
                               { 'species': species })
  
@@ -522,7 +542,7 @@ def antibodies(request):
      context = RequestContext(request,
                               {'antibodies': Antibody.objects.order_by('antigene')})
      return render_to_response("samples/antibody_index.html", context)
-    
+
  @login_required
  def user_profile(request):
      """