Attempt to be robust to not having an alignment in our RunXml file
[htsworkflow.git] / htsworkflow / frontend / samples / views.py
index 12bb79288b7b978f3bf65a84bb0fd4b3108ceaee..e50b5b357c2caa1b88d73cb2f2884cf0124f6158 100644 (file)
@@ -9,6 +9,7 @@ try:
 except ImportError, e:
     import simplejson as json
 
+from django.contrib.csrf.middleware import csrf_exempt
 from htsworkflow.frontend.auth import require_api_key
 from htsworkflow.frontend.experiments.models import FlowCell, Lane, LANE_STATUS_MAP
 from htsworkflow.frontend.samples.changelist import ChangeList
@@ -18,6 +19,7 @@ from htsworkflow.frontend.bcmagic.forms import BarcodeMagicForm
 from htsworkflow.pipelines.runfolder import load_pipeline_run_xml
 from htsworkflow.pipelines import runfolder
 from htsworkflow.pipelines.eland import ResultLane
+from htsworkflow.pipelines.samplekey import SampleKey
 from htsworkflow.util.conversion import unicode_or_none, parse_flowcell_id
 from htsworkflow.util import makebed
 from htsworkflow.util import opener
@@ -37,6 +39,8 @@ SAMPLES_CONTEXT_DEFAULTS = {
     'bcmagic': BarcodeMagicForm()
 }
 
+LOGGER = logging.getLogger(__name__)
+
 def count_lanes(lane_set):
     single = 0
     paired = 1
@@ -44,7 +48,7 @@ def count_lanes(lane_set):
     medium_read = 1
     long_read = 2
     counts = [[0,0,0,],[0,0,0]]
-    
+
     for lane in lane_set.all():
         if lane.flowcell.paired_end:
             lane_type = paired
@@ -57,7 +61,7 @@ def count_lanes(lane_set):
         else:
             read_type = long_read
         counts[lane_type][read_type] += 1
-        
+
     return counts
 
 def create_library_context(cl):
@@ -88,32 +92,39 @@ def create_library_context(cl):
     cl.result_count = unicode(cl.paginator._count)
     return {'library_list': records }
 
-def library(request):
+
+def library(request, todo_only=False):
+    queryset = Library.objects.filter(hidden__exact=0)
+    if todo_only:
+        queryset = queryset.filter(lane=None)
     # build changelist
     fcl = ChangeList(request, Library,
         list_filter=['affiliations', 'library_species'],
         search_fields=['id', 'library_name', 'amplified_from_sample__id'],
         list_per_page=200,
-        queryset=Library.objects.filter(hidden__exact=0)
+        queryset=queryset
     )
 
-    context = { 'cl': fcl, 'title': 'Library Index'}
+    context = { 'cl': fcl, 'title': 'Library Index', 'todo_only': todo_only}
     context.update(create_library_context(fcl))
     t = get_template('samples/library_index.html')
     c = RequestContext(request, context)
     return HttpResponse( t.render(c) )
-    
+
+
+def library_not_run(request):
+    return library(request, todo_only=True)
+
 
 def library_to_flowcells(request, lib_id):
     """
     Display information about all the flowcells a library has been run on.
     """
-    
     try:
-      lib = Library.objects.get(id=lib_id)
+        lib = Library.objects.get(id=lib_id)
     except:
-      return HttpResponse("Library %s does not exist" % (lib_id))
-   
+        raise Http404('Library %s does not exist' % (lib_id,))
+
     flowcell_list = []
     flowcell_run_results = {} # aka flowcells we're looking at
     for lane in lib.lane_set.all():
@@ -127,9 +138,9 @@ def library_to_flowcells(request, lib_id):
     lane_summary_list = []
     eland_results = []
     for fc, lane_number in flowcell_list:
-        lane_summary, err_list = _summary_stats(fc, lane_number)
+        lane_summary, err_list = _summary_stats(fc, lane_number, lib_id)
         lane_summary_list.extend(lane_summary)
-        
+
         eland_results.extend(_make_eland_results(fc, lane_number, flowcell_run_results))
 
     context = {
@@ -167,28 +178,28 @@ def lanes_for(request, username=None):
         context,
         context_instance = RequestContext(request)
     )
-          
-    
+
+
 def summaryhtm_fc_cnm(request, flowcell_id, cnm):
     """
     returns a Summary.htm file if it exists.
     """
     fc_id, status = parse_flowcell_id(flowcell_id)
     d = get_flowcell_result_dict(fc_id)
-    
+
     if d is None:
         return HttpResponse('<b>Results for Flowcell %s not found.</b>' % (fc_id))
-    
+
     if cnm not in d:
         return HttpResponse('<b>Results for Flowcell %s; %s not found.</b>' % (fc_id, cnm))
-    
+
     summary_filepath = d[cnm]['summary']
-    
+
     if summary_filepath is None:
         return HttpResponse('<b>Summary.htm for Flowcell %s; %s not found.</b>' % (fc_id, cnm))
-    
+
     f = open(summary_filepath, 'r')
-    
+
     return HttpResponse(f)
 
 
@@ -198,27 +209,27 @@ def result_fc_cnm_eland_lane(request, flowcell_id, cnm, lane):
     """
     fc_id, status = parse_flowcell_id(flowcell_id)
     d = get_flowcell_result_dict(fc_id)
-    
+
     if d is None:
         return HttpResponse('<b>Results for Flowcell %s not found.</b>' % (fc_id))
-    
+
     if cnm not in d:
         return HttpResponse('<b>Results for Flowcell %s; %s not found.</b>' % (fc_id, cnm))
-    
+
     erd = d[cnm]['eland_results']
     lane = int(lane)
-    
+
     if lane not in erd:
         return HttpResponse('<b>Results for Flowcell %s; %s; lane %s not found.</b>' % (fc_id, cnm, lane))
-    
+
     filepath = erd[lane]
-    
+
     #f = opener.autoopen(filepath, 'r')
     # return HttpResponse(f, mimetype="application/x-elandresult")
 
     f = open(filepath, 'r')
     return HttpResponse(f, mimetype='application/x-bzip2')
-    
+
 
 
 def bedfile_fc_cnm_eland_lane_ucsc(request, fc_id, cnm, lane):
@@ -234,36 +245,36 @@ def bedfile_fc_cnm_eland_lane(request, flowcell_id, cnm, lane, ucsc_compatible=F
     """
     fc_id, status = parse_flowcell_id(flowcell_id)
     d = get_flowcell_result_dict(fc_id)
-    
+
     if d is None:
         return HttpResponse('<b>Results for Flowcell %s not found.</b>' % (fc_id))
-    
+
     if cnm not in d:
         return HttpResponse('<b>Results for Flowcell %s; %s not found.</b>' % (fc_id, cnm))
-    
+
     erd = d[cnm]['eland_results']
     lane = int(lane)
-    
+
     if lane not in erd:
         return HttpResponse('<b>Results for Flowcell %s; %s; lane %s not found.</b>' % (fc_id, cnm, lane))
-    
+
     filepath = erd[lane]
-    
+
     # Eland result file
     fi = opener.autoopen(filepath, 'r')
     # output memory file
-    
+
     name, description = makebed.make_description( fc_id, lane )
-    
+
     bedgen = makebed.make_bed_from_eland_generator(fi, name, description)
-    
+
     if ucsc_compatible:
         return HttpResponse(bedgen)
     else:
         return HttpResponse(bedgen, mimetype="application/x-bedfile")
 
 
-def _summary_stats(flowcell_id, lane_id):
+def _summary_stats(flowcell_id, lane_id, library_id):
     """
     Return the summary statistics for a given flowcell, lane, and end.
     """
@@ -272,54 +283,60 @@ def _summary_stats(flowcell_id, lane_id):
 
     summary_list = []
     err_list = []
-    
+
     if fc_result_dict is None:
         err_list.append('Results for Flowcell %s not found.' % (fc_id))
         return (summary_list, err_list)
 
     for cycle_width in fc_result_dict:
         xmlpath = fc_result_dict[cycle_width]['run_xml']
-        
+
         if xmlpath is None:
             err_list.append('Run xml for Flowcell %s(%s) not found.' % (fc_id, cycle_width))
             continue
-        
+
         run = load_pipeline_run_xml(xmlpath)
+        # skip if we don't have available metadata.
+        if run.gerald is None or run.gerald.summary is None:
+            continue
+        
         gerald_summary = run.gerald.summary.lane_results
-        for end in range(len(gerald_summary)):
-            end_summary = run.gerald.eland_results.results[end]
-            if end_summary.has_key(lane_id):
-                eland_summary = run.gerald.eland_results.results[end][lane_id]
-            else:
-                eland_summary = ResultLane(lane_id=lane_id, end=end)
+        key = SampleKey(lane=lane_id, sample='s')
+        eland_results = list(run.gerald.eland_results.find_keys(key))
+        key = SampleKey(lane=lane_id, sample=library_id)
+        eland_results.extend(run.gerald.eland_results.find_keys(key))
+        for key in eland_results:
+            eland_summary = run.gerald.eland_results.results[key]
             # add information to lane_summary
             eland_summary.flowcell_id = flowcell_id
-            if len(gerald_summary) > end and gerald_summary[end].has_key(lane_id):
-                eland_summary.clusters = gerald_summary[end][lane_id].cluster
-            else:
-                eland_summary.clusters = None
+
+            read = key.read-1 if key.read is not None else 0
+            try:
+                eland_summary.clusters = gerald_summary[read][key.lane].cluster
+            except (IndexError, KeyError) as e:
+                eland_summary.clusters = None
             eland_summary.cycle_width = cycle_width
             if hasattr(eland_summary, 'genome_map'):
-                eland_summary.summarized_reads = runfolder.summarize_mapped_reads( 
-                                                   eland_summary.genome_map, 
+                eland_summary.summarized_reads = runfolder.summarize_mapped_reads(
+                                                   eland_summary.genome_map,
                                                    eland_summary.mapped_reads)
 
             # grab some more information out of the flowcell db
             flowcell = FlowCell.objects.get(flowcell_id=flowcell_id)
             #pm_field = 'lane_%d_pM' % (lane_id)
-            lane_obj = flowcell.lane_set.get(lane_number=lane_id)
+            lanes = flowcell.lane_set.filter(lane_number=lane_id)
             eland_summary.flowcell = flowcell
-            eland_summary.lane = lane_obj
+            eland_summary.lanes = lanes
 
             summary_list.append(eland_summary)
 
         #except Exception, e:
         #    summary_list.append("Summary report needs to be updated.")
-        #    logging.error("Exception: " + str(e))
-    
+        #    LOGGER.error("Exception: " + str(e))
+
     return (summary_list, err_list)
 
-    
+
 def get_eland_result_type(pathname):
     """
     Guess the eland result file type from the filename
@@ -341,7 +358,7 @@ def _make_eland_results(flowcell_id, lane_number, interesting_flowcells):
       return []
 
     flowcell = FlowCell.objects.get(flowcell_id=flowcell_id)
-    lane = flowcell.lane_set.get(lane_number=lane_number)
+    lanes = flowcell.lane_set.filter(lane_number=lane_number)
     # Loop throw storage devices if a result has been archived
     storage_id_list = []
     if cur_fc is not None:
@@ -353,7 +370,7 @@ def _make_eland_results(flowcell_id, lane_number, interesting_flowcells):
                 # Otherwise use UUID
                 else:
                     storage_id_list.append(sd.uuid)
-        
+
     # Formatting for template use
     if len(storage_id_list) == 0:
         storage_ids = None
@@ -362,13 +379,13 @@ def _make_eland_results(flowcell_id, lane_number, interesting_flowcells):
 
     results = []
     for cycle in cur_fc.keys():
-        result_path = cur_fc[cycle]['eland_results'].get(lane, None)
-        result_link = make_result_link(fc_id, cycle, lane, result_path)
+        result_path = cur_fc[cycle]['eland_results'].get(lanes[0], None)
+        result_link = make_result_link(fc_id, cycle, lanes[0], result_path)
         results.append({'flowcell_id': fc_id,
                         'flowcell': flowcell,
                         'run_date': flowcell.run_date,
-                        'cycle': cycle, 
-                        'lane': lane
+                        'cycle': cycle,
+                        'lane': lanes[0],
                         'summary_url': make_summary_url(flowcell_id, cycle),
                         'result_url': result_link[0],
                         'result_label': result_link[1],
@@ -392,7 +409,7 @@ def make_result_link(flowcell_id, cycle_name, lane, eland_result_path):
     if result_type == 'result':
        bed_url_pattern = '/results/%s/%s/bedfile/%s'
        bed_url = bed_url_pattern % (flowcell_id, cycle_name, lane)
-    
+
     return (result_url, result_label, bed_url)
 
 def _files(flowcell_id, lane):
@@ -403,19 +420,19 @@ def _files(flowcell_id, lane):
 
     flowcell_id, id = parse_flowcell_id(flowcell_id)
     d = get_flowcell_result_dict(flowcell_id)
-    
+
     if d is None:
         return ''
-    
+
     output = []
-    
+
     # c_name == 'CN-M' (i.e. C1-33)
     for c_name in d:
-        
+
         if d[c_name]['summary'] is not None:
             output.append('<a href="/results/%s/%s/summary/">summary(%s)</a>' \
                           % (flowcell_id, c_name, c_name))
-        
+
         erd = d[c_name]['eland_results']
         if lane in erd:
             result_type = get_eland_result_type(erd[lane])
@@ -424,10 +441,10 @@ def _files(flowcell_id, lane):
             if result_type == 'result':
                 bed_url_pattern = '<a href="/results/%s/%s/bedfile/%s">bedfile(%s)</a>'
                 output.append(bed_url_pattern % (flowcell_id, c_name, lane, c_name))
-    
+
     if len(output) == 0:
         return ''
-    
+
     return '(' + '|'.join(output) + ')'
 
 def library_id_to_admin_url(request, lib_id):
@@ -449,16 +466,17 @@ def library_dict(library_id):
     for lane in lib.lane_set.all():
         lane_info.append( {'flowcell':lane.flowcell.flowcell_id,
                            'lane_number': lane.lane_number,
+                           'lane_id': lane.id,
                            'paired_end': lane.flowcell.paired_end,
                            'read_length': lane.flowcell.read_length,
                            'status_code': lane.status,
                            'status': LANE_STATUS_MAP[lane.status]} )
-        
+
     info = {
         # 'affiliations'?
         # 'aligned_reads': lib.aligned_reads,
         #'amplified_into_sample': lib.amplified_into_sample, # into is a colleciton...
-        #'amplified_from_sample_id': lib.amplified_from_sample, 
+        #'amplified_from_sample_id': lib.amplified_from_sample,
         #'antibody_name': lib.antibody_name(), # we have no antibodies.
         'antibody_id': lib.antibody_id,
         'cell_line_id': lib.cell_line_id,
@@ -490,13 +508,14 @@ def library_dict(library_id):
         info['library_type'] = lib.library_type.name
     return info
 
+@csrf_exempt
 def library_json(request, library_id):
     """
     Return a json formatted library dictionary
     """
     require_api_key(request)
     # what validation should we do on library_id?
-    
+
     lib = library_dict(library_id)
     if lib is None:
         raise Http404
@@ -504,6 +523,7 @@ def library_json(request, library_id):
     lib_json = json.dumps(lib)
     return HttpResponse(lib_json, mimetype='application/json')
 
+@csrf_exempt
 def species_json(request, species_id):
     """
     Return information about a species.
@@ -512,7 +532,7 @@ def species_json(request, species_id):
 
 def species(request, species_id):
     species = get_object_or_404(Species, id=species_id)
-    
+
     context = RequestContext(request,
                              { 'species': species })
 
@@ -522,7 +542,7 @@ def antibodies(request):
     context = RequestContext(request,
                              {'antibodies': Antibody.objects.order_by('antigene')})
     return render_to_response("samples/antibody_index.html", context)
-    
+
 @login_required
 def user_profile(request):
     """