htsworkflow/frontend/samples/results.py

   1 from django.conf import settings
   2
   3 import glob
   4 import os
   5 import re
   6
   7 s_paren = re.compile("^\w+")
   8
   9 def get_flowcell_result_dict(flowcell_id):
  10     """
  11     returns a dictionary following the following pattern for
  12     a given flowcell_id:
  13
  14
  15     d['C1-33']['summary']           # Summary.htm file path
  16     d['C1-33']['eland_results'][5]  # C1-33 lane 5 file eland results file path
  17     d['C1-33']['run_xml']           # run_*.xml file path
  18     d['C1-33']['scores']            # scores.tar.gz file path
  19     """
  20     flowcell_id = flowcell_id.strip()
  21
  22     d = {}
  23
  24     ################################
  25     # Flowcell Directory
  26     fc_dir = glob.glob(os.path.join(settings.RESULT_HOME_DIR, flowcell_id))
  27
  28     # Not found
  29     if len(fc_dir) == 0:
  30         return None
  31
  32     # No duplicates!
  33     assert len(fc_dir) <= 1
  34
  35     # Found fc dir
  36     fc_dir = fc_dir[0]
  37
  38     ################################
  39     # C#-## dirs
  40     c_dir_list = glob.glob(os.path.join(fc_dir, 'C*'))
  41
  42     # Not found
  43     if len(c_dir_list) == 0:
  44         return d
  45
  46     for c_dir_path in c_dir_list:
  47         summary_file = glob.glob(os.path.join(c_dir_path, 'Summary.htm'))
  48         pathdir, c_dir = os.path.split(c_dir_path)
  49
  50         # Create sub-dictionary
  51         d[c_dir] = {}
  52
  53
  54         ###############################
  55         # Summary.htm file
  56
  57         # Not found
  58         if len(summary_file) == 0:
  59             d[c_dir]['summary'] = None
  60
  61         # Found
  62         else:
  63             # No duplicates!
  64             assert len(summary_file) == 1
  65
  66             summary_file = summary_file[0]
  67             d[c_dir]['summary'] = summary_file
  68
  69         ###############################
  70         # Result files
  71
  72         d[c_dir]['eland_results'] = {}
  73
  74         result_filepaths = glob.glob(os.path.join(c_dir_path, 's_*_eland_*'))
  75
  76         for filepath in result_filepaths:
  77
  78             junk, result_name = os.path.split(filepath)
  79
  80             #lanes 1-8, single digit, therefore s_#; # == index 2
  81             lane = int(result_name[2])
  82             d[c_dir]['eland_results'][lane] = filepath
  83
  84         ###############################
  85         # run*.xml file
  86         run_xml_filepath = glob.glob(os.path.join(c_dir_path, 'run_*.xml'))
  87
  88         if len(run_xml_filepath) == 0:
  89             d[c_dir]['run_xml'] = None
  90         else:
  91             # No duplicates
  92             assert len(run_xml_filepath) == 1
  93
  94             d[c_dir]['run_xml'] = run_xml_filepath[0]
  95
  96         ###############################
  97         # scores.tar.gz
  98         # restrict to only compressed files, so in case there are *.md5 files
  99         # we don't get confused.
 100         scores_filepath = []
 101         for pattern in ['scores*.tar.bz2', 'scores*.tar.gz', 'scores*.tgz']:
 102             scores_filepath += glob.glob(os.path.join(c_dir_path, pattern))
 103
 104         if len(scores_filepath) == 0:
 105             d[c_dir]['scores'] = None
 106         else:
 107             # No duplicates
 108             assert len(scores_filepath) == 1
 109
 110             d[c_dir]['scores'] = scores_filepath[0]
 111
 112     return d
 113
 114
 115 def cn_mTobp(cn_m):
 116     """
 117     Converts CN-M (i.e. C1-33, C1-26, C4-28) cycle information into
 118     number of base pairs.
 119     """
 120     pass
 121
 122