samples/results.py

   1 from __future__ import unicode_literals
   2
   3 from django.conf import settings
   4
   5 import glob
   6 import os
   7 import re
   8
   9 s_paren = re.compile("^\w+")
  10
  11 def get_flowcell_result_dict(flowcell_id):
  12     """
  13     returns a dictionary following the following pattern for
  14     a given flowcell_id:
  15
  16
  17     d['C1-33']['summary']           # Summary.htm file path
  18     d['C1-33']['eland_results'][5]  # C1-33 lane 5 file eland results file path
  19     d['C1-33']['run_xml']           # run_*.xml file path
  20     d['C1-33']['scores']            # scores.tar.gz file path
  21     """
  22     flowcell_id = flowcell_id.strip()
  23
  24     d = {}
  25
  26     ################################
  27     # Flowcell Directory
  28     fc_dir = glob.glob(os.path.join(settings.RESULT_HOME_DIR, flowcell_id))
  29
  30     # Not found
  31     if len(fc_dir) == 0:
  32         return None
  33
  34     # No duplicates!
  35     assert len(fc_dir) <= 1
  36
  37     # Found fc dir
  38     fc_dir = fc_dir[0]
  39
  40     ################################
  41     # C#-## dirs
  42     c_dir_list = glob.glob(os.path.join(fc_dir, 'C*'))
  43
  44     # Not found
  45     if len(c_dir_list) == 0:
  46         return d
  47
  48     for c_dir_path in c_dir_list:
  49         summary_file = glob.glob(os.path.join(c_dir_path, 'Summary.htm'))
  50         pathdir, c_dir = os.path.split(c_dir_path)
  51
  52         # Create sub-dictionary
  53         d[c_dir] = {}
  54
  55
  56         ###############################
  57         # Summary.htm file
  58
  59         # Not found
  60         if len(summary_file) == 0:
  61             d[c_dir]['summary'] = None
  62
  63         # Found
  64         else:
  65             # No duplicates!
  66             assert len(summary_file) == 1
  67
  68             summary_file = summary_file[0]
  69             d[c_dir]['summary'] = summary_file
  70
  71         ###############################
  72         # Result files
  73
  74         d[c_dir]['eland_results'] = {}
  75
  76         result_filepaths = glob.glob(os.path.join(c_dir_path, 's_*_eland_*'))
  77
  78         for filepath in result_filepaths:
  79
  80             junk, result_name = os.path.split(filepath)
  81
  82             #lanes 1-8, single digit, therefore s_#; # == index 2
  83             lane = int(result_name[2])
  84             d[c_dir]['eland_results'][lane] = filepath
  85
  86         ###############################
  87         # run*.xml file
  88         run_xml_filepath = glob.glob(os.path.join(c_dir_path, 'run_*.xml'))
  89
  90         if len(run_xml_filepath) == 0:
  91             d[c_dir]['run_xml'] = None
  92         else:
  93             # No duplicates
  94             assert len(run_xml_filepath) == 1
  95
  96             d[c_dir]['run_xml'] = run_xml_filepath[0]
  97
  98         ###############################
  99         # scores.tar.gz
 100         # restrict to only compressed files, so in case there are *.md5 files
 101         # we don't get confused.
 102         scores_filepath = []
 103         for pattern in ['scores*.tar.bz2', 'scores*.tar.gz', 'scores*.tgz']:
 104             scores_filepath += glob.glob(os.path.join(c_dir_path, pattern))
 105
 106         if len(scores_filepath) == 0:
 107             d[c_dir]['scores'] = None
 108         else:
 109             # No duplicates
 110             assert len(scores_filepath) == 1
 111
 112             d[c_dir]['scores'] = scores_filepath[0]
 113
 114     return d
 115
 116
 117 def cn_mTobp(cn_m):
 118     """
 119     Converts CN-M (i.e. C1-33, C1-26, C4-28) cycle information into
 120     number of base pairs.
 121     """
 122     pass
 123
 124