gaworkflow/frontend/fctracker/results.py

   1 from gaworkflow.frontend import settings
   2
   3 import glob
   4 import os
   5 import re
   6
   7 s_paren = re.compile("^\w+")
   8
   9 def get_flowcell_result_dict(flowcell_id):
  10     """
  11     returns a dictionary following the following pattern for
  12     a given flowcell_id:
  13
  14
  15     d['C1-33']['summary']           # Summary.htm file path
  16     d['C1-33']['eland_results'][5]  # C1-33 lane 5 file eland results file path
  17     d['C1-33']['run_xml']           # run_*.xml file path
  18     d['C1-33']['scores']            # scores.tar.gz file path
  19     """
  20
  21     flowcell_id = flowcell_id.strip()
  22
  23     d = {}
  24
  25     ################################
  26     # Flowcell Directory
  27     fc_dir = glob.glob(os.path.join(settings.RESULT_HOME_DIR, flowcell_id))
  28
  29     # Not found
  30     if len(fc_dir) == 0:
  31         return None
  32
  33     # No duplicates!
  34     assert len(fc_dir) <= 1
  35
  36     # Found fc dir
  37     fc_dir = fc_dir[0]
  38
  39     ################################
  40     # C#-## dirs
  41     c_dir_list = glob.glob(os.path.join(fc_dir, 'C*'))
  42
  43     # Not found
  44     if len(c_dir_list) == 0:
  45         return d
  46
  47     for c_dir_path in c_dir_list:
  48         summary_file = glob.glob(os.path.join(c_dir_path, 'Summary.htm'))
  49         pathdir, c_dir = os.path.split(c_dir_path)
  50
  51         # Create sub-dictionary
  52         d[c_dir] = {}
  53
  54
  55         ###############################
  56         # Summary.htm file
  57
  58         # Not found
  59         if len(summary_file) == 0:
  60             d[c_dir]['summary'] = None
  61
  62         # Found
  63         else:
  64             # No duplicates!
  65             assert len(summary_file) == 1
  66
  67             summary_file = summary_file[0]
  68             d[c_dir]['summary'] = summary_file
  69
  70         ###############################
  71         # Result files
  72
  73         d[c_dir]['eland_results'] = {}
  74
  75         result_filepaths = glob.glob(os.path.join(c_dir_path, 's_*_eland_result.txt*'))
  76
  77         for filepath in result_filepaths:
  78
  79             junk, result_name = os.path.split(filepath)
  80
  81             #lanes 1-8, single digit, therefore s_#; # == index 2
  82             lane = int(result_name[2])
  83             d[c_dir]['eland_results'][lane] = filepath
  84
  85         ###############################
  86         # run*.xml file
  87         run_xml_filepath = glob.glob(os.path.join(c_dir_path, 'run_*.xml'))
  88
  89         if len(run_xml_filepath) == 0:
  90             d[c_dir]['run_xml'] = None
  91         else:
  92             # No duplicates
  93             assert len(run_xml_filepath) == 1
  94
  95             d[c_dir]['run_xml'] = run_xml_filepath[0]
  96
  97         ###############################
  98         # scores.tar.gz
  99         scores_filepath = glob.glob(os.path.join(c_dir_path, 'scores*'))
 100
 101         if len(scores_filepath) == 0:
 102             d[c_dir]['scores'] = None
 103         else:
 104             # No duplicates
 105             assert len(scores_filepath) == 1
 106
 107             d[c_dir]['scores'] = scores_filepath[0]
 108
 109     return d
 110
 111
 112 def cn_mTobp(cn_m):
 113     """
 114     Converts CN-M (i.e. C1-33, C1-26, C4-28) cycle information into
 115     number of base pairs.
 116     """
 117     pass
 118
 119
 120 def flowcellIdStrip(flowcell_id):
 121     """
 122     Removes (<words>) from flowcell id
 123     """
 124     mo = s_paren.search(flowcell_id)
 125
 126     if mo:
 127         return flowcell_id[mo.start():mo.end()]
 128     else:
 129         return flowcell_id
 130