gaworkflow/frontend/fctracker/results.py

   1 from gaworkflow.frontend import settings
   2
   3 import glob
   4 import os
   5 import re
   6
   7 s_paren = re.compile("^\w+")
   8
   9 def get_flowcell_result_dict(flowcell_id):
  10     """
  11     returns a dictionary following the following pattern for
  12     a given flowcell_id:
  13
  14
  15     d['C1-33']['summary'] = #Summary.htm file path
  16     d['C1-33']['eland_results'][5] # C1-33 lane 5 file eland results file path
  17     """
  18
  19     flowcell_id = flowcell_id.strip()
  20
  21     d = {}
  22
  23     ################################
  24     # Flowcell Directory
  25     fc_dir = glob.glob(os.path.join(settings.RESULT_HOME_DIR, flowcell_id))
  26
  27     # Not found
  28     if len(fc_dir) == 0:
  29         return None
  30
  31     # No duplicates!
  32     assert len(fc_dir) <= 1
  33
  34     # Found fc dir
  35     fc_dir = fc_dir[0]
  36
  37     ################################
  38     # C#-## dirs
  39     c_dir_list = glob.glob(os.path.join(fc_dir, 'C*'))
  40
  41     # Not found
  42     if len(c_dir_list) == 0:
  43         return d
  44
  45     for c_dir_path in c_dir_list:
  46         summary_file = glob.glob(os.path.join(c_dir_path, 'Summary.htm'))
  47         pathdir, c_dir = os.path.split(c_dir_path)
  48
  49         # Create sub-dictionary
  50         d[c_dir] = {}
  51
  52
  53         ###############################
  54         # Summary.htm file
  55
  56         # Not found
  57         if len(summary_file) == 0:
  58             d[c_dir]['summary'] = None
  59
  60         # Found
  61         else:
  62             # No duplicates!
  63             assert len(summary_file) == 1
  64
  65             summary_file = summary_file[0]
  66             d[c_dir]['summary'] = summary_file
  67
  68         ###############################
  69         # Result files
  70
  71         d[c_dir]['eland_results'] = {}
  72
  73         result_filepaths = glob.glob(os.path.join(c_dir_path, 's_*_eland_result.txt*'))
  74
  75         for filepath in result_filepaths:
  76
  77             junk, result_name = os.path.split(filepath)
  78
  79             #lanes 1-8, single digit, therefore s_#; # == index 2
  80             lane = int(result_name[2])
  81             d[c_dir]['eland_results'][lane] = filepath
  82
  83         ###############################
  84         # run*.xml file
  85         run_xml_filepath = glob.glob(os.path.join(c_dir_path, 'run_*.xml'))
  86
  87         if len(run_xml_filepath) == 0:
  88             d[c_dir]['run_xml'] = None
  89         else:
  90             # No duplicates
  91             assert len(run_xml_filepath) == 1
  92
  93             d[c_dir]['run_xml'] = run_xml_filepath[0]
  94
  95         ###############################
  96         # scores.tar.gz
  97         scores_filepath = glob.glob(os.path.join(c_dir_path, 'scores*'))
  98
  99         if len(scores_filepath) == 0:
 100             d[c_dir]['scores'] = None
 101         else:
 102             # No duplicates
 103             assert len(scores_filepath) == 1
 104
 105             d[c_dir]['scores'] = scores_filepath[0]
 106
 107     return d
 108
 109
 110 def cn_mTobp(cn_m):
 111     """
 112     Converts CN-M (i.e. C1-33, C1-26, C4-28) cycle information into
 113     number of base pairs.
 114     """
 115     pass
 116
 117
 118 def flowcellIdStrip(flowcell_id):
 119     """
 120     Removes (<words>) from flowcell id
 121     """
 122     mo = s_paren.search(flowcell_id)
 123
 124     if mo:
 125         return flowcell_id[mo.start():mo.end()]
 126     else:
 127         return flowcell_id
 128