Fix ticket:145: this patch includes the tar.bz2 extension in the scores pattern.
[htsworkflow.git] htsworkflow/frontend/samples/results.py
from htsworkflow.frontend import settings

import glob
import os
import re

# Matches the leading run of word characters in a string.
s_paren = re.compile(r"^\w+")

def get_flowcell_result_dict(flowcell_id):
    """
    Returns a dictionary with the following structure for a given
    flowcell_id:

    d['C1-33']['summary']           # Summary.htm file path
    d['C1-33']['eland_results'][5]  # C1-33 lane 5 eland results file path
    d['C1-33']['run_xml']           # run_*.xml file path
    d['C1-33']['scores']            # scores.tar.gz file path
    """
    flowcell_id = flowcell_id.strip()

    d = {}

    ################################
    # Flowcell Directory
    fc_dir = glob.glob(os.path.join(settings.RESULT_HOME_DIR, flowcell_id))

    # Not found
    if len(fc_dir) == 0:
        return None

    # No duplicates!
    assert len(fc_dir) <= 1

    # Found fc dir
    fc_dir = fc_dir[0]

    ################################
    # C#-## dirs
    c_dir_list = glob.glob(os.path.join(fc_dir, 'C*'))

    # Not found
    if len(c_dir_list) == 0:
        return d
    for c_dir_path in c_dir_list:
        summary_file = glob.glob(os.path.join(c_dir_path, 'Summary.htm'))
        pathdir, c_dir = os.path.split(c_dir_path)

        # Create sub-dictionary
        d[c_dir] = {}

        ###############################
        # Summary.htm file

        # Not found
        if len(summary_file) == 0:
            d[c_dir]['summary'] = None

        # Found
        else:
            # No duplicates!
            assert len(summary_file) == 1

            summary_file = summary_file[0]
            d[c_dir]['summary'] = summary_file

        ###############################
        # Result files

        d[c_dir]['eland_results'] = {}
        result_filepaths = glob.glob(os.path.join(c_dir_path, 's_*_eland_*'))

        for filepath in result_filepaths:

            junk, result_name = os.path.split(filepath)

            # Lanes are 1-8 (a single digit), so in an 's_#_eland_*' filename
            # the lane number is the character at index 2.
            lane = int(result_name[2])
            d[c_dir]['eland_results'][lane] = filepath

        ###############################
        # run*.xml file
        run_xml_filepath = glob.glob(os.path.join(c_dir_path, 'run_*.xml'))

        if len(run_xml_filepath) == 0:
            d[c_dir]['run_xml'] = None
        else:
            # No duplicates
            assert len(run_xml_filepath) == 1

            d[c_dir]['run_xml'] = run_xml_filepath[0]
        ###############################
        # scores.tar.gz
        # Restrict the match to compressed archives so that any *.md5
        # checksum files alongside them are not picked up by mistake.
        scores_filepath = []
        for pattern in ['scores*.tar.bz2', 'scores*.tar.gz', 'scores*.tgz']:
            scores_filepath += glob.glob(os.path.join(c_dir_path, pattern))

        if len(scores_filepath) == 0:
            d[c_dir]['scores'] = None
        else:
            # No duplicates
            assert len(scores_filepath) == 1

            d[c_dir]['scores'] = scores_filepath[0]

    return d

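# Usage sketch (illustrative only, not part of the original module): the
# flowcell id '42JTT' and the 'C1-33' key below are hypothetical example values.
#
#     results = get_flowcell_result_dict('42JTT')
#     if results is not None:
#         for c_dir, info in results.items():
#             summary = info['summary']              # Summary.htm path or None
#             lane_5 = info['eland_results'].get(5)  # lane 5 eland file, if present
#             scores = info['scores']                # scores tarball path or None
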
def cn_mTobp(cn_m):
    """
    Converts CN-M (e.g. C1-33, C1-26, C4-28) cycle information into
    a number of base pairs.
    """
    pass

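# Possible implementation sketch (assumption: 'CN-M' names a cycle range running
# from cycle N to cycle M, so the read length would be M - N + 1 base pairs,
# e.g. 'C1-33' -> 33 bp).  Not part of the original module.
#
#     def _cn_m_to_bp(cn_m):
#         match = re.match(r"^C(\d+)-(\d+)$", cn_m)
#         if match is None:
#             return None
#         start, end = int(match.group(1)), int(match.group(2))
#         return end - start + 1
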
def parse_flowcell_id(flowcell_id):
    """
    Return the flowcell id and any status encoded in the id.

    We stored the status information in the flowcell id name.
    This was dumb, but database schemas are hard to update.
    """
    fields = flowcell_id.split()
    fcid = None
    status = None
    if len(fields) > 0:
        fcid = fields[0]
    if len(fields) > 1:
        status = fields[1]
    return fcid, status
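
# Usage sketch (the id and status values below are hypothetical examples):
#
#     parse_flowcell_id('42JTT')           # -> ('42JTT', None)
#     parse_flowcell_id('42JTT (failed)')  # -> ('42JTT', '(failed)')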