"""
Analyze the Summary.htm file produced by GERALD
"""
+import logging
import types
+from pprint import pprint
from htsworkflow.pipelines.runfolder import ElementTree
from htsworkflow.util.ethelp import indent, flatten
+nan = float('nan')
+
class Summary(object):
"""
Extract some useful information from the Summary.htm file
def set_elements_from_html(self, data):
if not len(data) in (8,10):
- raise RuntimeError("Summary.htm file format changed")
+ raise RuntimeError("Summary.htm file format changed, len(data)=%d" % (len(data),))
# same in pre-0.3.0 Summary file and 0.3 summary file
self.lane = int(data[0])
for name, end in table_names:
if tables.has_key(name):
self._extract_lane_results_for_end(tables, name, end)
+ else:
+ logging.warning("No Lane Results Summary Found in %s" % (pathname,))
def _extract_lane_results_for_end(self, tables, table_name, end):
"""
# grab the lane by lane data
lane_summary = lane_summary[1:]
- # this is version 2 of the summary file
- if len(lane_summary[-1]) == 10:
+ # len(lane_summary[-1]) == 10 is version 2 of the Summary.htm file
+ # len(lane_summary[-1]) == 9 is version 3 of the Summary.htm file
+ elif len(lane_summary[-1]) in (9, 10):
# lane_summary[0] is a different less specific header row
headers = lane_summary[1]
lane_summary = lane_summary[2:10]
for element in list(tree):
lrs = Summary.LaneResultSummary()
lrs.set_elements(element)
- print lrs.end, lrs.lane
if len(self.lane_results) < (lrs.end + 1):
self.lane_results.append({})
self.lane_results[lrs.end][lrs.lane] = lrs
Parse values like 123 +/- 4.5
"""
if value.strip() == 'unknown':
- return 0, 0
+ return nan, nan
+
+ values = value.split()
+ if len(values) == 1:
+ if values[0] == '+/-':
+ return nan,nan
+ else:
+ return tonumber(values[0])
- average, pm, deviation = value.split()
+ average, pm, deviation = values
if pm != '+/-':
raise RuntimeError("Summary.htm file format changed")
return tonumber(average), tonumber(deviation)