2 Provide access to information stored in the GERALD directory.
4 from datetime import datetime, date
10 from htsworkflow.pipelines.summary import Summary, SummaryGA, SummaryHiSeq
11 from htsworkflow.pipelines.eland import eland, ELAND
13 from htsworkflow.pipelines.runfolder import \
18 from htsworkflow.util.ethelp import indent, flatten
20 LOGGER = logging.getLogger(__name__)
24 Capture meaning out of the GERALD directory
28 RUN_PARAMETERS='RunParameters'
31 def __init__(self, xml=None):
35 # parse lane parameters out of the config.xml file
36 self.lanes = LaneSpecificRunParameters(self)
39 self.eland_results = None
42 self.set_elements(xml)
46 return datetime.today()
47 timestamp = self.tree.findtext('ChipWideRunParameters/TIME_STAMP')
48 if timestamp is not None:
49 epochstamp = time.mktime(time.strptime(timestamp, '%c'))
50 return datetime.fromtimestamp(epochstamp)
51 if self.pathname is not None:
52 epochstamp = os.stat(self.pathname)[stat.ST_MTIME]
53 return datetime.fromtimestamp(epochstamp)
54 return datetime.today()
55 date = property(_get_date)
58 return time.mktime(self.date.timetuple())
59 time = property(_get_time, doc='return run time as seconds since epoch')
61 def _get_experiment_root(self):
64 return self.tree.findtext('ChipWideRunParameters/EXPT_DIR_ROOT')
66 def _get_runfolder_name(self):
70 expt_root = os.path.normpath(self._get_experiment_root())
71 chip_expt_dir = self.tree.findtext('ChipWideRunParameters/EXPT_DIR')
72 # hiseqs renamed the experiment dir location
73 defaults_expt_dir = self.tree.findtext('Defaults/EXPT_DIR')
76 if defaults_expt_dir is not None:
77 _, experiment_dir = os.path.split(defaults_expt_dir)
78 elif expt_root is not None and chip_expt_dir is not None:
79 experiment_dir = chip_expt_dir.replace(expt_root+os.path.sep, '')
80 experiment_dir = experiment_dir.split(os.path.sep)[0]
82 if experiment_dir is None or len(experiment_dir) == 0:
86 runfolder_name = property(_get_runfolder_name)
88 def _get_version(self):
91 ga_version = self.tree.findtext(
92 'ChipWideRunParameters/SOFTWARE_VERSION')
93 if ga_version is not None:
95 hiseq_software_node = self.tree.find('Software')
96 hiseq_version = hiseq_software_node.attrib['Version']
99 version = property(_get_version)
101 def _get_chip_attribute(self, value):
102 return self.tree.findtext('ChipWideRunParameters/%s' % (value,))
106 Debugging function, report current object
108 print 'Gerald version:', self.version
109 print 'Gerald run date:', self.date
110 print 'Gerald config.xml:', self.tree
113 def get_elements(self):
114 if self.tree is None or self.summary is None:
117 gerald = ElementTree.Element(Gerald.GERALD,
118 {'version': unicode(Gerald.XML_VERSION)})
119 gerald.append(self.tree)
120 gerald.append(self.summary.get_elements())
121 if self.eland_results:
122 gerald.append(self.eland_results.get_elements())
125 def set_elements(self, tree):
126 if tree.tag != Gerald.GERALD:
127 raise ValueError('exptected GERALD')
128 xml_version = int(tree.attrib.get('version', 0))
129 if xml_version > Gerald.XML_VERSION:
130 LOGGER.warn('XML tree is a higher version than this class')
131 self.eland_results = ELAND()
132 for element in list(tree):
133 tag = element.tag.lower()
134 if tag == Gerald.RUN_PARAMETERS.lower():
136 elif tag == Gerald.SUMMARY.lower():
137 self.summary = Summary(xml=element)
138 elif tag == ELAND.ELAND.lower():
139 self.eland_results = ELAND(xml=element)
141 LOGGER.warn("Unrecognized tag %s" % (element.tag,))
class LaneParameters(object):
    """
    Make it easy to access elements of LaneSpecificRunParameters from python

    This is an abstract base: each accessor raises NotImplementedError
    and is expected to be overridden (and its property rebound) by a
    concrete subclass.
    """
    def __init__(self, gerald, lane_id):
        """Store the owning gerald object and this lane's identifier.

        :param gerald: object kept as ``self._gerald``
        :param lane_id: lane identifier kept as ``self._lane_id``
        """
        self._gerald = gerald
        self._lane_id = lane_id

    def _get_analysis(self):
        # NotImplemented is the rich-comparison sentinel and is not
        # callable; NotImplementedError is the exception for abstract
        # methods.
        raise NotImplementedError("abstract class")
    analysis = property(_get_analysis)

    def _get_eland_genome(self):
        raise NotImplementedError("abstract class")
    eland_genome = property(_get_eland_genome)

    def _get_read_length(self):
        raise NotImplementedError("abstract class")
    read_length = property(_get_read_length)

    def _get_use_bases(self):
        raise NotImplementedError("abstract class")
    use_bases = property(_get_use_bases)
169 class LaneParametersGA(LaneParameters):
171 Make it easy to access elements of LaneSpecificRunParameters from python
def __init__(self, gerald, lane_id):
    """Initialise via the LaneParameters base constructor.

    :param gerald: forwarded unchanged to the base class
    :param lane_id: forwarded unchanged to the base class
    """
    super(LaneParametersGA, self).__init__(gerald, lane_id)
176 def __get_attribute(self, xml_tag):
177 subtree = self._gerald.tree.find('LaneSpecificRunParameters')
178 container = subtree.find(xml_tag)
179 if container is None:
181 if len(container.getchildren()) > LANES_PER_FLOWCELL:
182 raise RuntimeError('GERALD config.xml file changed')
183 lanes = [x.tag.split('_')[1] for x in container.getchildren()]
185 index = lanes.index(self._lane_id)
186 except ValueError, e:
188 element = container[index]
def _get_analysis(self):
    """Return what __get_attribute reports for the 'ANALYSIS' tag."""
    analysis_type = self.__get_attribute('ANALYSIS')
    return analysis_type
analysis = property(_get_analysis)
194 def _get_eland_genome(self):
195 genome = self.__get_attribute('ELAND_GENOME')
196 # default to the chipwide parameters if there isn't an
197 # entry in the lane specific parameters
199 genome = self._gerald._get_chip_attribute('ELAND_GENOME')
201 if genome == 'Need_to_specify_ELAND_genome_directory':
204 eland_genome = property(_get_eland_genome)
206 def _get_read_length(self):
207 read_length = self.__get_attribute('READ_LENGTH')
208 if read_length is None:
209 read_length = self._gerald._get_chip_attribute('READ_LENGTH')
211 read_length = property(_get_read_length)
def _get_use_bases(self):
    """Return what __get_attribute reports for the 'USE_BASES' tag."""
    use_bases_mask = self.__get_attribute('USE_BASES')
    return use_bases_mask
use_bases = property(_get_use_bases)
218 class LaneParametersHiSeq(LaneParameters):
220 Make it easy to access elements of LaneSpecificRunParameters from python
def __init__(self, gerald, lane_id, element):
    """Keep the lane's XML element and initialise the base class.

    :param gerald: forwarded to the LaneParameters constructor
    :param lane_id: forwarded to the LaneParameters constructor
    :param element: stored as ``self.element``; ``__get_attribute``
        searches it with ``find``
    """
    self.element = element
    super(LaneParametersHiSeq, self).__init__(gerald, lane_id)
226 def __get_attribute(self, xml_tag):
227 container = self.element.find(xml_tag)
228 if container is None:
230 return container.text
def _get_analysis(self):
    """Return what __get_attribute reports for the 'ANALYSIS' tag."""
    analysis_type = self.__get_attribute('ANALYSIS')
    return analysis_type
analysis = property(_get_analysis)
236 def _get_eland_genome(self):
237 genome = self.__get_attribute('ELAND_GENOME')
238 # default to the chipwide parameters if there isn't an
239 # entry in the lane specific parameters
241 genome = self._gerald._get_chip_attribute('ELAND_GENOME')
243 if genome == 'Need_to_specify_ELAND_genome_directory':
246 eland_genome = property(_get_eland_genome)
def _get_read_length(self):
    """Return what __get_attribute reports for the 'READ_LENGTH1' tag."""
    first_read_length = self.__get_attribute('READ_LENGTH1')
    return first_read_length
read_length = property(_get_read_length)
def _get_use_bases(self):
    """Return what __get_attribute reports for the 'USE_BASES1' tag."""
    first_use_bases = self.__get_attribute('USE_BASES1')
    return first_use_bases
use_bases = property(_get_use_bases)
256 class LaneSpecificRunParameters(object):
258 Provide access to LaneSpecificRunParameters
260 def __init__(self, gerald):
261 self._gerald = gerald
264 def _initalize_lanes(self):
266 build dictionary of LaneParameters
269 tree = self._gerald.tree
270 analysis = tree.find('LaneSpecificRunParameters/ANALYSIS')
271 if analysis is not None:
272 self._extract_ga_analysis_type(analysis)
273 analysis = tree.find('Projects')
274 if analysis is not None:
275 self._extract_hiseq_analysis_type(analysis)
def _extract_ga_analysis_type(self, analysis):
    """Register a LaneParametersGA for every child of the analysis node.

    Each child's tag is split on '_' into exactly two parts; the second
    part is taken as the lane id.  The entry is stored in ``self._lanes``
    under the integer form of the lane id, while the LaneParametersGA
    itself receives the lane id as the original string.

    :param analysis: iterable of elements exposing a ``tag`` attribute
    :raises ValueError: if a tag does not split into exactly two parts,
        or the lane id part is not an integer
    """
    # The pipeline specs appear to name these fields sampleName_laneID,
    # with sampleName defaulting to "s".  Lane ids are constant, so they
    # are used consistently as the lookup key.
    for lane_node in analysis:
        _sample_name, lane_label = lane_node.tag.split('_')
        lane_parameters = LaneParametersGA(self._gerald, lane_label)
        self._lanes[int(lane_label)] = lane_parameters
287 def _extract_hiseq_analysis_type(self, analysis):
288 """Extract from HiSeq style multiplexed analysis types"""
289 for element in analysis:
290 name = element.attrib['name']
291 self._lanes[name] = LaneParametersHiSeq(self._gerald,
296 return self._lanes.iterkeys()
297 def __getitem__(self, key):
298 if self._lane is None:
299 self._initalize_lanes()
300 return self._lanes[key]
def get(self, key, default=None):
    """Return the lane parameters stored under key, or default if absent.

    Calls ``_initalize_lanes`` first whenever ``self._lane`` is None.

    :param key: lane key to look up in ``self._lanes``
    :param default: value returned when key is missing; defaults to None
        to mirror ``dict.get``
    """
    if self._lane is None:
        self._initalize_lanes()
    # Pass the caller's default through; it was previously discarded in
    # favour of a hard-coded None.
    return self._lanes.get(key, default)
306 if self._lane is None:
307 self._initalize_lanes()
308 return self._lanes.keys()
310 if self._lane is None:
311 self._initalize_lanes()
312 return self._lanes.values()
314 if self._lane is None:
315 self._initalize_lanes()
316 return self._lanes.items()
318 if self._lane is None:
319 self._initalize_lanes()
320 return len(self._lanes)
323 def gerald(pathname):
325 g.pathname = os.path.expanduser(pathname)
326 path, name = os.path.split(g.pathname)
327 LOGGER.info("Parsing gerald config.xml")
328 config_pathname = os.path.join(g.pathname, 'config.xml')
329 g.tree = ElementTree.parse(config_pathname).getroot()
331 # parse Summary.htm file
332 summary_xml = os.path.join(g.pathname, 'Summary.xml')
333 summary_htm = os.path.join(g.pathname, 'Summary.htm')
334 report_summary = os.path.join(g.pathname, '..', 'Data',
335 'reports', 'Summary', )
336 if os.path.exists(summary_xml):
337 LOGGER.info("Parsing Summary.xml")
338 g.summary = SummaryGA(summary_xml)
339 g.eland_results = eland(g.pathname, g)
340 elif os.path.exists(summary_htm):
341 LOGGER.info("Parsing Summary.htm")
342 g.summary = SummaryGA(summary_htm)
343 g.eland_results = eland(g.pathname, g)
344 elif os.path.isdir(report_summary):
345 LOGGER.info("Parsing %s" % (report_summary,))
346 g.summary = SummaryHiSeq(report_summary)
351 if __name__ == "__main__":
354 g = gerald(sys.argv[1])
355 #ElementTree.dump(g.get_elements())