2 Provide access to information stored in the GERALD directory.
4 from datetime import datetime, date
10 from htsworkflow.pipelines.summary import Summary
11 from htsworkflow.pipelines.eland import eland, ELAND
13 from htsworkflow.pipelines.runfolder import \
18 from htsworkflow.util.ethelp import indent, flatten
20 LOGGER = logging.getLogger(__name__)
24 Capture meaning out of the GERALD directory
28 RUN_PARAMETERS='RunParameters'
# Per-lane accessor for values stored under the LaneSpecificRunParameters
# section of a GERALD config tree. Each instance is tied to one lane id.
31 class LaneParameters(object):
33 Make it easy to access elements of LaneSpecificRunParameters from python
# gerald:  owning object; NOTE(review): presumably kept as self._gerald,
#          since the accessors of this class read self._gerald.tree -- confirm.
# lane_id: lane identifier; it is compared against the part of each child
#          tag that follows the '_' when a value is looked up.
35 def __init__(self, gerald, lane_id):
37 self._lane_id = lane_id
# Find xml_tag beneath the LaneSpecificRunParameters node of the config
# tree and pick out the child element that belongs to this lane.
39 def __get_attribute(self, xml_tag):
# NOTE(review): find() returns None when LaneSpecificRunParameters is absent,
# in which case the next line would raise AttributeError -- confirm the
# node is always present.
40 subtree = self._gerald.tree.find('LaneSpecificRunParameters')
41 container = subtree.find(xml_tag)
# More children than LANES_PER_FLOWCELL is treated as a change in the
# config.xml layout and is reported with RuntimeError.
# NOTE(review): Element.getchildren() was removed from xml.etree in
# Python 3.9; list(container) is the documented replacement.
44 if len(container.getchildren()) > LANES_PER_FLOWCELL:
45 raise RuntimeError('GERALD config.xml file changed')
# Child tags are split on '_' and the second field is taken as the lane id.
46 lanes = [x.tag.split('_')[1] for x in container.getchildren()]
# Position of this lane among the children (list.index raises ValueError
# when the id is not present).
48 index = lanes.index(self._lane_id)
51 element = container[index]
53 def _get_analysis(self):
54 return self.__get_attribute('ANALYSIS')
55 analysis = property(_get_analysis)
# Resolve the ELAND genome for this lane. The lane-specific ELAND_GENOME
# entry is looked up first; the chip-wide ELAND_GENOME value is the fallback.
57 def _get_eland_genome(self):
58 genome = self.__get_attribute('ELAND_GENOME')
59 # default to the chip-wide parameters if there isn't an
60 # entry in the lane-specific parameters
62 genome = self._gerald._get_chip_attribute('ELAND_GENOME')
# The literal 'Need_to_specify_ELAND_genome_directory' is special-cased
# (presumably a placeholder meaning no genome was configured -- TODO confirm).
64 if genome == 'Need_to_specify_ELAND_genome_directory':
# Exposed as the read-only property `eland_genome`.
67 eland_genome = property(_get_eland_genome)
# Read length for this lane: when the lane-specific READ_LENGTH lookup
# yields None, the chip-wide READ_LENGTH value is fetched instead.
69 def _get_read_length(self):
70 read_length = self.__get_attribute('READ_LENGTH')
71 if read_length is None:
72 read_length = self._gerald._get_chip_attribute('READ_LENGTH')
# Exposed as the read-only property `read_length`.
74 read_length = property(_get_read_length)
76 def _get_use_bases(self):
77 return self.__get_attribute('USE_BASES')
78 use_bases = property(_get_use_bases)
# Mapping-style container of LaneParameters objects; entries are stored in
# self._lanes under integer lane ids (see _initalize_lanes).
80 class LaneSpecificRunParameters(object):
82 Provide access to LaneSpecificRunParameters
# gerald: owning object; NOTE(review): presumably kept as self._gerald,
#         which _initalize_lanes reads -- confirm.
84 def __init__(self, gerald):
# Fill self._lanes with one Gerald.LaneParameters object per child of the
# LaneSpecificRunParameters/ANALYSIS element of the config tree.
88 def _initalize_lanes(self):
90 build dictionary of LaneParameters
93 tree = self._gerald.tree
94 analysis = tree.find('LaneSpecificRunParameters/ANALYSIS')
97 # according to the pipeline specs I think their fields
98 # are sampleName_laneID, with sampleName defaulting to s
99 # since laneIDs are constant let's just try using
100 # those consistently.
# Dictionary keys are int(lane_id), while LaneParameters receives the id
# as the original string.
# NOTE(review): a tag with more than one '_' makes the two-name unpacking
# below raise ValueError.
101 for element in analysis:
102 sample, lane_id = element.tag.split('_')
103 self._lanes[int(lane_id)] = Gerald.LaneParameters(
104 self._gerald, lane_id)
106 def __getitem__(self, key):
107 if self._lane is None:
108 self._initalize_lanes()
109 return self._lanes[key]
def get(self, key, default=None):
    """Return the entry of self._lanes stored under key, or default.

    key:     lane id to look up.
    default: value returned when key is not present. It was previously
             accepted but ignored (None was always returned); it is now
             honored and defaults to None, matching dict.get.

    _initalize_lanes() is run first whenever self._lane is None.
    """
    if self._lane is None:
        self._initalize_lanes()
    return self._lanes.get(key, default)
# Returns self._lanes.keys(); _initalize_lanes() is run first whenever
# self._lane is None.
115 if self._lane is None:
116 self._initalize_lanes()
117 return self._lanes.keys()
# Returns self._lanes.values(), with the same initialization check.
119 if self._lane is None:
120 self._initalize_lanes()
121 return self._lanes.values()
# Returns self._lanes.items(), with the same initialization check.
123 if self._lane is None:
124 self._initalize_lanes()
125 return self._lanes.items()
# Returns the number of entries in self._lanes, with the same
# initialization check.
127 if self._lane is None:
128 self._initalize_lanes()
129 return len(self._lanes)
# xml: optional element handed to set_elements().
# NOTE(review): set_elements() dereferences tree.tag, so the call below is
# presumably made only when xml is not None -- confirm.
131 def __init__(self, xml=None):
135 # parse lane parameters out of the config.xml file
136 self.lanes = Gerald.LaneSpecificRunParameters(self)
139 self.eland_results = None
142 self.set_elements(xml)
# Determine the run date. In order: today's date when no config tree is
# loaded; the ChipWideRunParameters/TIME_STAMP text when present; the
# modification time of self.pathname when that is set; otherwise today's date.
145 if self.tree is None:
146 return datetime.today()
147 timestamp = self.tree.findtext('ChipWideRunParameters/TIME_STAMP')
# '%c' is the locale's date-and-time representation, so parsing the
# timestamp depends on the active locale.
148 if timestamp is not None:
149 epochstamp = time.mktime(time.strptime(timestamp, '%c'))
150 return datetime.fromtimestamp(epochstamp)
# No timestamp in the tree: fall back to the mtime of the path on disk.
151 if self.pathname is not None:
152 epochstamp = os.stat(self.pathname)[stat.ST_MTIME]
153 return datetime.fromtimestamp(epochstamp)
154 return datetime.today()
# Exposed as the read-only property `date`; every branch above yields a
# datetime object.
155 date = property(_get_date)
# Converts self.date to seconds since the epoch with time.mktime, which
# interprets the time tuple as local time.
158 return time.mktime(self.date.timetuple())
159 time = property(_get_time, doc='return run time as seconds since epoch')
# Text of ChipWideRunParameters/EXPT_DIR_ROOT from the config tree
# (findtext yields None when that element is absent).
161 def _get_experiment_root(self):
162 if self.tree is None:
164 return self.tree.findtext('ChipWideRunParameters/EXPT_DIR_ROOT')
# Derive the experiment directory name from the config tree.
166 def _get_runfolder_name(self):
167 if self.tree is None:
# NOTE(review): os.path.normpath() raises when _get_experiment_root()
# returns None, so the `expt_root is not None` test further down cannot be
# what protects against a missing EXPT_DIR_ROOT -- confirm.
170 expt_root = os.path.normpath(self._get_experiment_root())
171 chip_expt_dir = self.tree.findtext('ChipWideRunParameters/EXPT_DIR')
172 # hiseqs renamed the experiment dir location
173 defaults_expt_dir = self.tree.findtext('Defaults/EXPT_DIR')
175 experiment_dir = None
# Defaults/EXPT_DIR takes precedence: its last path component is used.
176 if defaults_expt_dir is not None:
177 _, experiment_dir = os.path.split(defaults_expt_dir)
# Otherwise remove the "<root>/" prefix from ChipWideRunParameters/EXPT_DIR
# and keep the first remaining path component.
178 elif expt_root is not None and chip_expt_dir is not None:
179 experiment_dir = chip_expt_dir.replace(expt_root+os.path.sep, '')
180 experiment_dir = experiment_dir.split(os.path.sep)[0]
# A missing or empty result is handled separately from the normal return.
182 if experiment_dir is None or len(experiment_dir) == 0:
184 return experiment_dir
# Exposed as the read-only property `runfolder_name`.
186 runfolder_name = property(_get_runfolder_name)
# Report the software version recorded in the config tree.
188 def _get_version(self):
189 if self.tree is None:
# ChipWideRunParameters/SOFTWARE_VERSION is consulted first.
191 ga_version = self.tree.findtext(
192 'ChipWideRunParameters/SOFTWARE_VERSION')
193 if ga_version is not None:
# The 'Version' attribute of the Software element is read next.
# NOTE(review): find() returns None when there is no Software element and
# no check for that is visible here -- confirm.
195 hiseq_software_node = self.tree.find('Software')
196 hiseq_version = hiseq_software_node.attrib['Version']
# Exposed as the read-only property `version`.
199 version = property(_get_version)
201 def _get_chip_attribute(self, value):
202 return self.tree.findtext('ChipWideRunParameters/%s' % (value,))
206 Debugging function, report current object
# Prints the version, run date and config tree of this object.
# NOTE(review): these are Python 2 print statements and are a SyntaxError
# under Python 3.
208 print 'Gerald version:', self.version
209 print 'Gerald run date:', self.date
210 print 'Gerald config.xml:', self.tree
# Build an element tagged Gerald.GERALD that carries a 'version' attribute
# and contains the config tree, the summary's elements and, when
# eland_results is truthy, the eland results' elements.
213 def get_elements(self):
# Both the config tree and the summary must be loaded to build the element.
214 if self.tree is None or self.summary is None:
# NOTE(review): `unicode` is a Python 2 builtin.
217 gerald = ElementTree.Element(Gerald.GERALD,
218 {'version': unicode(Gerald.XML_VERSION)})
219 gerald.append(self.tree)
220 gerald.append(self.summary.get_elements())
221 if self.eland_results:
222 gerald.append(self.eland_results.get_elements())
# Load state from an element tree whose root tag is Gerald.GERALD; child
# elements are dispatched on their lower-cased tag.
225 def set_elements(self, tree):
# Raises ValueError when the root element has a different tag.
226 if tree.tag != Gerald.GERALD:
227 raise ValueError('exptected GERALD')
# A missing 'version' attribute counts as version 0; a version newer than
# Gerald.XML_VERSION only produces a log warning.
# NOTE(review): Logger.warn is a deprecated alias of Logger.warning.
228 xml_version = int(tree.attrib.get('version', 0))
229 if xml_version > Gerald.XML_VERSION:
230 LOGGER.warn('XML tree is a higher version than this class')
# Begin with an empty ELAND() so eland_results is set even when the tree
# has no ELAND child.
231 self.eland_results = ELAND()
232 for element in list(tree):
233 tag = element.tag.lower()
234 if tag == Gerald.RUN_PARAMETERS.lower():
236 elif tag == Gerald.SUMMARY.lower():
237 self.summary = Summary(xml=element)
238 elif tag == ELAND.ELAND.lower():
239 self.eland_results = ELAND(xml=element)
241 LOGGER.warn("Unrecognized tag %s" % (element.tag,))
# Populate an object `g` from a GERALD directory on disk: config.xml is
# parsed into g.tree, a summary file into g.summary, and the eland results
# into g.eland_results.
# pathname: path of the GERALD directory; a leading '~' is expanded.
243 def gerald(pathname):
245 g.pathname = os.path.expanduser(pathname)
# NOTE(review): `path` and `name` are not used in the lines visible here.
246 path, name = os.path.split(g.pathname)
247 LOGGER.info("Parsing gerald config.xml")
248 config_pathname = os.path.join(g.pathname, 'config.xml')
249 g.tree = ElementTree.parse(config_pathname).getroot()
251 # choose the summary file: Summary.xml is preferred, then Summary.htm;
# the copy under ../Data/Status_Files is used in the remaining case.
252 summary_xml = os.path.join(g.pathname, 'Summary.xml')
253 summary_htm = os.path.join(g.pathname, 'Summary.htm')
254 status_files_summary = os.path.join(g.pathname, '..', 'Data', 'Status_Files', 'Summary.htm')
255 if os.path.exists(summary_xml):
256 LOGGER.info("Parsing Summary.xml")
257 summary_pathname = summary_xml
258 elif os.path.exists(summary_htm):
# Same path as summary_htm, rebuilt here.
259 summary_pathname = os.path.join(g.pathname, 'Summary.htm')
260 LOGGER.info("Parsing Summary.htm")
262 summary_pathname = status_files_summary
263 LOGGER.info("Parsing %s" % (status_files_summary,))
264 g.summary = Summary(summary_pathname)
# The eland loader is given both the directory path and the object itself.
266 g.eland_results = eland(g.pathname, g)
# Script entry point: load the GERALD directory named by the first
# command-line argument.
269 if __name__ == "__main__":
272 g = gerald(sys.argv[1])
273 #ElementTree.dump(g.get_elements())