2 Provide access to information stored in the GERALD directory.
4 from datetime import datetime, date
9 from htsworkflow.pipelines.summary import Summary
10 from htsworkflow.pipelines.eland import eland, ELAND
12 from htsworkflow.pipelines.runfolder import \
17 from htsworkflow.util.ethelp import indent, flatten
21 Capture meaning out of the GERALD directory
25 RUN_PARAMETERS='RunParameters'
28 class LaneParameters(object):
30 Make it easy to access elements of LaneSpecificRunParameters from python
32 def __init__(self, gerald, lane_id):
34 self._lane_id = lane_id
36 def __get_attribute(self, xml_tag):
37 subtree = self._gerald.tree.find('LaneSpecificRunParameters')
38 container = subtree.find(xml_tag)
41 if len(container.getchildren()) > LANES_PER_FLOWCELL:
42 raise RuntimeError('GERALD config.xml file changed')
43 lanes = [x.tag.split('_')[1] for x in container.getchildren()]
45 index = lanes.index(self._lane_id)
48 element = container[index]
def _get_analysis(self):
    """
    Return this lane's ANALYSIS entry.

    The lookup is delegated to the private __get_attribute helper,
    which is handed the 'ANALYSIS' xml tag.
    """
    analysis_value = self.__get_attribute('ANALYSIS')
    return analysis_value
# read-only attribute backed by the getter above
analysis = property(_get_analysis)
54 def _get_eland_genome(self):
55 genome = self.__get_attribute('ELAND_GENOME')
56 # default to the chipwide parameters if there isn't an
57 # entry in the lane specific parameters
59 subtree = self._gerald.tree.find('ChipWideRunParameters')
60 container = subtree.find('ELAND_GENOME')
61 genome = container.text
63 eland_genome = property(_get_eland_genome)
def _get_read_length(self):
    """
    Return this lane's READ_LENGTH entry.

    The value is whatever the private __get_attribute helper yields
    for the 'READ_LENGTH' xml tag; no conversion is applied here.
    """
    xml_tag = 'READ_LENGTH'
    return self.__get_attribute(xml_tag)
# read-only attribute backed by the getter above
read_length = property(_get_read_length)
def _get_use_bases(self):
    """
    Return this lane's USE_BASES entry.

    The value is whatever the private __get_attribute helper yields
    for the 'USE_BASES' xml tag; no conversion is applied here.
    """
    use_bases_value = self.__get_attribute('USE_BASES')
    return use_bases_value
# read-only attribute backed by the getter above
use_bases = property(_get_use_bases)
73 class LaneSpecificRunParameters(object):
75 Provide access to LaneSpecificRunParameters
77 def __init__(self, gerald):
81 def _initalize_lanes(self):
83 build dictionary of LaneParameters
86 tree = self._gerald.tree
87 analysis = tree.find('LaneSpecificRunParameters/ANALYSIS')
88 # according to the pipeline specs I think their fields
89 # are sampleName_laneID, with sampleName defaulting to s
90 # since laneIDs are constant lets just try using
92 for element in analysis:
93 sample, lane_id = element.tag.split('_')
94 self._lanes[int(lane_id)] = Gerald.LaneParameters(
95 self._gerald, lane_id)
def __getitem__(self, key):
    """
    Return the lane entry stored under key.

    The lane table is (re)built through _initalize_lanes() whenever
    self._lane is None, and the lookup is then done in self._lanes.
    Raises whatever the self._lanes lookup raises for an unknown key.
    """
    # NOTE(review): the guard checks self._lane while the data lives in
    # self._lanes -- confirm the sentinel is ever set to a non-None value.
    needs_build = self._lane is None
    if needs_build:
        self._initalize_lanes()
    lane_table = self._lanes
    return lane_table[key]
102 if self._lane is None:
103 self._initalize_lanes()
104 return self._lanes.keys()
106 if self._lane is None:
107 self._initalize_lanes()
108 return self._lanes.values()
110 if self._lane is None:
111 self._initalize_lanes()
112 return self._lanes.items()
114 if self._lane is None:
115 self._initalize_lanes()
116 return len(self._lanes)
118 def __init__(self, xml=None):
122 # parse lane parameters out of the config.xml file
123 self.lanes = Gerald.LaneSpecificRunParameters(self)
126 self.eland_results = None
129 self.set_elements(xml)
132 if self.tree is None:
133 return datetime.today()
134 timestamp = self.tree.findtext('ChipWideRunParameters/TIME_STAMP')
135 epochstamp = time.mktime(time.strptime(timestamp, '%c'))
136 return datetime.fromtimestamp(epochstamp)
137 date = property(_get_date)
140 return time.mktime(self.date.timetuple())
141 time = property(_get_time, doc='return run time as seconds since epoch')
143 def _get_experiment_root(self):
144 if self.tree is None:
146 return self.tree.findtext('ChipWideRunParameters/EXPT_DIR_ROOT')
148 def _get_runfolder_name(self):
149 if self.tree is None:
152 root = self._get_experiment_root()
156 root = os.path.join(root,'')
158 experiment_dir = self.tree.findtext('ChipWideRunParameters/EXPT_DIR')
159 if experiment_dir is None:
161 experiment_dir = experiment_dir.replace(root, '')
162 if len(experiment_dir) == 0:
165 dirnames = experiment_dir.split(os.path.sep)
167 runfolder_name = property(_get_runfolder_name)
169 def _get_version(self):
170 if self.tree is None:
172 return self.tree.findtext('ChipWideRunParameters/SOFTWARE_VERSION')
173 version = property(_get_version)
177 Debugging function, report current object
179 print 'Gerald version:', self.version
180 print 'Gerald run date:', self.date
181 print 'Gerald config.xml:', self.tree
184 def get_elements(self):
185 if self.tree is None or self.summary is None:
188 gerald = ElementTree.Element(Gerald.GERALD,
189 {'version': unicode(Gerald.XML_VERSION)})
190 gerald.append(self.tree)
191 gerald.append(self.summary.get_elements())
192 if self.eland_results:
193 gerald.append(self.eland_results.get_elements())
196 def set_elements(self, tree):
197 if tree.tag != Gerald.GERALD:
198 raise ValueError('exptected GERALD')
199 xml_version = int(tree.attrib.get('version', 0))
200 if xml_version > Gerald.XML_VERSION:
201 logging.warn('XML tree is a higher version than this class')
202 self.eland_results = ELAND()
203 for element in list(tree):
204 tag = element.tag.lower()
205 if tag == Gerald.RUN_PARAMETERS.lower():
207 elif tag == Gerald.SUMMARY.lower():
208 self.summary = Summary(xml=element)
209 elif tag == ELAND.ELAND.lower():
210 self.eland_results = ELAND(xml=element)
212 logging.warn("Unrecognized tag %s" % (element.tag,))
214 def gerald(pathname):
216 g.pathname = os.path.expanduser(pathname)
217 path, name = os.path.split(g.pathname)
218 logging.info("Parsing gerald config.xml")
219 config_pathname = os.path.join(g.pathname, 'config.xml')
220 g.tree = ElementTree.parse(config_pathname).getroot()
222 # parse the summary file (Summary.xml is looked for first, then Summary.htm)
223 summary_pathname = os.path.join(g.pathname, 'Summary.xml')
224 if os.path.exists(summary_pathname):
225 logging.info("Parsing Summary.xml")
227 summary_pathname = os.path.join(g.pathname, 'Summary.htm')
228 logging.info("Parsing Summary.htm")
229 g.summary = Summary(summary_pathname)
231 g.eland_results = eland(g.pathname, g)
234 if __name__ == "__main__":
237 g = gerald(sys.argv[1])
238 #ElementTree.dump(g.get_elements())