2 Provide access to information stored in the GERALD directory.
4 from datetime import datetime, date
9 from htsworkflow.pipelines.summary import Summary
10 from htsworkflow.pipelines.eland import eland, ELAND
12 from htsworkflow.pipelines.runfolder import \
17 from htsworkflow.util.ethelp import indent, flatten
19 LOGGER = logging.getLogger(__name__)
23 Capture meaning out of the GERALD directory
27 RUN_PARAMETERS='RunParameters'
30 class LaneParameters(object):
32 Make it easy to access elements of LaneSpecificRunParameters from python
# Set up a per-lane accessor. `lane_id` is later compared against the part
# of each child tag after the '_' (see lanes.index(self._lane_id)), so it
# has to be the string form of the lane number.
34 def __init__(self, gerald, lane_id):
# NOTE(review): this listing skips line 35. The other methods read
# self._gerald, so the omitted line presumably stores `gerald` -- confirm.
36 self._lane_id = lane_id
# Find this lane's entry under LaneSpecificRunParameters/<xml_tag> in the
# owning Gerald object's config tree.
38 def __get_attribute(self, xml_tag):
39 subtree = self._gerald.tree.find('LaneSpecificRunParameters')
40 container = subtree.find(xml_tag)
# NOTE(review): lines 41-42 are missing from this listing. Callers compare
# the result with None, so a guard for a missing container presumably sits
# here -- confirm.
# More children than lanes on a flowcell means the file layout is not what
# this parser was written for.
# NOTE(review): Element.getchildren() is deprecated in xml.etree and was
# removed in Python 3.9; list(container) is the documented replacement.
43 if len(container.getchildren()) > LANES_PER_FLOWCELL:
44 raise RuntimeError('GERALD config.xml file changed')
# Child tags are split on '_' and the second field is used as the lane id.
45 lanes = [x.tag.split('_')[1] for x in container.getchildren()]
# NOTE(review): line 46 is missing from this listing.
# list.index raises ValueError when the lane id is not present.
47 index = lanes.index(self._lane_id)
# NOTE(review): lines 48-49 are missing; presumably the handling for a lane
# that has no entry -- confirm.
50 element = container[index]
# NOTE(review): line 51 is missing; presumably the return of a value taken
# from `element` -- confirm.
def _get_analysis(self):
    """
    Return whatever the per-lane lookup yields for the ANALYSIS tag
    """
    analysis_value = self.__get_attribute('ANALYSIS')
    return analysis_value
analysis = property(fget=_get_analysis)
# Resolve the ELAND genome for this lane, starting from the lane-specific
# ELAND_GENOME entry.
56 def _get_eland_genome(self):
57 genome = self.__get_attribute('ELAND_GENOME')
58 # default to the chip-wide parameters if there isn't an
59 # entry in the lane-specific parameters
# NOTE(review): line 60 is missing from this listing; given the comment
# above it is presumably the test that triggers the fallback -- confirm.
61 genome = self._gerald._get_chip_attribute('ELAND_GENOME')
# NOTE(review): line 62 is missing from this listing.
# The value is compared against this literal string; it looks like a
# placeholder for an unset genome. The statements that handle the match
# (lines 64-65) are missing from this listing -- confirm.
63 if genome == 'Need_to_specify_ELAND_genome_directory':
66 eland_genome = property(_get_eland_genome)
# Look up READ_LENGTH for this lane, falling back to the chip-wide
# READ_LENGTH when the lane-specific lookup gives None.
68 def _get_read_length(self):
69 read_length = self.__get_attribute('READ_LENGTH')
70 if read_length is None:
71 read_length = self._gerald._get_chip_attribute('READ_LENGTH')
# NOTE(review): line 72 is missing from this listing; presumably the return
# of `read_length` -- confirm.
73 read_length = property(_get_read_length)
def _get_use_bases(self):
    """
    Return whatever the per-lane lookup yields for the USE_BASES tag
    """
    use_bases_value = self.__get_attribute('USE_BASES')
    return use_bases_value
use_bases = property(fget=_get_use_bases)
79 class LaneSpecificRunParameters(object):
81 Provide access to LaneSpecificRunParameters
# NOTE(review): the body of this constructor (lines 84-85) is missing from
# this listing. The methods below read self._gerald and self._lane, so both
# are presumably assigned here -- confirm.
83 def __init__(self, gerald):
# Fill self._lanes with one Gerald.LaneParameters per child element of
# LaneSpecificRunParameters/ANALYSIS in the owning Gerald's config tree.
87 def _initalize_lanes(self):
89 build dictionary of LaneParameters
# NOTE(review): line 91 is missing from this listing. self._lanes is indexed
# in the loop below, so it is presumably created there -- confirm.
92 tree = self._gerald.tree
93 analysis = tree.find('LaneSpecificRunParameters/ANALYSIS')
94 # according to the pipeline specs I think their fields
95 # are sampleName_laneID, with sampleName defaulting to s
96 # since laneIDs are constant lets just try using
# NOTE(review): the end of the comment above (line 97) is missing from this
# listing.
98 for element in analysis:
# The two-name unpacking needs exactly one '_' in the tag; any other shape
# raises ValueError.
99 sample, lane_id = element.tag.split('_')
# Dictionary keys are ints, while LaneParameters is handed the lane id as
# the string taken from the tag.
100 self._lanes[int(lane_id)] = Gerald.LaneParameters(
101 self._gerald, lane_id)
def __getitem__(self, key):
    """
    Return the entry of the lane dictionary stored under key

    The lane dictionary is (re)built first whenever self._lane is None.
    A missing key raises KeyError from the dictionary lookup.
    """
    # NOTE(review): the guard tests self._lane while the data lives in
    # self._lanes; nothing visible in this class assigns self._lane, so
    # this may rebuild the dictionary on every access -- confirm.
    needs_setup = self._lane is None
    if needs_setup:
        self._initalize_lanes()
    lane_table = self._lanes
    return lane_table[key]
# NOTE(review): the four `def` lines belonging to the bodies below (107,
# 111, 115, 119) are missing from this listing. Each body rebuilds the lane
# dictionary when self._lane is None and then exposes one view of
# self._lanes.
# NOTE(review): the guard tests self._lane, but the data is kept in
# self._lanes; nothing visible in this class assigns self._lane -- confirm
# whether the guard was meant to test self._lanes.
# Body returning the dictionary's keys (the int lane numbers).
108 if self._lane is None:
109 self._initalize_lanes()
110 return self._lanes.keys()
# Body returning the dictionary's values.
112 if self._lane is None:
113 self._initalize_lanes()
114 return self._lanes.values()
# Body returning the dictionary's (key, value) pairs.
116 if self._lane is None:
117 self._initalize_lanes()
118 return self._lanes.items()
# Body returning the number of entries in the dictionary.
120 if self._lane is None:
121 self._initalize_lanes()
122 return len(self._lanes)
# Set up a Gerald object; `xml` is handed to set_elements().
124 def __init__(self, xml=None):
# NOTE(review): lines 125-127 are missing from this listing.
128 # parse lane parameters out of the config.xml file
# Only a reference to this object is passed here; the accessor reads
# self.tree later, when a lane is first requested.
129 self.lanes = Gerald.LaneSpecificRunParameters(self)
# NOTE(review): lines 130-131 are missing from this listing.
132 self.eland_results = None
# NOTE(review): lines 133-134 are missing from this listing. set_elements
# reads tree.tag, which fails for None, so this call is presumably guarded
# by a check that `xml` was supplied -- confirm.
135 self.set_elements(xml)
# NOTE(review): the `def` line (137) is missing from this listing; the
# property assignment at the end shows the getter is named _get_date.
# Returns the current date/time when no config tree is loaded, otherwise
# the ChipWideRunParameters/TIME_STAMP value converted to a datetime.
138 if self.tree is None:
139 return datetime.today()
140 timestamp = self.tree.findtext('ChipWideRunParameters/TIME_STAMP')
# '%c' is the locale's date-and-time representation, so parsing depends on
# the active locale; mktime interprets the parsed struct as local time.
141 epochstamp = time.mktime(time.strptime(timestamp, '%c'))
142 return datetime.fromtimestamp(epochstamp)
143 date = property(_get_date)
# NOTE(review): the `def` line (145) is missing from this listing; the
# property assignment below shows the getter is named _get_time.
# Converts self.date to seconds since the epoch via its time tuple, which
# mktime interprets as local time.
146 return time.mktime(self.date.timetuple())
147 time = property(_get_time, doc='return run time as seconds since epoch')
# Return the text of ChipWideRunParameters/EXPT_DIR_ROOT from the config
# tree; findtext gives None when the element is absent.
149 def _get_experiment_root(self):
150 if self.tree is None:
# NOTE(review): line 151, the body of the guard above, is missing from this
# listing.
152 return self.tree.findtext('ChipWideRunParameters/EXPT_DIR_ROOT')
# Work out the runfolder name from ChipWideRunParameters/EXPT_DIR with the
# experiment root removed from it.
154 def _get_runfolder_name(self):
155 if self.tree is None:
# NOTE(review): lines 156-157 (body of the guard above) are missing from
# this listing.
158 root = self._get_experiment_root()
# NOTE(review): lines 159-161 are missing from this listing.
# _get_experiment_root can give None, which os.path.join would reject, so a
# None check presumably sits here -- confirm.
# Joining with '' makes the root end in a path separator.
162 root = os.path.join(root,'')
164 experiment_dir = self.tree.findtext('ChipWideRunParameters/EXPT_DIR')
165 if experiment_dir is None:
# NOTE(review): line 166 (body of the guard above) is missing from this
# listing.
# str.replace removes every occurrence of `root`, not just a leading one.
167 experiment_dir = experiment_dir.replace(root, '')
168 if len(experiment_dir) == 0:
# NOTE(review): lines 169-170 (body of the guard above) are missing from
# this listing.
171 dirnames = experiment_dir.split(os.path.sep)
# NOTE(review): line 172 is missing from this listing; presumably the return
# of one component of `dirnames` -- confirm which.
173 runfolder_name = property(_get_runfolder_name)
# Return the text of ChipWideRunParameters/SOFTWARE_VERSION from the config
# tree; findtext gives None when the element is absent.
175 def _get_version(self):
176 if self.tree is None:
# NOTE(review): line 177, the body of the guard above, is missing from this
# listing.
178 return self.tree.findtext('ChipWideRunParameters/SOFTWARE_VERSION')
179 version = property(_get_version)
def _get_chip_attribute(self, value):
    """
    Return the text of ChipWideRunParameters/<value> from the config tree

    :param value: tag name of a child element of ChipWideRunParameters
    :return: the element's text; None when no tree is loaded or when
             findtext finds no matching element
    """
    # The other getters on this object check for an unloaded tree before
    # querying it; do the same here instead of raising AttributeError on
    # None.
    if self.tree is None:
        return None
    return self.tree.findtext('ChipWideRunParameters/%s' % (value,))
# NOTE(review): the `def` line and the docstring quotes around the next line
# are missing from this listing.
186 Debugging function, report current object
# These are Python 2 print statements; they write the version, run date and
# config tree object to standard output.
188 print 'Gerald version:', self.version
189 print 'Gerald run date:', self.date
190 print 'Gerald config.xml:', self.tree
# Build an element tagged Gerald.GERALD that wraps the config tree, the
# summary's elements and, when present, the ELAND results' elements.
193 def get_elements(self):
194 if self.tree is None or self.summary is None:
# NOTE(review): lines 195-196 (body of the guard above) are missing from
# this listing.
# The XML version is written as a 'version' attribute; `unicode` is a
# Python 2 builtin.
197 gerald = ElementTree.Element(Gerald.GERALD,
198 {'version': unicode(Gerald.XML_VERSION)})
# self.tree itself (not a copy) becomes a child of the new element.
199 gerald.append(self.tree)
200 gerald.append(self.summary.get_elements())
# ELAND results are optional and only appended when truthy.
201 if self.eland_results:
202 gerald.append(self.eland_results.get_elements())
# NOTE(review): line 203 is missing from this listing; presumably the return
# of `gerald` -- confirm.
# Load this object's state from an element tagged Gerald.GERALD, dispatching
# on the lower-cased tag of each child.
# Raises ValueError when the root tag is not Gerald.GERALD.
205 def set_elements(self, tree):
206 if tree.tag != Gerald.GERALD:
# NOTE(review): the message below contains a typo ("exptected").
207 raise ValueError('exptected GERALD')
# A missing 'version' attribute is treated as version 0.
208 xml_version = int(tree.attrib.get('version', 0))
# A newer file version is only warned about; parsing continues.
209 if xml_version > Gerald.XML_VERSION:
# NOTE(review): Logger.warn is a deprecated alias of Logger.warning.
210 LOGGER.warn('XML tree is a higher version than this class')
# Start from an empty ELAND container; it is replaced below if the tree
# carries an ELAND child.
211 self.eland_results = ELAND()
212 for element in list(tree):
213 tag = element.tag.lower()
214 if tag == Gerald.RUN_PARAMETERS.lower():
# NOTE(review): line 215, the body of the branch above, is missing from this
# listing; get_elements appends self.tree, so it presumably stores `element`
# as self.tree -- confirm.
216 elif tag == Gerald.SUMMARY.lower():
217 self.summary = Summary(xml=element)
218 elif tag == ELAND.ELAND.lower():
219 self.eland_results = ELAND(xml=element)
# NOTE(review): line 220 is missing from this listing; presumably the
# `else:` that owns the warning below -- confirm.
221 LOGGER.warn("Unrecognized tag %s" % (element.tag,))
# Populate an object `g` from a GERALD directory: config.xml, a summary file
# and the ELAND results.
223 def gerald(pathname):
# NOTE(review): line 224 is missing from this listing; `g` is used below, so
# it is presumably created there -- confirm.
# '~' in the given path is expanded before use.
225 g.pathname = os.path.expanduser(pathname)
# `path` and `name` are not used by any line visible in this listing.
226 path, name = os.path.split(g.pathname)
227 LOGGER.info("Parsing gerald config.xml")
228 config_pathname = os.path.join(g.pathname, 'config.xml')
# Only the root element of the parsed document is kept.
229 g.tree = ElementTree.parse(config_pathname).getroot()
231 # parse the summary file (Summary.xml or Summary.htm)
232 summary_pathname = os.path.join(g.pathname, 'Summary.xml')
233 if os.path.exists(summary_pathname):
234 LOGGER.info("Parsing Summary.xml")
# NOTE(review): line 235 is missing from this listing; presumably the
# `else:` that selects Summary.htm when Summary.xml does not exist --
# confirm.
236 summary_pathname = os.path.join(g.pathname, 'Summary.htm')
237 LOGGER.info("Parsing Summary.htm")
238 g.summary = Summary(summary_pathname)
# NOTE(review): line 239 is missing from this listing.
240 g.eland_results = eland(g.pathname, g)
# NOTE(review): line 241 is missing from this listing; the __main__ block
# uses the result of gerald(), so `g` is presumably returned -- confirm.
# Script entry point: parse the GERALD directory named by the first
# command-line argument.
243 if __name__ == "__main__":
# NOTE(review): lines 244-245 are missing from this listing. `sys` is used
# below and no `import sys` is visible elsewhere in the listing -- confirm it
# is imported there.
246 g = gerald(sys.argv[1])
247 #ElementTree.dump(g.get_elements())