2 Extract configuration from Illumina Bustard Directory.
4 This includes the version number, run date, bustard executable parameters, and
8 from datetime import date
16 from htsworkflow.pipelines.runfolder import \
21 LOGGER = logging.getLogger(__name__)
24 __docformat__ = "restructuredtext en"
26 LANE_LIST = range(1,9)
28 class Phasing(object):
30 PREPHASING = 'Prephasing'
32 def __init__(self, fromfile=None, xml=None):
35 self.prephasing = None
37 if fromfile is not None:
38 self._initialize_from_file(fromfile)
40 self.set_elements(xml)
42 def _initialize_from_file(self, pathname):
43 path, name = os.path.split(pathname)
44 basename, ext = os.path.splitext(name)
45 # the last character of the param base filename should be the
47 tree = ElementTree.parse(pathname).getroot()
48 self.set_elements(tree)
49 self.lane = int(basename[-1])
51 def get_elements(self):
52 root = ElementTree.Element(Phasing.PHASING, {'lane': str(self.lane)})
53 root.tail = os.linesep
54 phasing = ElementTree.SubElement(root, Phasing.PHASING)
55 phasing.text = str(self.phasing)
56 phasing.tail = os.linesep
57 prephasing = ElementTree.SubElement(root, Phasing.PREPHASING)
58 prephasing.text = str(self.prephasing)
59 prephasing.tail = os.linesep
62 def set_elements(self, tree):
63 if tree.tag not in ('Phasing', 'Parameters'):
64 raise ValueError('exptected Phasing or Parameters')
65 lane = tree.attrib.get('lane', None)
68 for element in list(tree):
69 if element.tag == Phasing.PHASING:
70 self.phasing = float(element.text)
71 elif element.tag == Phasing.PREPHASING:
72 self.prephasing = float(element.text)
74 class CrosstalkMatrix(object):
75 CROSSTALK = "MatrixElements"
79 def __init__(self, fromfile=None, xml=None):
82 if fromfile is not None:
83 self._initialize_from_file(fromfile)
85 self.set_elements(xml)
87 def _initialize_from_file(self, pathname):
88 data = open(pathname).readlines()
89 auto_header = '# Auto-generated frequency response matrix'
90 if data[0].strip() == auto_header and len(data) == 9:
91 # skip over lines 1,2,3,4 which contain the 4 bases
92 self.base['A'] = [ float(v) for v in data[5].split() ]
93 self.base['C'] = [ float(v) for v in data[6].split() ]
94 self.base['G'] = [ float(v) for v in data[7].split() ]
95 self.base['T'] = [ float(v) for v in data[8].split() ]
97 self.base['A'] = [ float(v) for v in data[:4] ]
98 self.base['C'] = [ float(v) for v in data[4:8] ]
99 self.base['G'] = [ float(v) for v in data[8:12] ]
100 self.base['T'] = [ float(v) for v in data[12:16] ]
102 raise RuntimeError("matrix file %s is unusual" % (pathname,))
103 def get_elements(self):
104 root = ElementTree.Element(CrosstalkMatrix.CROSSTALK)
105 root.tail = os.linesep
106 base_order = ['A','C','G','T']
108 base_element = ElementTree.SubElement(root, CrosstalkMatrix.BASE)
109 base_element.text = b
110 base_element.tail = os.linesep
112 for value in self.base[b]:
113 crosstalk_value = ElementTree.SubElement(root, CrosstalkMatrix.ELEMENT)
114 crosstalk_value.text = unicode(value)
115 crosstalk_value.tail = os.linesep
119 def set_elements(self, tree):
120 if tree.tag != CrosstalkMatrix.CROSSTALK:
121 raise ValueError('Invalid run-xml exptected '+CrosstalkMatrix.CROSSTALK)
125 for element in tree.getchildren():
126 # read in the order of the bases
127 if element.tag == 'Base':
128 base_order.append(element.text)
129 elif element.tag == 'Element':
130 # we're done reading bases, now its just the 4x4 matrix
131 # written out as a list of elements
132 # if this is the first element, make a copy of the list
133 # to play with and initialize an empty list for the current base
134 if current_base is None:
135 current_base = copy(base_order)
136 self.base[current_base[0]] = []
137 # we found (probably) 4 bases go to the next base
138 if current_index == len(base_order):
141 self.base[current_base[0]] = []
142 value = float(element.text)
143 self.base[current_base[0]].append(value)
147 raise RuntimeError("Unrecognized tag in run xml: %s" %(element.tag,))
149 def crosstalk_matrix_from_bustard_config(bustard_path, bustard_config_tree):
151 Analyze the bustard config file and try to find the crosstalk matrix.
153 bustard_run = bustard_config_tree[0]
154 if bustard_run.tag != 'Run':
155 raise RuntimeError('Expected Run tag, got %s' % (bustard_run.tag,))
157 call_parameters = bustard_run.find('BaseCallParameters')
158 if call_parameters is None:
159 raise RuntimeError('Missing BaseCallParameters section')
161 matrix = call_parameters.find('Matrix')
163 raise RuntimeError('Expected to find Matrix in Bustard BaseCallParameters')
165 matrix_auto_flag = int(matrix.find('AutoFlag').text)
166 matrix_auto_lane = int(matrix.find('AutoLane').text)
170 # we estimated the matrix from something in this run.
171 # though we don't really care which lane it was
172 if matrix_auto_lane == 0:
173 auto_lane_fragment = ""
175 auto_lane_fragment = "_%d" % ( matrix_auto_lane,)
177 for matrix_name in ['s%s_02_matrix.txt' % (auto_lane_fragment,),
178 's%s_1_matrix.txt' % (auto_lane_fragment,),
180 matrix_path = os.path.join(bustard_path, 'Matrix', matrix_name)
181 if os.path.exists(matrix_path):
184 raise RuntimeError("Couldn't find matrix for lane %d" % \
187 crosstalk = CrosstalkMatrix(matrix_path)
189 matrix_elements = call_parameters.find('MatrixElements')
190 # the matrix was provided
191 if matrix_elements is not None:
192 crosstalk = CrosstalkMatrix(xml=matrix_elements)
194 # we have no crosstalk matrix?
199 class Bustard(object):
204 SOFTWARE_VERSION = 'version'
207 PARAMETERS = 'Parameters'
208 BUSTARD_CONFIG = 'BaseCallAnalysis'
210 def __init__(self, xml=None):
215 self.crosstalk = None
217 self.bustard_config = None
220 self.set_elements(xml)
223 if self.date is None:
225 return time.mktime(self.date.timetuple())
226 time = property(_get_time, doc='return run time as seconds since epoch')
229 #print ElementTree.tostring(self.get_elements())
230 ElementTree.dump(self.get_elements())
232 def get_elements(self):
233 root = ElementTree.Element('Bustard',
234 {'version': str(Bustard.XML_VERSION)})
235 version = ElementTree.SubElement(root, Bustard.SOFTWARE_VERSION)
236 version.text = self.version
237 if self.time is not None:
238 run_date = ElementTree.SubElement(root, Bustard.DATE)
239 run_date.text = str(self.time)
240 if self.user is not None:
241 user = ElementTree.SubElement(root, Bustard.USER)
242 user.text = self.user
243 params = ElementTree.SubElement(root, Bustard.PARAMETERS)
245 # add phasing parameters
246 for lane in LANE_LIST:
247 if self.phasing.has_key(lane):
248 params.append(self.phasing[lane].get_elements())
250 # add crosstalk matrix if it exists
251 if self.crosstalk is not None:
252 root.append(self.crosstalk.get_elements())
254 # add bustard config if it exists
255 if self.bustard_config is not None:
256 root.append(self.bustard_config)
259 def set_elements(self, tree):
260 if tree.tag != Bustard.BUSTARD:
261 raise ValueError('Expected "Bustard" SubElements')
262 xml_version = int(tree.attrib.get('version', 0))
263 if xml_version > Bustard.XML_VERSION:
264 LOGGER.warn('Bustard XML tree is a higher version than this class')
265 for element in list(tree):
266 if element.tag == Bustard.SOFTWARE_VERSION:
267 self.version = element.text
268 elif element.tag == Bustard.DATE:
269 self.date = date.fromtimestamp(float(element.text))
270 elif element.tag == Bustard.USER:
271 self.user = element.text
272 elif element.tag == Bustard.PARAMETERS:
273 for param in element:
274 p = Phasing(xml=param)
275 self.phasing[p.lane] = p
276 elif element.tag == CrosstalkMatrix.CROSSTALK:
277 self.crosstalk = CrosstalkMatrix(xml=element)
278 elif element.tag == Bustard.BUSTARD_CONFIG:
279 self.bustard_config = element
281 raise ValueError("Unrecognized tag: %s" % (element.tag,))
283 def bustard(pathname):
285 Construct a Bustard object by analyzing an Illumina Bustard directory.
288 - `pathname`: A bustard directory
291 Fully initialized Bustard object.
294 pathname = os.path.abspath(pathname)
295 path, name = os.path.split(pathname)
296 groups = name.split("_")
297 if groups[0].lower().startswith('bustard'):
298 version = re.search(VERSION_RE, groups[0])
299 b.version = version.group(1)
300 t = time.strptime(groups[1], EUROPEAN_STRPTIME)
301 b.date = date(*t[0:3])
303 elif groups[0] == 'BaseCalls':
309 b.pathname = pathname
310 bustard_config_filename = os.path.join(pathname, 'config.xml')
311 paramfiles = glob(os.path.join(pathname, "params?.xml"))
312 for paramfile in paramfiles:
313 phasing = Phasing(paramfile)
314 assert (phasing.lane >= 1 and phasing.lane <= 8)
315 b.phasing[phasing.lane] = phasing
316 # I only found these in Bustard1.9.5/1.9.6 directories
317 if b.version in ('1.9.5', '1.9.6'):
318 # at least for our runfolders for 1.9.5 and 1.9.6 matrix[1-8].txt are always the same
319 crosstalk_file = os.path.join(pathname, "matrix1.txt")
320 b.crosstalk = CrosstalkMatrix(crosstalk_file)
321 # for version 1.3.2 of the pipeline the bustard version number went down
322 # to match the rest of the pipeline. However there's now a nifty
323 # new (useful) bustard config file.
324 elif os.path.exists(bustard_config_filename):
325 bustard_config_root = ElementTree.parse(bustard_config_filename)
326 b.bustard_config = bustard_config_root.getroot()
327 b.crosstalk = crosstalk_matrix_from_bustard_config(b.pathname, b.bustard_config)
328 software = bustard_config_root.find('*/Software')
329 b.version = software.attrib['Version']
330 #b.version = software.attrib['Name'] + "-" + software.attrib['Version']
336 Reconstruct a htsworkflow.pipelines.Bustard object from an xml block
342 def make_cmdline_parser():
343 from optparse import OptionParser
344 parser = OptionParser('%prog: bustard_directory')
348 parser = make_cmdline_parser()
349 opts, args = parser.parse_args(cmdline)
351 for bustard_dir in args:
352 print u'analyzing bustard directory: ' + unicode(bustard_dir)
353 bustard_object = bustard(bustard_dir)
354 bustard_object.dump()
356 bustard_object2 = Bustard(xml=bustard_object.get_elements())
357 print ('-------------------------------------')
358 bustard_object2.dump()
359 print ('=====================================')
360 b1_tree = bustard_object.get_elements()
361 b1 = ElementTree.tostring(b1_tree).split(os.linesep)
362 b2_tree = bustard_object2.get_elements()
363 b2 = ElementTree.tostring(b2_tree).split(os.linesep)
364 for line1, line2 in zip(b1, b2):
369 if __name__ == "__main__":