2 Extract configuration from Illumina Bustard Directory.
4 This includes the version number, run date, bustard executable parameters, and
8 from datetime import date
16 from htsworkflow.pipelines.runfolder import \
22 __docformat__ = "restructuredtext en"
24 LANE_LIST = range(1,9)
26 class Phasing(object):
28 PREPHASING = 'Prephasing'
30 def __init__(self, fromfile=None, xml=None):
33 self.prephasing = None
35 if fromfile is not None:
36 self._initialize_from_file(fromfile)
38 self.set_elements(xml)
40 def _initialize_from_file(self, pathname):
41 path, name = os.path.split(pathname)
42 basename, ext = os.path.splitext(name)
43 # the last character of the param base filename should be the
45 tree = ElementTree.parse(pathname).getroot()
46 self.set_elements(tree)
47 self.lane = int(basename[-1])
49 def get_elements(self):
50 root = ElementTree.Element(Phasing.PHASING, {'lane': str(self.lane)})
51 root.tail = os.linesep
52 phasing = ElementTree.SubElement(root, Phasing.PHASING)
53 phasing.text = str(self.phasing)
54 phasing.tail = os.linesep
55 prephasing = ElementTree.SubElement(root, Phasing.PREPHASING)
56 prephasing.text = str(self.prephasing)
57 prephasing.tail = os.linesep
60 def set_elements(self, tree):
61 if tree.tag not in ('Phasing', 'Parameters'):
62 raise ValueError('exptected Phasing or Parameters')
63 lane = tree.attrib.get('lane', None)
66 for element in list(tree):
67 if element.tag == Phasing.PHASING:
68 self.phasing = float(element.text)
69 elif element.tag == Phasing.PREPHASING:
70 self.prephasing = float(element.text)
72 class CrosstalkMatrix(object):
73 CROSSTALK = "MatrixElements"
77 def __init__(self, fromfile=None, xml=None):
80 if fromfile is not None:
81 self._initialize_from_file(fromfile)
83 self.set_elements(xml)
85 def _initialize_from_file(self, pathname):
86 data = open(pathname).readlines()
87 auto_header = '# Auto-generated frequency response matrix'
88 if data[0].strip() != auto_header or len(data) != 9:
89 raise RuntimeError("matrix file %s is unusual" % (pathname,))
90 # skip over lines 1,2,3,4 which contain the 4 bases
91 self.base['A'] = [ float(v) for v in data[5].split() ]
92 self.base['C'] = [ float(v) for v in data[6].split() ]
93 self.base['G'] = [ float(v) for v in data[7].split() ]
94 self.base['T'] = [ float(v) for v in data[8].split() ]
96 def get_elements(self):
97 root = ElementTree.Element(CrosstalkMatrix.CROSSTALK)
98 root.tail = os.linesep
99 base_order = ['A','C','G','T']
101 base_element = ElementTree.SubElement(root, CrosstalkMatrix.BASE)
102 base_element.text = b
103 base_element.tail = os.linesep
105 for value in self.base[b]:
106 crosstalk_value = ElementTree.SubElement(root, CrosstalkMatrix.ELEMENT)
107 crosstalk_value.text = unicode(value)
108 crosstalk_value.tail = os.linesep
112 def set_elements(self, tree):
113 if tree.tag != CrosstalkMatrix.CROSSTALK:
114 raise ValueError('Invalid run-xml exptected '+CrosstalkMatrix.CROSSTALK)
118 for element in tree.getchildren():
119 # read in the order of the bases
120 if element.tag == 'Base':
121 base_order.append(element.text)
122 elif element.tag == 'Element':
123 # we're done reading bases, now its just the 4x4 matrix
124 # written out as a list of elements
125 # if this is the first element, make a copy of the list
126 # to play with and initialize an empty list for the current base
127 if current_base is None:
128 current_base = copy(base_order)
129 self.base[current_base[0]] = []
130 # we found (probably) 4 bases go to the next base
131 if current_index == len(base_order):
134 self.base[current_base[0]] = []
135 value = float(element.text)
136 self.base[current_base[0]].append(value)
140 raise RuntimeError("Unrecognized tag in run xml: %s" %(element.tag,))
142 def crosstalk_matrix_from_bustard_config(bustard_path, bustard_config_tree):
144 Analyze the bustard config file and try to find the crosstalk matrix.
146 bustard_run = bustard_config_tree[0]
147 if bustard_run.tag != 'Run':
148 raise RuntimeError('Expected Run tag, got %s' % (bustard_run.tag,))
150 call_parameters = bustard_run.find('BaseCallParameters')
151 if call_parameters is None:
152 raise RuntimeError('Missing BaseCallParameters section')
154 matrix = call_parameters.find('Matrix')
156 raise RuntimeError('Expected to find Matrix in Bustard BaseCallParameters')
158 matrix_auto_flag = int(matrix.find('AutoFlag').text)
159 matrix_auto_lane = int(matrix.find('AutoLane').text)
162 # we estimated the matrix from something in this run.
163 # though we don't really care which lane it was
164 matrix_path = os.path.join(bustard_path, 'Matrix', 's_02_matrix.txt')
165 matrix = CrosstalkMatrix(matrix_path)
167 # the matrix was provided
168 matrix_elements = call_parameters.find('MatrixElements')
169 if matrix_elements is None:
170 raise RuntimeError('Expected to find MatrixElements in Bustard BaseCallParameters')
171 matrix = CrosstalkMatrix(xml=matrix_elements)
173 print "matrix:", matrix
176 class Bustard(object):
181 SOFTWARE_VERSION = 'version'
184 PARAMETERS = 'Parameters'
185 BUSTARD_CONFIG = 'BaseCallAnalysis'
187 def __init__(self, xml=None):
189 self.date = date.today()
192 self.crosstalk = None
194 self.bustard_config = None
197 self.set_elements(xml)
200 return time.mktime(self.date.timetuple())
201 time = property(_get_time, doc='return run time as seconds since epoch')
204 #print ElementTree.tostring(self.get_elements())
205 ElementTree.dump(self.get_elements())
207 def get_elements(self):
208 root = ElementTree.Element('Bustard',
209 {'version': str(Bustard.XML_VERSION)})
210 version = ElementTree.SubElement(root, Bustard.SOFTWARE_VERSION)
211 version.text = self.version
212 run_date = ElementTree.SubElement(root, Bustard.DATE)
213 run_date.text = str(self.time)
214 user = ElementTree.SubElement(root, Bustard.USER)
215 user.text = self.user
216 params = ElementTree.SubElement(root, Bustard.PARAMETERS)
218 # add phasing parameters
219 for lane in LANE_LIST:
220 params.append(self.phasing[lane].get_elements())
222 # add crosstalk matrix if it exists
223 if self.crosstalk is not None:
224 root.append(self.crosstalk.get_elements())
226 # add bustard config if it exists
227 if self.bustard_config is not None:
228 root.append(self.bustard_config)
231 def set_elements(self, tree):
232 if tree.tag != Bustard.BUSTARD:
233 raise ValueError('Expected "Bustard" SubElements')
234 xml_version = int(tree.attrib.get('version', 0))
235 if xml_version > Bustard.XML_VERSION:
236 logging.warn('Bustard XML tree is a higher version than this class')
237 for element in list(tree):
238 if element.tag == Bustard.SOFTWARE_VERSION:
239 self.version = element.text
240 elif element.tag == Bustard.DATE:
241 self.date = date.fromtimestamp(float(element.text))
242 elif element.tag == Bustard.USER:
243 self.user = element.text
244 elif element.tag == Bustard.PARAMETERS:
245 for param in element:
246 p = Phasing(xml=param)
247 self.phasing[p.lane] = p
248 elif element.tag == CrosstalkMatrix.CROSSTALK:
249 self.crosstalk = CrosstalkMatrix(xml=element)
250 elif element.tag == Bustard.BUSTARD_CONFIG:
251 self.bustard_config = element
253 raise ValueError("Unrecognized tag: %s" % (element.tag,))
255 def bustard(pathname):
257 Construct a Bustard object by analyzing an Illumina Bustard directory.
260 - `pathname`: A bustard directory
263 Fully initialized Bustard object.
266 pathname = os.path.abspath(pathname)
267 path, name = os.path.split(pathname)
268 groups = name.split("_")
269 version = re.search(VERSION_RE, groups[0])
270 b.version = version.group(1)
271 t = time.strptime(groups[1], EUROPEAN_STRPTIME)
272 b.date = date(*t[0:3])
274 b.pathname = pathname
275 bustard_config_filename = os.path.join(pathname, 'config.xml')
276 print bustard_config_filename
277 paramfiles = glob(os.path.join(pathname, "params?.xml"))
278 for paramfile in paramfiles:
279 phasing = Phasing(paramfile)
280 assert (phasing.lane >= 1 and phasing.lane <= 8)
281 b.phasing[phasing.lane] = phasing
282 # I only found these in Bustard1.9.5/1.9.6 directories
283 if b.version in ('1.9.5', '1.9.6'):
284 # at least for our runfolders for 1.9.5 and 1.9.6 matrix[1-8].txt are always the same
285 crosstalk_file = os.path.join(pathname, "matrix1.txt")
286 b.crosstalk = CrosstalkMatrix(crosstalk_file)
287 # for version 1.3.2 of the pipeline the bustard version number went down
288 # to match the rest of the pipeline. However there's now a nifty
289 # new (useful) bustard config file.
290 elif os.path.exists(bustard_config_filename):
291 bustard_config_root = ElementTree.parse(bustard_config_filename)
292 b.bustard_config = bustard_config_root.getroot()
293 b.crosstalk = crosstalk_matrix_from_bustard_config(b.pathname, b.bustard_config)
299 Reconstruct a htsworkflow.pipelines.Bustard object from an xml block
305 def make_cmdline_parser():
306 from optparse import OptionParser
307 parser = OptionParser('%prog: bustard_directory')
311 parser = make_cmdline_parser()
312 opts, args = parser.parse_args(cmdline)
314 for bustard_dir in args:
315 print u'analyzing bustard directory: ' + unicode(bustard_dir)
316 bustard_object = bustard(bustard_dir)
317 bustard_object.dump()
319 bustard_object2 = Bustard(xml=bustard_object.get_elements())
320 print ('-------------------------------------')
321 bustard_object2.dump()
322 print ('=====================================')
323 b1_tree = bustard_object.get_elements()
324 b1 = ElementTree.tostring(b1_tree).split(os.linesep)
325 b2_tree = bustard_object2.get_elements()
326 b2 = ElementTree.tostring(b2_tree).split(os.linesep)
327 for line1, line2 in zip(b1, b2):
332 if __name__ == "__main__":