1 """ElementTree helper functions
5 LOGGER = logging.getLogger(__name__)
9 XHTML_RDF_DTD = lxml.etree.DTD(external_id='-//W3C//DTD XHTML+RDFa 1.0//EN')
10 except lxml.etree.DTDParseError as e:
12 LOGGER.warn("Unable to load XHTML DTD %s" % (str(e),))
14 def indent(elem, level=0):
16 reformat an element tree to be 'pretty' (indented)
20 if not elem.text or not elem.text.strip():
23 indent(child, level+1)
24 # we don't want the closing tag indented too far
26 if not elem.tail or not elem.tail.strip():
29 if level and (not elem.tail or not elem.tail.strip()):
32 def flatten(elem, include_tail=0):
34 Extract the text from an element tree
35 (AKA extract the text that is not part of XML tags)
37 text = elem.text or ""
40 if include_tail and elem.tail: text += elem.tail
43 def validate_xhtml(html, base_url='http://localhost'):
44 """Helper for validating xhtml, mostly intended for test code
46 Defaults to assuming XHTML+RDFa
47 Returns None if there was a problem configuring validation
48 Logs messages from lxml.etree using python logging
49 Returns True if it passed validation
50 and False if it fails.
52 if XHTML_RDF_DTD is None:
56 root = lxml.etree.fromstring(html, base_url=base_url)
57 except lxml.etree.ParseError as e:
58 LOGGER.warn("Unable to parse document: %s" % (str(e),))
61 if XHTML_RDF_DTD.validate(root):
66 for msg in XHTML_RDF_DTD.error_log.filter_from_errors():
67 # I have no idea how to suppress this error
68 # but I need the xmlns attributes for my RDFa 1.0 encoding
69 if 'ERROR:VALID:DTD_UNKNOWN_ATTRIBUTE' in str(msg):