1 """ElementTree helper functions
5 LOGGER = logging.getLogger(__name__)
# Load the XHTML+RDFa 1.0 DTD once at import time. lxml resolves the public
# identifier through the XML catalog, which can fail when the DTD is not
# installed locally.
try:
    XHTML_RDF_DTD = lxml.etree.DTD(external_id='-//W3C//DTD XHTML+RDFa 1.0//EN')
except lxml.etree.DTDParseError as e:
    # validate_xhtml() tests "XHTML_RDF_DTD is None" to detect that validation
    # could not be configured, so make sure the name is always bound.
    XHTML_RDF_DTD = None
    # Logger.warn() is a deprecated alias; pass the exception as a lazy
    # argument so formatting only happens if the record is emitted.
    LOGGER.warning("Unable to load XHTML DTD %s", e)
def indent(elem, level=0):
    """Reformat an element tree in place to be 'pretty' (indented).

    Whitespace-only (or missing) ``text`` and ``tail`` values are replaced
    with a newline followed by two spaces per nesting level. Text or tail
    content that contains non-whitespace characters is left untouched.

    :param elem: root element of the (sub)tree to reformat; modified in place.
    :param level: nesting depth of ``elem``; callers normally leave this at 0,
        it is incremented on each recursive call.
    :returns: None
    """
    pad = "\n" + level * "  "
    if len(elem):
        # Element has children: its text precedes the first child, so it is
        # indented one level deeper than the element itself.
        if not elem.text or not elem.text.strip():
            elem.text = pad + "  "
        last_child = None
        for child in elem:
            indent(child, level + 1)
            last_child = child
        # we don't want the closing tag indented too far: the tail of the last
        # child precedes this element's closing tag, so pull it back to this
        # element's level (only if it is whitespace, to keep real tail text)
        if not last_child.tail or not last_child.tail.strip():
            last_child.tail = pad
        if not elem.tail or not elem.tail.strip():
            elem.tail = pad
    else:
        # Leaf element: only touch the tail, and never for a lone root
        # element (level 0).
        if level and (not elem.tail or not elem.tail.strip()):
            elem.tail = pad
def flatten(elem, include_tail=0):
    """Extract the text from an element tree.

    (AKA extract the text that is not part of XML tags.)

    :param elem: element whose text content should be collected.
    :param include_tail: when true, also append ``elem.tail`` (the text that
        follows the element's closing tag). Child elements are always
        flattened with their tails, since that text lies inside ``elem``.
    :returns: the concatenated text as a string ("" if there is none).
    """
    pieces = [elem.text or ""]
    # Tails of children sit inside this element, so always include them.
    pieces.extend(flatten(child, 1) for child in elem)
    if include_tail and elem.tail:
        pieces.append(elem.tail)
    return "".join(pieces)
42 def validate_xhtml(html, base_url='http://localhost'):
43 """Helper for validating xhtml, mostly intended for test code
45 Defaults to assuming XHTML+RDFa
46 Returns None if there was a problem configuring validation
47 Logs messages from lxml.etree using python logging
48 Returns True if it passed validation
49 and False if it fails.
51 if XHTML_RDF_DTD is None:
55 root = lxml.etree.fromstring(html, base_url=base_url)
56 except lxml.etree.ParseError as e:
57 LOGGER.warn("Unable to parse document: %s" % (str(e),))
60 if XHTML_RDF_DTD.validate(root):
65 for msg in XHTML_RDF_DTD.error_log.filter_from_errors():
66 # I have no idea how to suppress this error
67 # but I need the xmlns attributes for my RDFa 1.0 encoding
68 if 'ERROR:VALID:DTD_UNKNOWN_ATTRIBUTE' in str(msg):