prev | Draft Version 574 (Thu Dec 1 09:18:53 2005) | next |
<person role="litigant"> <given-name>Charles</given-name> <surname>Babbage</surname> </person>
"<>"
<tagname>…</tagname>
<tagname/>
<X>…<Y>…</Y></X>
is legal…
<X>…<Y>…</X></Y>
is not
<first> This document is <em>illegal</em> </first> <second> because it does not have a unique root element. </second>
"<"
and ">"
&name;
html | Root element of entire HTML document. |
---|---|
body | Body of page (i.e., visible content). |
h1 | Top-level heading. Use h2 , h3 , etc. for second- and third-level headings. |
p | Paragraph. |
em | Emphasized text; browser or editor will usually display it in italics. |
address | Address of document author (also usually displayed in italics). |
h1
(level-1 heading) is semantic, i
(italics) is display
<html> <body> <h1>Software Carpentry</h1> <p>This course will introduce <em>essential software development skills</em>, and show where and how they should be applied.</p> <address>Greg Wilson (gvwilson@third-bit.com)</address> </body> </html> |
<h1>A Centered Heading</h1>
<p>This planet provided as-is.</p>
<p>…</p>
is illegal<p>…</p>
, but modern parsers will reject it
With Attributes | Without Attributes |
---|---|
<a b="c"> <d e="f"/> </a> |
<a> <a-b>c</a-b> <d><d-e>f</d-e></d> </a> |
head
element as well as a body
<!--
, and end with -->
<html> <head> <title>Comments Page</title> <meta name="author" content="aturing"/> </head> <body> <!-- House style puts all titles in italics --> <h1><em>Welcome to the Comments Page</em></h1> <!-- Update this paragraph to describe the forum. --> <p>Welcome to the Comments Forum.</p> </body> </html>
ul
for an unordered (bulleted) list, and ol
for an ordered (numbered) one
li
table
for tables
tr
(for “table row”)
td
(for “table data”)
<html> <head> <title>Lists and Tables</title> <meta name="svn" content="$Id: xml.swc 54 2005-04-13 13:29:28Z gvwilson $"/> </head> <body> <table cellpadding="3" border="1"> <tr> <td align="center"><em>Unordered List</em></td> <td align="center"><em>Ordered List</em></td> </tr> <tr> <td align="left" valign="top"> <ul> <li>Hydrogen</li> <li>Lithium</li> <li>Sodium</li> <li>Potassium</li> <li>Rubidium</li> <li>Cesium</li> <li>Francium</li> </ul> </td> <td align="left" valign="top"> <ol> <li>Helium</li> <li>Neon</li> <li>Argon</li> <li>Krypton</li> <li>Xenon</li> <li>Radon</li> </ol> </td> </tr> </table> </body> </html> |
meta
elements in document head
img
tag
src
attribute specifies where to find the image file
alt
attribute to specify alternative text
a
element to create a link
href
attribute specifies what the link is pointing at
<html> <head> <title>Links</title> <meta name="svn" content="$Id: xml.swc 54 2005-04-13 13:29:28Z gvwilson $"/> </head> <body> <h1>A Few of My Favorite Places</h1> <ul> <li><a href="http://www.google.com">Google</a></li> <li><a href="http://www.python.org">Python</a></li> <li><a href="http://www.nature.com/index.html">Nature Online</a></li> <li>Examples in this lecture: <ul> <li><a href="comments.html">Comments</a></li> <li><a href="image.html">Images</a></li> <li><a href="list_table.html">Lists and Tables</a></li> </ul> </li> </ul> </body> </html> |
Top 10 Accessible Web Authoring Practices
describes what you should do to make your pages more accessible
minidom
<?xml version="1.0" encoding="utf-8"?> <planet name="Mercury"> <period units="days">87.97</period> </planet>
import xml.dom.minidom

# Read the XML file from disk into a DOM tree.
doc = xml.dom.minidom.parse('mercury.xml')

# Serialize the whole tree back out, declaring UTF-8 as the encoding.
print(doc.toxml('utf-8'))
<?xml version="1.0" encoding="utf-8"?> <planet name="Mercury"> <period units="days">87.97</period> </planet>
toxml
method can be called on the document, or on any element node
"utf-8"
as the character encoding
import xml.dom.minidom
src = '''<planet name="Venus"> <period units="days">224.7</period> </planet>'''
doc = xml.dom.minidom.parseString(src)
print doc.toxml('utf-8')
<?xml version="1.0" encoding="utf-8"?> <planet name="Venus"> <period units="days">224.7</period> </planet>
import xml.dom.minidom

# Create an empty document whose root element is <planet>.
impl = xml.dom.minidom.getDOMImplementation()
doc = impl.createDocument(None, 'planet', None)

# Build the <period> element and the text it contains.
period = doc.createElement('period')
text = doc.createTextNode('686.98')
period.appendChild(text)

# Name the planet on the root element, then attach the period to it.
root = doc.documentElement
root.setAttribute('name', 'Mars')
root.appendChild(period)

print(doc.toxml('utf-8'))
<?xml version="1.0" encoding="utf-8"?> <planet name="Mars"><period>686.98</period></planet>
xml.dom.minidom
is really just a wrapper around other platform-specific XML libraries
document
node
createDocument
are
createDocument
what type of element the document's root node should be
setAttribute(attributeName, newValue)
import xml.dom.minidom

# Sample document; the single spaces between elements become one-character
# text nodes in the parsed tree.
src = (
    '<solarsystem> '
    '<planet name="Mercury"><period units="days">87.97</period></planet> '
    '<planet name="Venus"><period units="days">224.7</period></planet> '
    '<planet name="Earth"><period units="days">365.26</period></planet> '
    '</solarsystem> '
)


def walkTree(currentNode, indent=0):
    """Print an outline of the DOM subtree rooted at currentNode.

    Text nodes are shown as 'TEXT (n)', where n is the length of their
    data.  Every other node is shown by its node name, followed by its
    children one level deeper.

    currentNode -- the DOM node to start from.
    indent      -- nesting depth; one space is printed per level.
    """
    spaces = ' ' * indent
    if currentNode.nodeType == currentNode.TEXT_NODE:
        print(spaces + 'TEXT' + ' (%d)' % len(currentNode.data))
        return

    # nodeName is defined for every node type and equals tagName for
    # elements; tagName itself is missing on comments and CDATA sections,
    # so relying on it would raise AttributeError for those children.
    print(spaces + currentNode.nodeName)
    for child in currentNode.childNodes:
        walkTree(child, indent + 1)


doc = xml.dom.minidom.parseString(src)
walkTree(doc.documentElement)
solarsystem TEXT (1) planet period TEXT (5) TEXT (1) planet period TEXT (5) TEXT (1) planet period TEXT (6) TEXT (1)
nodeType
ELEMENT_NODE
, TEXT_NODE
, ATTRIBUTE_NODE
, DOCUMENT_NODE
childNodes
data
class Visitor(object):
    """Depth-first walker over a DOM tree.

    For every node below the document, visit() calls before(), then at(),
    then visits the children (element nodes only), then calls after().
    All three hooks default to doNothing; subclasses override the ones
    they care about.
    """

    def __init__(self):
        pass

    def visit(self, node):
        """Traverse the subtree rooted at node, firing the hooks."""
        kind = node.nodeType

        if kind == node.DOCUMENT_NODE:
            # The document itself gets no hooks; start at its root element.
            self.visit(node.documentElement)
        else:
            self.before(node)
            self.at(node)
            # Only element nodes are descended into.
            if kind == node.ELEMENT_NODE:
                for kid in node.childNodes:
                    self.visit(kid)
            self.after(node)

    def doNothing(self, node):
        """Default hook: ignore the node."""
        pass

    # Hook points, all no-ops until a subclass replaces them.
    before = doNothing
    at = doNothing
    after = doNothing
Visitor.visit
with the root node of the tree they want to traverse
class Counter(Visitor):
    def __init__(self):
        Visitor.__init__(self)
        self.count = 0

    def at(self, node):
        if node.nodeType == node.ELEMENT_NODE:
            self.count += 1
count
to zero before traversing
if __name__ == '__main__':
    src = '<a><b>c</b><d>e</d><f>g<h/>i</f></a>'
    tree = xml.dom.minidom.parseString(src)
    c = Counter()
    c.visit(tree)
    assert c.count == 5
<em/>
element whose only child is a text node containing that word
<em/>
getElementsByTagName
, and iterate over them
def emphasize(doc):
    paragraphs = doc.getElementsByTagName('p')
    for para in paragraphs:
        first = para.firstChild
        if first.nodeType == first.TEXT_NODE:
            emphasizeText(doc, para, first)
def emphasizeText(doc, para, textNode):
    """Wrap the first word of textNode in an <em> element.

    The text is split into optional leading whitespace, the first word,
    and everything after it.  Those pieces are inserted into para in
    that order, with the word inside a new <em>, and the original text
    node is then removed.  Nothing is changed when no first word is found.

    doc      -- the owning document, used to create the new nodes.
    para     -- the element that contains textNode.
    textNode -- the text node whose first word is to be emphasized.
    """
    # Match the node's data directly: wrapping it in str() fails on
    # non-ASCII text under Python 2.  DOTALL lets the tail run across line
    # breaks, which are common inside paragraphs; without it such text does
    # not match and is silently skipped.  UNICODE keeps \S and \b correct
    # for non-ASCII letters on Python 2 and is already the default for
    # text patterns on Python 3.
    m = re.match(r'^(\s*)(\S*)\b(.*)$', textNode.data,
                 re.DOTALL | re.UNICODE)
    if not m:
        return
    leadingSpace, firstWord, restOfText = m.groups()
    if not firstWord:
        return

    # Emphasize the first word.
    emph = doc.createElement('em')
    emph.appendChild(doc.createTextNode(firstWord))

    # Insert the pieces in reading order immediately before the original
    # node, so the result is right wherever that node sits in para.
    if leadingSpace:
        para.insertBefore(doc.createTextNode(leadingSpace), textNode)
    para.insertBefore(emph, textNode)
    if restOfText:
        para.insertBefore(doc.createTextNode(restOfText), textNode)

    # Get rid of the original text.
    para.removeChild(textNode)
if __name__ == '__main__':
    # Three paragraphs; the second already contains an <em> of its own.
    src = (
        '<html><body> '
        '<p>First paragraph.</p> '
        '<p>Second paragraph contains <em>emphasis</em>.</p> '
        '<p>Third paragraph.</p> '
        '</body></html>'
    )

    # Parse, emphasize the first word of each paragraph, and show the result.
    doc = xml.dom.minidom.parseString(src)
    emphasize(doc)
    print(doc.toxml('utf-8'))
<?xml version="1.0" encoding="utf-8"?> <html><body> <p><em>First</em> paragraph.</p> <p><em>Second</em> paragraph contains <em>emphasis</em>.</p> <p><em>Third</em> paragraph.</p> </body></html>
prev | Copyright © 2005, Python Software Foundation. See License for details. | next |