prev | Draft Version 398 (Thu Dec 1 09:18:46 2005) | next |
<lec/>
element with title
and id
attributes<topic/>
elementstitle
attributesummary
attribute (used to construct the syllabus)<slide/>
elements<b1/>
(for “bullet level 1”), which contain <b2/>
, and so on<lec/>
element matches the filename
#!/usr/bin/env python
'''Check for tabs in one or more files.'''

import sys

def checkTabs(filename):
    '''Look for tabs.'''
    infile = open(filename, 'r')
    for line in infile.readlines():
        if line.find('\t') >= 0:
            print '%s contains tabs' % filename
            break
    infile.close()

if __name__ == '__main__':
    for filename in sys.argv[1:]:
        checkTabs(filename)
sys.stdin
is an already-open file, not a filenamecheckTabs
to take both the filename, and an open fileopen
and close
to the main body
#!/usr/bin/env python
'''Check for tabs in one or more files, or on standard input.'''

import sys

def checkTabs(filename, infile):
    '''Look for tabs.'''
    for line in infile.readlines():
        if line.find('\t') >= 0:
            print '%s contains tabs' % filename
            break

if __name__ == '__main__':
    if len(sys.argv) == 1:
        checkTabs('<stdin>', sys.stdin)
    else:
        for filename in sys.argv[1:]:
            infile = open(filename, 'r')
            checkTabs(filename, infile)
            infile.close()
IOError
#!/usr/bin/env python '''Check for tabs in one or more files, or on standard input, and report errors.''' import sys def checkTabs(filename, infile): '''Look for tabs.''' for line in infile.readlines(): if line.find('\t') >= 0: print '%s contains tabs' % filename break if __name__ == '__main__': try: if len(sys.argv) == 1: checkTabs('<stdin>', sys.stdin) else: for filename in sys.argv[1:]: infile = open(filename, 'r') checkTabs(filename, infile) infile.close() except IOError, e: print >> sys.stderr, e
else
(since we don't think I/O errors can happen while reading standard input)for
(so that if an error occurs while reading one file, the program continues on to the next)python check_tabs.py file1 fil2 file…
will work…lec
for lecture notes (in .swc
files)util
for utility programs (like the validation tools)img
for imageslec/xyz.swc
go in the img/xyz
directorysrc
for source codesrc/xyz
holds sample files for the XYZ lecture.swc
files
# Re-make everything used in the Software Carpentry course.

all :
	@echo 'options: clean validate'

clean :
	@rm -f *~ */*~ */*/*~

validate :
	@python util/check_tabs.py lec/*.swc
clean
target that gets rid of editor backup files ending in ~
all
, that lists the things the Makefile can do"@"
in front of the commands means, “Don't echo the command before running it”"---"
into "—"
, etc.
string.printable
getopt
modulesys.argv[0]
, which is the name of the program"a:bcd:"
means “-a and -d have an argument, -b and -c don't”
doPrintable = True
doTabs = True
settings, filenames = getopt.getopt(sys.argv[1:], 'pt')
for (opt, arg) in settings:
    if opt == '-p':
        doPrintable = False
    elif opt == '-t':
        doTabs = False
checkTabs
checkPrintable
need to process the same data
try:
    if not filenames:
        lines = sys.stdin.readlines()
        checkTabs('<stdin>', lines)
        checkPrintable('<stdin>', lines)
    else:
        for filename in filenames:
            infile = open(filename, 'r')
            lines = infile.readlines()
            infile.close()
            checkTabs(filename, lines)
            checkPrintable(filename, lines)
except IOError, e:
    print >> sys.stderr, e
checkTabs
is easy
def checkTabs(filename, lines):
    '''Look for tabs.'''
    for line in lines:
        if line.find('\t') >= 0:
            print '%s contains tabs' % filename
            break
checkPrintable
is simple as well
def checkPrintable(filename, lines):
    '''Look for non-printable characters.'''
    for line in lines:
        for c in line:
            if c not in string.printable:
                print '%s contains non-printable characters' % filename
                print line
                break
<d ref="immutable">Immutable</d>
<d/>
for “definition”ref
attribute is the term that appears in the glossary
<glossary>
  <glosssec title="A">
    <glossitem id="absolute_path" term="absolute path">...definition...</glossitem>
    <glossitem id="abstract_data_types" term="abstract data types">...</glossitem>
    <glossitem id="access_control" term="access control">...</glossitem>
    ...
    <glossitem id="automatic_variables" term="automatic variables (in Make)">...</glossitem>
  </glosssec>
  ...
</glossary>
# Defaults: no glossary file, and every check enabled.
glossary = None
doGlossaryComplete = True
doPrintable = True
doTabs = True

# In the option string, 'G:' takes an argument (kept in glossary as given);
# 'g', 'p' and 't' are plain flags that each switch one check off.
settings, filenames = getopt.getopt(sys.argv[1:], 'G:gpt')
for (opt, arg) in settings:
    if opt == '-G':
        glossary = arg
    elif opt == '-g':
        doGlossaryComplete = False
    elif opt == '-p':
        doPrintable = False
    elif opt == '-t':
        doTabs = False
['<stdin>']
readFile
to open and read a file"<stdin>"
, it reads from sys.stdin
try:
    # Replace the glossary filename (if one was given) with whatever
    # readGlossaryFile builds from it.
    if glossary:
        glossary = readGlossaryFile(glossary)
    # No filenames on the command line: use the '<stdin>' placeholder.
    if not filenames:
        filenames = ['<stdin>']
    # NOTE(review): doTabs and doPrintable are not consulted in this
    # fragment - confirm whether these checks were meant to be conditional.
    for filename in filenames:
        lines, doc = readFile(filename)
        checkTabs(filename, lines)
        checkPrintable(filename, lines)
        if glossary:
            checkGlossary(filename, doc, glossary)
    # Runs once, after every file has been checked against the glossary.
    if glossary and doGlossaryComplete:
        checkGlossaryComplete(glossary)
except IOError, e:
    print >> sys.stderr, e
except xml.parsers.expat.ExpatError, e:
    print >> sys.stderr, e
readFile
def readFile(filename):
    '''Read one document and return it as (lines, doc).

    filename is a path to open, or the placeholder '<stdin>', in which
    case the text is read from sys.stdin instead.

    Returns a pair: the text split into lines (line endings kept, as
    readlines() gives them) and the xml.dom.minidom document parsed from
    the same text.  Errors from open/read and from the XML parser are
    not caught here.
    '''
    if filename == '<stdin>':
        data = sys.stdin.read()
    else:
        infile = open(filename, 'r')
        try:
            data = infile.read()
        finally:
            # Close the handle even when the read fails.
            infile.close()

    # Re-wrap the text as a file-like object so it is split into lines
    # exactly the way readlines() would split it.
    lines = cStringIO.StringIO(data).readlines()
    doc = xml.dom.minidom.parseString(data)
    return lines, doc
readGlossaryFile
builds a dictionary whose keys are the terms defined in the glossaryNone
for now—see why in a moment
def readGlossaryFile(filename):
    if filename is None:
        return None
    infile = open(filename, 'r')
    doc = xml.dom.minidom.parse(infile)
    terms = doc.getElementsByTagName('glossitem')
    result = {}
    for term in terms:
        t = str(term.getAttribute('id'))
        result[t] = None
    return result
checkGlossary
processes uses of glossary terms in a single lecture fileglossary
dictionary
def checkGlossary(filename, doc, glossary):
    defns = doc.getElementsByTagName('d')
    for defn in defns:
        d = str(defn.getAttribute('ref'))
        if d not in glossary:
            print 'term %s in %s missing from glossary' % (d, filename)
        elif glossary[d] is not None:
            print 'term %s defined in %s and %s' % (d, filename, glossary[d])
        else:
            glossary[d] = filename
checkGlossaryComplete
looks for glossary entries without associated filenames
def checkGlossaryComplete(glossary):
    unused = []
    for g in glossary:
        if glossary[g] is None:
            unused.append(g)
    if unused:
        unused.sort()
        print 'unused terms'
        for u in unused:
            print '\t%s' % u
-I dir
specifies the root directory for images-C dir
specifies the root directory for code fragmentsid
attribute to determine which particular subdirectory to searchgetopt.getopt
string, and four more lines to the main processing loop
if codeRootDir:
    checkFiles(filename, doc, codeRootDir, 'code', 'src')
if imageRootDir:
    checkFiles(filename, doc, imageRootDir, 'img', 'src')
checkFiles
.svn
directorySet
automatically handles multiple references to a single source file
def checkFiles(filename, doc, rootDir, eltName, attrName):

    # What should we ignore?
    Excludes = ['.svn']

    # Find out where we're supposed to look.
    docId = str(doc.documentElement.getAttribute('id'))
    dir = os.path.join(rootDir, docId)
    if not os.path.isdir(dir):
        print >> sys.stderr, 'Missing directory: %s' % dir
        return

    # Find out what's there that we care about.
    actual = Set(os.listdir(dir))
    for e in Excludes:
        actual.discard(e)

    # Find what's used in the document.
    elts = doc.getElementsByTagName(eltName)
    referenced = Set()
    for e in elts:
        if e.hasAttribute(attrName):
            referenced.add(str(e.getAttribute(attrName)))

    # Show differences (if any).
    showDiff(filename, dir, 'not found', referenced - actual)
    showDiff(filename, dir, 'unused', actual - referenced)
showDiff
def showDiff(filename, dir, title, values):
    '''Print a titled list of values, or nothing when values is empty.

    filename and dir only label the heading line; title names the kind
    of difference being reported.  values is any collection of names;
    each one is printed on its own tab-indented line.

    Values are printed in sorted order so the report does not depend on
    the iteration order of the set that was passed in.
    '''
    if not values:
        return
    # Single-argument print(...) parses the same on Python 2 and 3.
    print('%s (for file %s and directory %s):' % (title, filename, dir))
    for v in sorted(values):
        print('\t%s' % v)
make clean
and make validate
before doing a commitsummary
values from each <topic/>
element to create HTML version of course syllabusExercise 19.1:
What does getopt
do when it encounters an argument it
doesn't recognize? Write a short program that demonstrates this
behavior, that can be run on its own without the user passing in
any command-line arguments.
prev | Copyright © 2005, Python Software Foundation. See License for details. | next |