root/pydotorg/pyfaq/htmlload.py
| Revision 140 (by effbot, 03/17/06 03:40:37) |
|---|
# $Id$
# XHTML/HTML loader

import os
import subprocess

try:
    import cElementTree as ET
except ImportError:
    # The standalone accelerator is not installed; the standard library
    # module provides the same parse() interface.
    import xml.etree.ElementTree as ET

NS_XHTML = "{http://www.w3.org/1999/xhtml}"

##
# Loads an XHTML or HTML file into an Element structure.  Note that
# HTML files are converted to XHTML in place, via <b>tidy</b>.
# <p>
# The file is first handed to the loader as is.  If that raises, tidy
# is run on the file (rewriting it on disk) and the loader is called
# one more time; an error from this second attempt propagates to the
# caller.
#
# @param file Name of the file to load.
# @param loader Optional callable that takes the file name and returns
#     the parsed structure.  Defaults to ET.parse.
# @return The object returned by the loader, with the XHTML namespace
#     prefix stripped from every tag.

def load(file, loader=None):
    if not loader:
        loader = ET.parse
    try:
        elem = loader(file)
    except Exception:
        # Only ordinary errors trigger the tidy fallback; KeyboardInterrupt
        # and SystemExit must not cause the file to be rewritten.
        # FIXME: needs locking! (atomic rename should be good enough)
        try:
            # Argument list, no shell: quotes or other shell metacharacters
            # in the file name cannot be interpreted as commands.  The name
            # is formatted with %s exactly as the old command line did.
            subprocess.call(["tidy", "-qnm", "-asxml", "%s" % (file,)])
        except OSError:
            # tidy could not be started (e.g. not installed); fall through
            # so that the retry below reports the real parsing problem.
            pass
        elem = loader(file) # if this fails, the file was too broken
    # clean up namespace
    # (iter() replaces getiterator(), which newer ElementTree versions no
    # longer provide; older versions only have getiterator())
    walk = getattr(elem, "iter", None) or elem.getiterator
    for node in walk():
        if node.tag.startswith(NS_XHTML):
            node.tag = node.tag[len(NS_XHTML):]
    return elem
Note: See TracBrowser for help on using the browser.
