root/stuff/sandbox/sourceforge/xml-export.py
| Revision 350 (by effbot, 07/12/06 12:23:36) |
|---|
# $Id$
# example: generate XML export file
#
# usage: xml-export.py tracker
#
# reads the page files found by extract.getpagefiles() for the given
# tracker directory and writes one <item> element per page to
# "<dirname>.xml", wrapped in a single <tracker> element.
#
# the code below is written to be valid on both Python 2.6+ and Python 3.

import base64
import os
import re  # not used below; kept from the original import list
import sys

try:
    _STRING_TYPES = basestring  # Python 2: covers both str and unicode
except NameError:
    _STRING_TYPES = str  # Python 3


def _is_blank(text):
    # true for None, "" and whitespace-only strings
    return not text or not text.strip()


def indent(elem, level=0):
    # prettify an element tree in place
    #
    # elem is the element to indent, level its nesting depth.  only text
    # and tail values that are empty or whitespace-only are replaced, so
    # real character data is never touched.
    #
    # NOTE(review): the one-space indent unit is what the source shows;
    # confirm it was not meant to be wider.
    pad = "\n" + level * " "
    children = list(elem)
    if not children:
        # childless element: leave the text alone, so that an empty
        # field stays empty instead of being filled with whitespace
        if _is_blank(elem.tail):
            elem.tail = pad
        return
    if _is_blank(elem.text):
        elem.text = pad + " "
    for child in children:
        indent(child, level + 1)
    # the last child is followed by the parent's end tag, so it gets the
    # parent's indentation rather than its own
    last = children[-1]
    if _is_blank(last.tail):
        last.tail = pad


def _encode_base64(data):
    # return the multi-line base64 form of data as a native string
    # (base64.encodestring was removed in Python 3.9; encodebytes is the
    # same function under its newer name)
    encode = getattr(base64, "encodebytes", None) or base64.encodestring
    encoded = encode(data)
    if not isinstance(encoded, str):
        encoded = encoded.decode("ascii")
    return encoded


def _add_fields(ET, parent, fields, skip=()):
    # add one subelement per (name, text) pair in fields, in sorted
    # order, leaving out the names listed in skip
    for name, text in sorted(fields.items()):
        if name in skip:
            continue
        ET.SubElement(parent, name).text = text


def _add_attachment(ET, extract, item, pagefile, attachment):
    # add an <attachment> element, including the base64-encoded file
    # contents if extract.extractdata() can provide them
    file_id = attachment["file_id"]
    node = ET.SubElement(item, "attachment", file_id=file_id)
    _add_fields(ET, node, attachment, skip=("file_id",))
    try:
        _message, stream = extract.extractdata(pagefile, file_id)
    except IOError:
        # deliberate best effort: the attachment is exported without a
        # <data> element if its contents cannot be read
        return
    data = stream.read()
    payload = ET.SubElement(node, "data", encoding="base64")
    payload.text = "\n" + _encode_base64(data)


def _build_item(ET, extract, pagefile, info):
    # build the <item> element for the info dictionary of one page
    item = ET.Element("item", id=info.get("item_id"))
    for key, value in sorted(info.items()):
        if isinstance(value, _STRING_TYPES):
            ET.SubElement(item, key).text = value
        elif key == "attachments":
            for attachment in value:
                _add_attachment(ET, extract, item, pagefile, attachment)
        elif key == "comments":
            for comment in value:
                _add_fields(ET, ET.SubElement(item, "comment"), comment)
        else:
            sys.stderr.write("ignoring %s %s ...\n" % (key, value))
    return item


def _serialize(ET, elem):
    # return elem (including its tail) as a native string; tostring
    # returns bytes on Python 3, which a text-mode file cannot take
    data = ET.tostring(elem)
    if not isinstance(data, str):
        data = data.decode("ascii")
    return data


def _write_end_marker(out):
    # write the end marker (in case we crash), then move back so that
    # the next item overwrites it
    pos = out.tell()
    out.write("</tracker>\n")
    out.seek(pos)


def main(argv=None):
    # export the tracker named by argv[1] (default: sys.argv[1])

    # project-local modules; imported here so that this file can be
    # imported (e.g. to reuse indent) without them
    import htmlload
    import extract

    ET = htmlload.ET

    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        sys.exit("usage: xml-export.py tracker")

    tracker = argv[1]

    if tracker.startswith("tracker-"):
        dirname = tracker
    else:
        dirname = "tracker-" + tracker

    # drop trailing path separators
    while not os.path.basename(dirname):
        dirname = os.path.dirname(dirname)

    out = open(dirname + ".xml", "w")
    try:
        out.write("<tracker id='%s'>\n" % tracker)
        # written before the first item as well, so that a tracker
        # without any pages still ends up as well-formed XML
        _write_end_marker(out)

        files = extract.getpagefiles(dirname)
        total = len(files)

        for index, pagefile in enumerate(files):
            sys.stderr.write("\r%s - %s of %s" % (pagefile, index, total))
            sys.stderr.flush()
            try:
                info = extract.extract(pagefile)
            except IOError as err:
                sys.stderr.write(
                    "\n*** failed to read %s - %s\n" % (pagefile, err)
                )
                continue
            elem = _build_item(ET, extract, pagefile, info)
            indent(elem, 1)
            elem.tail = "\n"
            out.write(_serialize(ET, elem))
            _write_end_marker(out)
    finally:
        # the last end marker written stays in place after the seek
        out.close()

    sys.stdout.write("\n%s ok\n" % out.name)


if __name__ == "__main__":
    main()
Note: See TracBrowser for help on using the browser.
