root/stuff/sandbox/sourceforge/xml-export.py

Revision 350 (by effbot, 07/12/06 12:23:36)

robustification:
- strip off trailing separator from directory name (from Humberto Diógenes)
- skip files that cannot be opened/read

# $Id$
# example: generate XML export file

import base64, os, re, sys
import htmlload
import extract

def indent(elem, level=0):
    """Prettify an element tree in place.

    Rewrites missing or whitespace-only ``text`` and ``tail`` values so
    that, when serialized, every element starts on a line of its own,
    indented by two spaces per nesting level.  Text and tails that
    contain anything but whitespace are left untouched.

    elem -- the element to indent; its whole subtree is modified.
    level -- nesting depth of elem (0 means the left margin).
    """
    pad = "\n" + level * "  "
    if len(elem):
        # only pad the text of elements that have children; padding a
        # leaf would turn an empty value into a whitespace string
        if not elem.text or not elem.text.strip():
            elem.text = pad + "  "
        # use a separate loop variable, so that elem still refers to
        # the element itself after the loop
        for child in elem:
            indent(child, level + 1)
        # the last child is followed by elem's end tag, which lines up
        # with elem's start tag rather than with the children
        if not child.tail or not child.tail.strip():
            child.tail = pad
    # every element, with or without children, gets a tail that indents
    # whatever follows it
    if not elem.tail or not elem.tail.strip():
        elem.tail = pad

# ElementTree API, as exposed by htmlload (NOTE: presumably the same
# implementation the loader itself uses -- confirm in htmlload)
ET = htmlload.ET

# the argument is either a complete "tracker-..." directory name, or
# just the part that follows the "tracker-" prefix
tracker = sys.argv[1]
dirname = tracker
if not dirname.startswith("tracker-"):
    dirname = "tracker-" + dirname

# strip off trailing separators, so that ".xml" below is appended to
# the directory name itself
while os.path.basename(dirname) == "":
    dirname = os.path.dirname(dirname)

# open the export file and the root element; the matching end tag is
# written further down
out = open(dirname + ".xml", "w")
out.write("<tracker id='%s'>\n" % tracker)

files = extract.getpagefiles(dirname)

for index, pagefile in enumerate(files):
    print >>sys.stderr, "\r", pagefile, "-", index, "of", len(files),
    try:
	info = extract.extract(pagefile)
    except IOError, v:
	print >>sys.stderr, "\n", "*** failed to read", pagefile, "-", v
	continue
    elem = ET.Element("item", id=info.get("item_id"))
    for key, value in sorted(info.items()):
        if isinstance(value, basestring):
            ET.SubElement(elem, key).text = value
        elif key == "attachments":
            for attachment in value:
                file_id = attachment["file_id"]
                e = ET.SubElement(elem, "attachment", file_id=file_id)
                for k, v in sorted(attachment.items()):
                    if k == "file_id":
                        continue
                    ET.SubElement(e, k).text = v
                try:
                    message, data = extract.extractdata(pagefile, file_id)
                except IOError:
                    pass
                else:
                    data = data.read()
                    s = ET.SubElement(e, "data", encoding="base64")
                    s.text = "\n" + base64.encodestring(data)
        elif key == "comments":
            for comment in value:
                e = ET.SubElement(elem, "comment")
                for k, v in sorted(comment.items()):
                    ET.SubElement(e, k).text = v
        else:
            print >>sys.stderr, "ignoring", key, value, "..."
    indent(elem, 1)
    elem.tail = "\n"
    ET.ElementTree(elem).write(out)
    # write end marker (in case we crash)
    pos = out.tell()
    out.write("</tracker>\n")
    out.seek(pos)


out.close()

print
print out.name, "ok"
Note: See TracBrowser for help on using the browser.