|
| 1 | +#!/usr/bin/python |
| 2 | + |
| 3 | +# Given a template (with a specific format), a target document root and a set of formatted XML |
| 4 | +# documents, generate HTML documentation for public web access. |
| 5 | + |
| 6 | +# Extracts information from the XML using both regular expressions and proper XML parsing. |
| 7 | + |
| 8 | + |
| 9 | +from sys import argv, stderr, exit |
| 10 | + |
# The template and the target directory are mandatory; the list of source
# XML documents may be empty (in which case nothing is generated).
if len(argv) < 3:
    print >>stderr,"Usage:",argv[0]," <template.html> <target-doc-directory> [source XML document ... ]"
    exit(5)

template = argv[1]
targetDir = argv[2]

# Read the HTML template once; writeDoc() starts every generated page from
# this text. The with-block closes the file even if read() fails.
with open(template) as f:
    bp = f.read()

# NOTE(review): this module-level `doc` is not read by any code visible in
# this file (writeDoc() works on its own local `doc`); kept unchanged in case
# something else relies on it.
doc = bp
| 23 | + |
| 24 | +import libxml2 |
| 25 | +import os.path |
| 26 | + |
# Ask libxml2 to substitute entity references with their replacement text by
# default. This is a library-wide default, so it is set once, before any
# document is parsed.
libxml2.substituteEntitiesDefault(True)


def asText(x):
    """Return the string value of the XML markup in `x`.

    `x` is parsed as a complete XML document with libxml2.parseDoc and the
    resulting document node is cast to a string via xpathCastNodeToString().
    Any error raised by libxml2 while parsing propagates to the caller.
    """
    d = libxml2.parseDoc(x)
    try:
        return d.xpathCastNodeToString()
    finally:
        # libxml2 documents live outside Python's memory management and must
        # be released explicitly, otherwise every call leaks the parsed tree.
        d.freeDoc()
| 32 | + |
| 33 | +import re |
| 34 | + |
# Matches any run of whitespace characters. Raw string so that `\s` reaches
# the regex engine as written instead of relying on an invalid string escape.
wsRE = re.compile(r'\s+')


def trimWS(s):
    """Return `s` with its whitespace normalised.

    Every run of whitespace is collapsed into a single space, and the space
    left at the start or end of the string (if any) is removed. An empty or
    all-whitespace input yields the empty string.
    """
    # After collapsing, at most one space can remain at either end, so
    # strip(' ') removes exactly the characters the contract calls for.
    return wsRE.sub(' ', s).strip(' ')
| 45 | + |
# Matches one section of a source document: an opening <div id='NAME'> tag
# alone on its line, the section body, and a closing </div> alone on its line.
# Group 1 is NAME, group 2 is the body including its final newline. DOTALL
# lets the (non-greedy) body span several lines. Raw string so that `\w` and
# `\n` are interpreted by the regex engine rather than as string escapes.
secRe = re.compile(r"<div id='(\w+)'>\n(.*?\n)</div>\n", re.DOTALL)
| 47 | + |
| 48 | +import codecs |
| 49 | + |
def writeDoc(x, h):
    """Generate the HTML page `h` from the source XML document `x`.

    Starting from the template text held in the module-level `bp`:

    * the empty <title></title> element is filled with the whitespace-
      normalised text of the html:div whose id is "message";
    * every <div id='NAME'> section found in `x` (see `secRe`) is copied into
      the template slot `<h2>NAME</h2> <div class="docbody"></div>`, matched
      case-insensitively;
    * the result is decoded as UTF-8 and written to `h` as ISO-8859-1, with
      characters outside that charset turned into numeric character
      references.

    Raises ValueError when `x` contains a section for which the template has
    no slot. I/O and libxml2 parse errors propagate unchanged.
    """
    with open(x) as src:
        t = src.read()

    # Get the title. The libxml2 document and XPath context are not managed by
    # Python's garbage collector, so both are released explicitly.
    xd = libxml2.parseFile(x)
    try:
        ctxt = xd.xpathNewContext()
        try:
            ctxt.xpathRegisterNs('html', 'http://www.w3.org/1999/xhtml')
            title = ctxt.xpathEvalExpression('string(/fvdoc//html:div[@id="message"])')
        finally:
            ctxt.xpathFreeContext()
    finally:
        xd.freeDoc()

    title = trimWS(title)
    # string() yields plain text with entities already resolved, so markup
    # characters have to be escaped again before going inside <title>.
    # '&' is handled first so the other replacements are not double-escaped.
    title = title.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')

    doc = bp.replace('<title></title>', '<title>' + title + '</title>')

    for (sec, txt) in secRe.findall(t):
        # The empty group () marks the insertion point just before the slot's
        # closing </div>. `sec` only contains \w characters (see secRe), so it
        # is safe to embed in the pattern without escaping.
        slot = re.compile(r'<h2>' + sec + r'</h2>\s*<div class="docbody">\s*()</div>', re.IGNORECASE)
        m = slot.search(doc)
        if m is None:
            raise ValueError("%s: template has no section for '%s'" % (x, sec))
        idx = m.start(1)
        doc = doc[:idx] + txt + doc[idx:]

    # The page is treated as UTF-8 here (assumes the template and the source
    # document are both UTF-8 encoded) and re-encoded for output.
    text = codecs.getdecoder('utf-8')(doc)[0]
    data = codecs.getencoder('iso-8859-1')(text, 'xmlcharrefreplace')[0]

    # Opened only once the content is complete, so a failure above never
    # leaves a truncated page behind.
    with open(h, 'w') as out:
        out.write(data)
| 82 | + |
# Convert each source document named on the command line. The name of the
# directory that holds a document is its category, and the page is written to
# <targetDir>/<category>/<name>.html.
for f in argv[3:]:
    srcPath = os.path.abspath(f)
    category = os.path.basename(os.path.dirname(srcPath))

    # Silently skip arguments that are not regular files, and files whose
    # parent directory has no name (nothing to use as a category).
    if not os.path.isfile(srcPath) or not category:
        continue

    stem, suffix = os.path.splitext(os.path.basename(srcPath))
    if suffix != '.xml':
        print >>stderr,"Ignoring",f
        continue

    writeDoc(srcPath, os.path.join(targetDir, category, stem + '.html'))
0 commit comments