From eabb3cf9803fd2dc8ecc88b9f8e3e254d592366f Mon Sep 17 00:00:00 2001 From: Richard Henwood Date: Wed, 20 Jul 2011 13:01:47 -0500 Subject: [PATCH] LUDOC-13 render diff of html manual to enhance reviewability. Reviewing manual changes is challenging when only observing docbook xml. This change provides code (./tools/diff.py) and a 'diff' make target to generate a html page with annotated differences. An example screenshot is attached to LUDOC-13. Signed-off-by: Richard Henwood Change-Id: Ib19bae429235f9bf2b2b8d15a597baac8f12cc6e --- Makefile | 32 ++++++++++++++++++++++++++++- tools/diff.py | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+), 1 deletion(-) create mode 100755 tools/diff.py diff --git a/Makefile b/Makefile index 441b96f..b733cb3 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,13 @@ SRC_XML=$(wildcard *.xml) SRC_IMG=$(wildcard figures/*.png) SRCS=$(SRC_XML) $(SRC_IMG) +TEMP=/tmp TGT_BASE=lustre_manual +MASTER_URL=http://build.whamcloud.com/job/lustre-manual/lastSuccessfulBuild/ +MASTER_XHTML=$(MASTER_URL)/artifact/_out/$(TGT_BASE).xhtml +TGT_MASTER=$(TEMP)/mastermanual + RNG_LIN=/usr/share/xml/docbook/schema/rng/5.0/docbookxi.rng RNG_MAC=/opt/local/share/xml/docbook/5.0/rng/docbookxi.rng @@ -18,7 +23,7 @@ check: $(SRC_XML) xmllint --noout --xinclude --noent --relaxng $(RNG) ./index.xml # Note: can't use "suffix" instead of "subst", because it keeps the '.' 
-$(TGT_BASE).html $(TGT_BASE).fo: $(SRCS) +$(TGT_BASE).html $(TGT_BASE).xhtml $(TGT_BASE).fo: $(SRCS) xsltproc --stringparam fop1.extensions 1 \ --stringparam section.label.includes.component.label 1 \ --stringparam section.autolabel 1 \ @@ -32,9 +37,34 @@ $(TGT_BASE).pdf: $(TGT_BASE).fo .PHONY: html html: $(TGT_BASE).html +.PHONY: xhtml +xhtml: $(TGT_BASE).xhtml + .PHONY: pdf pdf: $(TGT_BASE).pdf +# get the git hash for the last successful build of the manual +.PHONY: mastermanual.revision +mastermanual.revision: + wget -O mastermanual.index $(MASTER_URL) + awk '/Revision/ { print $$NF }' mastermanual.index > mastermanual.revision + +# only fetch the full manual if we don't have it or the manual changed +$(TGT_MASTER).xhtml: mastermanual.revision + if ! cmp -s mastermanual.revision $(TGT_MASTER).revision ; then\ + wget -O $(TGT_MASTER).xhtml $(MASTER_XHTML) && \ + mv mastermanual.revision $(TGT_MASTER).revision;\ + fi + +.PHONY: diff +diff: $(TGT_BASE).xhtml $(TGT_MASTER).xhtml + ./tools/diff.py $(TGT_MASTER).xhtml $(TGT_BASE).xhtml > $(TGT_BASE).diff + + .PHONY: push push: git push ssh://review.whamcloud.com:29418/doc/manual HEAD:refs/for/master + +.PHONY: clean +clean: + rm $(TGT_BASE).html $(TGT_BASE).xhtml $(TGT_BASE).pdf diff --git a/tools/diff.py b/tools/diff.py new file mode 100755 index 0000000..0c8036c --- /dev/null +++ b/tools/diff.py @@ -0,0 +1,66 @@ +#!/usr/bin/python +"""HTML Diff: http://www.aaronsw.com/2002/diff +Rough code, badly documented. Send me comments and patches.""" + +__author__ = 'Aaron Swartz , Richard Henwood ' +__copyright__ = '(C) 2003 Aaron Swartz. GNU GPL 2 or 3.' 
__version__ = '0.23'

import difflib
import string


def isTag(x):
    """Return True when token x looks like an HTML tag ("<...>").

    Empty tokens are reported as non-tags instead of raising IndexError.
    """
    return x.startswith("<") and x.endswith(">")


def textDiff(a, b):
    """Takes in strings a and b and returns a human-readable HTML diff.

    Both inputs are tokenised with html2list() and compared token by token
    with difflib.SequenceMatcher.  Tokens present only in `a` are wrapped in
    <del>, tokens present only in `b` are wrapped in <ins>, and unchanged
    tokens are copied through verbatim.

    Returns the annotated markup as a single string.
    Raises ValueError if SequenceMatcher reports an unknown opcode.
    """
    old, new = html2list(a), html2list(b)
    matcher = difflib.SequenceMatcher(None, old, new)

    out = []
    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        removed = ''.join(old[i1:i2])
        added = ''.join(new[j1:j2])
        if tag == "replace":
            # @@ need to do something more complicated here
            # (ideally a finer-grained diff of the replaced run); for now the
            # whole old run is shown as deleted and the new run as inserted.
            out.append('<del class="diff modified">' + removed + '</del>'
                       + '<ins class="diff modified">' + added + '</ins>')
        elif tag == "delete":
            out.append('<del class="diff">' + removed + '</del>')
        elif tag == "insert":
            out.append('<ins class="diff">' + added + '</ins>')
        elif tag == "equal":
            out.append(added)
        else:
            # String exceptions are not valid Python; raise a real one.
            raise ValueError("Um, something's broken. I didn't expect a '"
                             + repr(tag) + "'.")
    return ''.join(out)


def html2list(x, b=0):
    """Split the HTML string x into a list of tokens.

    A token is either a complete tag ("<...>") or a run of text that ends at
    the next whitespace character (the whitespace stays attached to the
    token) or at the start of the next tag.  When `b` is true the angle
    brackets delimiting each tag are replaced by "[" and "]".

    Always returns a real list with empty tokens removed, so the result can
    be indexed and sliced by difflib on both Python 2 and Python 3.
    """
    mode = 'char'   # 'char' while reading text, 'tag' while inside <...>
    cur = ''
    out = []
    for c in x:
        if mode == 'tag':
            if c == '>':
                cur += ']' if b else c
                out.append(cur)
                cur = ''
                mode = 'char'
            else:
                cur += c
        elif mode == 'char':
            if c == '<':
                # Flush the pending text before starting the tag token.
                out.append(cur)
                cur = '[' if b else c
                mode = 'tag'
            elif c in string.whitespace:
                out.append(cur + c)
                cur = ''
            else:
                cur += c
    out.append(cur)
    # Compare by value (not identity) when discarding empty tokens.
    return [token for token in out if token != '']


def main(argv=None):
    """Command-line driver: print the HTML diff of two files to stdout.

    Returns the process exit status: 0 on success, 1 when fewer than two
    file names were supplied.
    """
    import sys

    if argv is None:
        argv = sys.argv
    try:
        old_path, new_path = argv[1:3]
    except ValueError:
        # Usage goes to stderr so it never ends up in a redirected diff file.
        sys.stderr.write(
            "htmldiff: highlight the differences between two html files\n")
        sys.stderr.write("usage: " + argv[0] + " a b\n")
        return 1

    # Context managers make sure both inputs are closed again.
    with open(old_path) as old_file:
        old_html = old_file.read()
    with open(new_path) as new_file:
        new_html = new_file.read()

    # Single-argument print() behaves the same on Python 2 and Python 3.
    print(textDiff(old_html, new_html))
    return 0


if __name__ == '__main__':
    import sys
    sys.exit(main())