Whamcloud - gitweb
LUDOC-13 render diff of html manual to enhance reviewability. 27/1127/5
authorRichard Henwood <rhenwood@whamcloud.com>
Wed, 20 Jul 2011 18:01:47 +0000 (13:01 -0500)
committerRichard Henwood <rhenwood@whamcloud.com>
Mon, 8 Aug 2011 19:59:35 +0000 (14:59 -0500)
Reviewing manual changes is challenging when only the DocBook XML
source can be inspected. This change provides code (./tools/diff.py)
and a 'diff' make target to generate an HTML page with annotated
differences. An example screenshot is attached to LUDOC-13.

Signed-off-by: Richard Henwood <rhenwood@whamcloud.com>
Change-Id: Ib19bae429235f9bf2b2b8d15a597baac8f12cc6e

Makefile
tools/diff.py [new file with mode: 0755]

index 441b96f..b733cb3 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,8 +1,13 @@
 SRC_XML=$(wildcard *.xml)
 SRC_IMG=$(wildcard figures/*.png)
 SRCS=$(SRC_XML) $(SRC_IMG)
+# Scratch directory that holds the cached reference copy of the manual.
+TEMP=/tmp
 
 TGT_BASE=lustre_manual
+# Build-server page for the last successful build of the manual.  Keep the
+# trailing slash: the artifact path below is appended to it directly, which
+# avoids a doubled "//" in the resulting download URL.
+MASTER_URL=http://build.whamcloud.com/job/lustre-manual/lastSuccessfulBuild/
+MASTER_XHTML=$(MASTER_URL)artifact/_out/$(TGT_BASE).xhtml
+# Path prefix shared by the cached master manual (.xhtml) and its
+# revision stamp (.revision).
+TGT_MASTER=$(TEMP)/mastermanual
+
 
 RNG_LIN=/usr/share/xml/docbook/schema/rng/5.0/docbookxi.rng
 RNG_MAC=/opt/local/share/xml/docbook/5.0/rng/docbookxi.rng
@@ -18,7 +23,7 @@ check: $(SRC_XML)
        xmllint --noout --xinclude --noent --relaxng $(RNG) ./index.xml
 
 # Note: can't use "suffix" instead of "subst", because it keeps the '.'
-$(TGT_BASE).html $(TGT_BASE).fo: $(SRCS)
+$(TGT_BASE).html $(TGT_BASE).xhtml $(TGT_BASE).fo: $(SRCS)
        xsltproc --stringparam fop1.extensions  1 \
                --stringparam section.label.includes.component.label 1 \
                --stringparam section.autolabel 1 \
@@ -32,9 +37,34 @@ $(TGT_BASE).pdf: $(TGT_BASE).fo
 .PHONY: html
 html: $(TGT_BASE).html
 
+# Convenience alias: "make xhtml" builds $(TGT_BASE).xhtml.
+.PHONY: xhtml
+xhtml: $(TGT_BASE).xhtml
+
 .PHONY: pdf
 pdf: $(TGT_BASE).pdf
 
+# Record the revision hash shown on the page of the last successful
+# manual build.  The target is phony, so the page is fetched again on
+# every invocation and the stamp always reflects the current build.
+.PHONY: mastermanual.revision
+mastermanual.revision:
+       wget -O mastermanual.index $(MASTER_URL)
+       awk '/Revision/ { print $$NF }' mastermanual.index > $@
+
+# Fetch the full manual only when there is no usable cached copy (missing
+# or empty file) or when the revision recorded for the cached copy differs
+# from the one just read.  The revision stamp is moved into place only
+# after a successful download, so a failed fetch is retried on the next run.
+$(TGT_MASTER).xhtml: mastermanual.revision
+       if [ ! -s $@ ] || \
+          ! cmp -s mastermanual.revision $(TGT_MASTER).revision ; then \
+               wget -O $@ $(MASTER_XHTML) && \
+               mv mastermanual.revision $(TGT_MASTER).revision; \
+       fi
+
+# Write an HTML page ($(TGT_BASE).diff) that highlights the differences
+# between the cached master build of the manual and the locally built one.
+.PHONY: diff
+diff: $(TGT_BASE).xhtml $(TGT_MASTER).xhtml
+       ./tools/diff.py $(TGT_MASTER).xhtml $(TGT_BASE).xhtml > $(TGT_BASE).diff
+
+
+# Push the current HEAD to refs/for/master on review.whamcloud.com.
 .PHONY: push
 push:
         git push ssh://review.whamcloud.com:29418/doc/manual HEAD:refs/for/master
+
+# Remove the generated manual outputs and the files fetched for "make diff"
+# from the working directory.  -f keeps "make clean" from failing when
+# some of the outputs were never built.
+.PHONY: clean
+clean:
+       rm -f $(TGT_BASE).html $(TGT_BASE).xhtml $(TGT_BASE).fo \
+               $(TGT_BASE).pdf $(TGT_BASE).diff \
+               mastermanual.index mastermanual.revision
diff --git a/tools/diff.py b/tools/diff.py
new file mode 100755 (executable)
index 0000000..0c8036c
--- /dev/null
@@ -0,0 +1,66 @@
+#!/usr/bin/python
+"""HTML Diff: http://www.aaronsw.com/2002/diff
+Rough code, badly documented. Send me comments and patches."""
+
+__author__ = 'Aaron Swartz <me@aaronsw.com>, Richard Henwood <rhenwood@whamcloud.com>'
+__copyright__ = '(C) 2003 Aaron Swartz. GNU GPL 2 or 3.'
+__version__ = '0.23'
+
+import difflib, string
+
+def isTag(x):
+    """Return True when the string x starts with "<" and ends with ">".
+
+    An empty string is reported as not being a tag instead of raising
+    IndexError.
+    """
+    return x.startswith("<") and x.endswith(">")
+
+def textDiff(a, b):
+    """Takes in strings a and b and returns a human-readable HTML diff.
+
+    Both inputs are tokenised with html2list() and compared with
+    difflib.SequenceMatcher.  Unchanged tokens are copied through as-is;
+    deleted, inserted and replaced runs are wrapped in styled <span>
+    elements.  Raises ValueError if the matcher reports an unknown opcode.
+    """
+
+    out = []
+    a, b = html2list(a), html2list(b)
+    s = difflib.SequenceMatcher(None, a, b)
+    for tag, a_lo, a_hi, b_lo, b_hi in s.get_opcodes():
+        old = ''.join(a[a_lo:a_hi])
+        new = ''.join(b[b_lo:b_hi])
+        if tag == "replace":
+            # No recursive diff inside the replaced run: the old text is
+            # shown struck through, immediately followed by the new text.
+            out.append('<span class="replace" style="background-color: Pink; text-decoration: line-through;">' + old + '</span><span class="insert" style="background-color: PaleGreen;">' + new + "</span>")
+        elif tag == "delete":
+            out.append('<span class="del" style="background-color: Pink; text-decoration: line-through;">' + old + "</span>")
+        elif tag == "insert":
+            out.append('<span class="ins" style="background-color: PaleGreen;">' + new + "</span>")
+        elif tag == "equal":
+            out.append(new)
+        else:
+            # String exceptions are not valid; raise a real exception type.
+            raise ValueError("Um, something's broken. I didn't expect a '" + repr(tag) + "'.")
+    return ''.join(out)
+
+def html2list(x, b=0):
+    """Split the HTML string x into a list of tokens.
+
+    Everything from "<" up to the next ">" becomes one token.  Text
+    outside tags is cut after every whitespace character, and that
+    whitespace character stays attached to the end of its token.  When b
+    is true the tag delimiters are written as "[" and "]" instead of "<"
+    and ">".  Empty tokens are dropped from the result.
+    """
+    mode = 'char'
+    cur = ''
+    out = []
+    for c in x:
+        if mode == 'tag':
+            if c == '>':
+                if b: cur += ']'
+                else: cur += c
+                out.append(cur); cur = ''; mode = 'char'
+            else: cur += c
+        elif mode == 'char':
+            if c == '<':
+                # Flush the text collected so far before the tag starts.
+                out.append(cur)
+                if b: cur = '['
+                else: cur = c
+                mode = 'tag'
+            elif c in string.whitespace: out.append(cur+c); cur = ''
+            else: cur += c
+    out.append(cur)
+    # Compare by value: "is not ''" tests object identity and only works
+    # by accident of string interning.
+    return [tok for tok in out if tok != '']
+
+if __name__ == '__main__':
+    import sys
+    # Command line: diff.py OLD NEW.  Slicing never raises, so a missing
+    # file name simply shows up as a list shorter than two entries.
+    paths = sys.argv[1:3]
+    if len(paths) != 2:
+        print "htmldiff: highlight the differences between two html files"
+        print "usage: " + sys.argv[0] + " a b"
+        sys.exit(1)
+    a, b = paths
+    print textDiff(open(a).read(), open(b).read())
+