From b81442f7006294bf436b5c027232dc90641c16b7 Mon Sep 17 00:00:00 2001
From: Richard Henwood
Date: Fri, 10 Feb 2012 14:20:28 -0600
Subject: [PATCH] LUDOC-13: enhanced diff to work with larger changes.
Significant changes were needed to handle multi-line changes
that include changes in markup between the master and review manuals.
NOTE: The resulting HTML may not be valid, but will probably still render
in the browser.
Signed-off-by: Richard Henwood
Change-Id: I60c724afca1ac6d89e3372666f9c7962efb73360
---
tools/diff.py | 215 ++++++++++++++++++++++++++++++++++++++++++++++++++++------
1 file changed, 196 insertions(+), 19 deletions(-)
diff --git a/tools/diff.py b/tools/diff.py
index f407784..582b465 100755
--- a/tools/diff.py
+++ b/tools/diff.py
@@ -2,46 +2,118 @@
"""HTML Diff: http://www.aaronsw.com/2002/diff
Rough code, badly documented. Send me comments and patches."""
-__author__ = 'Aaron Swartz , Richard Henwood '
-__copyright__ = '(C) 2003 Aaron Swartz. GNU GPL 2 or 3.'
-__version__ = '0.23'
+__author__ = 'Aaron Swartz , \
+Richard Henwood '
+__copyright__ = '(C) 2003 Aaron Swartz, (C) 2011,2012 Whamcloud, Inc \
+GNU GPL 2 or 3.'
+__version__ = '1.0'
-import difflib, string
+import difflib
+import string
+import re
def isTag(x): return x[0] == "<" and x[-1] == ">"
def textDiff(a, b):
"""Takes in strings a and b and returns a human-readable HTML diff."""
+ changeCounter = 0
out = []
a, b = html2list(a), html2list(b)
s = difflib.SequenceMatcher(None, a, b)
+ gap = 0;
+ wait = 7; # fudge factor to avoid having too many changes anchors.
for e in s.get_opcodes():
if e[0] == "replace":
# @@ need to do something more complicated here
# call textDiff but not for html, but for some html... ugh
# gonna cop-out for now
- # I've put this hack in to try accomodate id's generated by docbook compilation. RJH 30/12/2011
- if ''+''.join(a[e[1]:e[2]]) + ''+''.join(b[e[3]:e[4]])+"")
+ if gap > wait:
+ out.append('' % changeCounter)
+ changeCounter += 1
+ withopen = addOpen(a[e[1]:e[2]])
+ out.append(annotateTags(withopen, 'delete'));
+ out.append(annotateTags(b[e[3]:e[4]], 'insert'));
+ gap = 0
elif e[0] == "delete":
- out.append('' + ''.join(a[e[1]:e[2]]) + "")
+ if gap > wait:
+ out.append('' % changeCounter)
+ changeCounter += 1
+ out.append(annotateTags(a[e[1]:e[2]], 'delete'));
+ gap = 0
elif e[0] == "insert":
- out.append(''+''.join(b[e[3]:e[4]]) + "")
+ if gap > wait:
+ out.append('' % changeCounter)
+ changeCounter += 1
+ out.append(annotateTags(b[e[3]:e[4]], 'insert'));
+ gap = 0
elif e[0] == "equal":
out.append(''.join(b[e[3]:e[4]]))
+ gap += 1
else:
raise "Um, something's broken. I didn't expect a '" + `e[0]` + "'."
- return ''.join(out)
+ return [''.join(out), changeCounter]
+
+# This function finds all unmatched
+# close tags in htmllist and prepends the
+# corresponding open tags to htmllist.
+def addOpen(htmllist):
+ unmatched = []
+ tmpstack = []
+ for item in htmllist:
+ if "<" in item:
+ if "" in item:
+ try:
+ if tmpstack[-1] == item:
+ tmpstack.pop()
+ else:
+ tmpstack.append(item)
+ except IndexError:
+ unmatched.append(item)
+ pass
+ pass
+ else:
+ tmpstack.append(re.sub(r'<(\w+).*', r'\g<1>>', item, flags=re.IGNORECASE))
+ pass
+ else:
+ pass
+ unmatched = map(lambda x: x.replace('/', ''), unmatched)
+ unmatched.reverse()
+ return unmatched + htmllist
+
+def annotateTags(htmllist, className):
+ res = map(lambda tag: addClass(tag, className), htmllist)
+ #the first element should be surrounded with a span + class
+ #provided it is not a tag.
+ if '<' not in htmllist[0]:
+ res[0] = '%s' % (className, htmllist[0])
+ return ''.join(res)
+
+# This function adds a new class to an HTML open tag, e.g.
+# <p> -> <p class=" newClass">
+# <p class="old"> -> <p class="old newClass">
+def addClass (tag, newClass):
+ if "<" in tag and "" not in tag:
+ if 'class' not in tag:
+ # assume all tags end in '>', so chop it off:
+ tag = tag[:-1]
+ # add in an empty class element.
+ tag = '%s class="">' % tag
+ tag = re.sub(r'(.*)class=[\'"]([ \w]*)[\'"]',
+ r'\g<1>class="\g<2> '+newClass+'"', tag , flags=re.IGNORECASE)
+ return tag
def html2list(x, b=0):
mode = 'char'
@@ -65,6 +137,103 @@ def html2list(x, b=0):
out.append(cur)
return filter(lambda x: x is not '', out)
+def test ():
+ print "\nrunning tests..."
+ test1res = addClass('', 'test')
+ test1suc = ''
+ if test1res == test1suc:
+ print "SUCCESS: %s == %s" % (test1res, test1suc)
+ else:
+ print "FAIL: %s != %s" % (test1res, test1suc)
+ test2res = addClass('', 'test2')
+ test2suc = ''
+ if test2res == test2suc:
+ print "SUCCESS: %s == %s" % (test2res, test2suc)
+ else:
+ print "FAIL: %s != %s" % (test2res, test2suc)
+
+ test3res = addOpen(['hello','', '
'])
+ test3suc = ['', '
', 'hello','
', '']
+ if test3res == test3suc:
+ print "SUCCESS: %s == %s" % (test3res, test3suc)
+ else:
+ print "FAIL: %s != %s" % (test3res, test3suc)
+
+ test4res = addOpen(['hello','', '', 'test', '
', ''])
+ test4suc = ['', '
', 'hello','
', '', 'test', '
', '']
+ if test4res == test4suc:
+ print "SUCCESS: %s == %s" % (test4res, test4suc)
+ else:
+ print "FAIL: %s != %s" % (test4res, test4suc)
+
+ test5res = addOpen(['0','', '', '', '', 'test', '
'])
+ test5suc = ['', '', 'test', '
']
+ if test5res == test5suc:
+ print "SUCCESS: %s == %s" % (test5res, test5suc)
+ else:
+ print "FAIL: %s != %s" % (test5res, test5suc)
+
+ test6res = annotateTags(['', '0', '
'], 'test')
+ test6suc = '0
'
+ if test6res == test6suc:
+ print "SUCCESS: %s == %s" % (test6res, test6suc)
+ else:
+ print "FAIL: %s != %s" % (test6res, test6suc)
+
+ test7res = addOpen(['0','', '', '', '', ''])
+ test7suc = ['']
+ if test7res == test7suc:
+ print "SUCCESS: %s == %s" % (test7res, test7suc)
+ else:
+ print "FAIL: %s != %s" % (test7res, test7suc)
+
+ pass
+
+def jsHeader (changeCounter):
+ jscript = """
+
+ """ % changeCounter
+ return jscript;
+
+def cssHeader ():
+
+ css = """
+ """
+ return css
+
+
if __name__ == '__main__':
import sys
try:
@@ -72,6 +241,14 @@ if __name__ == '__main__':
except ValueError:
print "htmldiff: highlight the differences between two html files"
print "usage: " + sys.argv[0] + " a b"
+ test()
sys.exit(1)
- print textDiff(open(a).read(), open(b).read())
-
+ changeCounter = 0
+ (diffxhtml, changeCounter) = textDiff(open(a).read(), open(b).read())
+ # it is, regrettably, possible that the result of textDiff is not
+ # valid xhtml. I have noticed issues with nested 's for example.
+ # Because of this, search and replace to insert our css, js and dom
+ # elements:
+ diffxhtml = diffxhtml.replace('
', cssHeader() + jsHeader(changeCounter) +
+ 'No changes detected.
')
+ print diffxhtml
--
1.8.3.1