From b81442f7006294bf436b5c027232dc90641c16b7 Mon Sep 17 00:00:00 2001 From: Richard Henwood Date: Fri, 10 Feb 2012 14:20:28 -0600 Subject: [PATCH] LUDOC-13: enhanced diff to work with larger changes. Significant changes were needed to work with multi-line changes that included changes in markup between master and review manual. NOTE: The resulting html may not be valid, but will probably render in the browser. Signed-off-by: Richard Henwood Change-Id: I60c724afca1ac6d89e3372666f9c7962efb73360 --- tools/diff.py | 215 ++++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 196 insertions(+), 19 deletions(-) diff --git a/tools/diff.py b/tools/diff.py index f407784..582b465 100755 --- a/tools/diff.py +++ b/tools/diff.py @@ -2,46 +2,118 @@ """HTML Diff: http://www.aaronsw.com/2002/diff Rough code, badly documented. Send me comments and patches.""" -__author__ = 'Aaron Swartz , Richard Henwood ' -__copyright__ = '(C) 2003 Aaron Swartz. GNU GPL 2 or 3.' -__version__ = '0.23' +__author__ = 'Aaron Swartz , \ +Richard Henwood ' +__copyright__ = '(C) 2003 Aaron Swartz, (C) 2011,2012 Whamcloud, Inc \ +GNU GPL 2 or 3.' +__version__ = '1.0' -import difflib, string +import difflib +import string +import re def isTag(x): return x[0] == "<" and x[-1] == ">" def textDiff(a, b): """Takes in strings a and b and returns a human-readable HTML diff.""" + changeCounter = 0 out = [] a, b = html2list(a), html2list(b) s = difflib.SequenceMatcher(None, a, b) + gap = 0; + wait = 7; # fudge factor to avoid having too many changes anchors. for e in s.get_opcodes(): if e[0] == "replace": # @@ need to do something more complicated here # call textDiff but not for html, but for some html... ugh # gonna cop-out for now - # I've put this hack in to try accomodate id's generated by docbook compilation. RJH 30/12/2011 - if ''+''.join(a[e[1]:e[2]]) + ''+''.join(b[e[3]:e[4]])+"") + if gap > wait: + out.append('' % changeCounter) + changeCounter += 1 + withopen = addOpen(a[e[1]:e[2]]) + out.append(annotateTags(withopen, 'delete')); + out.append(annotateTags(b[e[3]:e[4]], 'insert')); + gap = 0 elif e[0] == "delete": - out.append('' + ''.join(a[e[1]:e[2]]) + "") + if gap > wait: + out.append('' % changeCounter) + changeCounter += 1 + out.append(annotateTags(a[e[1]:e[2]], 'delete')); + gap = 0 elif e[0] == "insert": - out.append(''+''.join(b[e[3]:e[4]]) + "") + if gap > wait: + out.append('' % changeCounter) + changeCounter += 1 + out.append(annotateTags(b[e[3]:e[4]], 'insert')); + gap = 0 elif e[0] == "equal": out.append(''.join(b[e[3]:e[4]])) + gap += 1 else: raise "Um, something's broken. I didn't expect a '" + `e[0]` + "'." - return ''.join(out) + return [''.join(out), changeCounter] + +# this functiona finds all unmatched +# close tags in htmllist and prepends +# corresponding open tags to htmllist. +def addOpen(htmllist): + unmatched = [] + tmpstack = [] + for item in htmllist: + if "<" in item: + if ">', item, flags=re.IGNORECASE)) + pass + else: + pass + unmatched = map(lambda x: x.replace('/', ''), unmatched) + unmatched.reverse() + return unmatched + htmllist + +def annotateTags(htmllist, className): + res = map(lambda tag: addClass(tag, className), htmllist) + #the first element should be surrounded with a span + class + #provided it is not a tag. + if '<' not in htmllist[0]: + res[0] = '%s' % (className, htmllist[0]) + return ''.join(res) + +# this method adds a new class to a html tag. i.e. +# -> +# -> +def addClass (tag, newClass): + if "<" in tag and "', so chop it off: + tag = tag[:-1] + # add in an empty class element. + tag = '%s class="">' % tag + tag = re.sub(r'(.*)class=[\'"]([ \w]*)[\'"]', + r'\g<1>class="\g<2> '+newClass+'"', tag , flags=re.IGNORECASE) + return tag def html2list(x, b=0): mode = 'char' @@ -65,6 +137,103 @@ def html2list(x, b=0): out.append(cur) return filter(lambda x: x is not '', out) +def test (): + print "\nrunning tests..." + test1res = addClass('', 'test') + test1suc = '' + if test1res == test1suc: + print "SUCCESS: %s == %s" % (test1res, test1suc) + else: + print "FAIL: %s != %s" % (test1res, test1suc) + test2res = addClass('', 'test2') + test2suc = '' + if test2res == test2suc: + print "SUCCESS: %s == %s" % (test2res, test2suc) + else: + print "FAIL: %s != %s" % (test2res, test2suc) + + test3res = addOpen(['hello','', '

']) + test3suc = ['

', '

', 'hello','
', '

'] + if test3res == test3suc: + print "SUCCESS: %s == %s" % (test3res, test3suc) + else: + print "FAIL: %s != %s" % (test3res, test3suc) + + test4res = addOpen(['hello','', '

', 'test', '

', '

']) + test4suc = ['

', '

', 'hello','
', '

', 'test', '

', '

'] + if test4res == test4suc: + print "SUCCESS: %s == %s" % (test4res, test4suc) + else: + print "FAIL: %s != %s" % (test4res, test4suc) + + test5res = addOpen(['0','

', '', '', '
', 'test', '
']) + test5suc = ['
', '
', '

', '0','

', '
', '
', '
', 'test', '
'] + if test5res == test5suc: + print "SUCCESS: %s == %s" % (test5res, test5suc) + else: + print "FAIL: %s != %s" % (test5res, test5suc) + + test6res = annotateTags(['
', '0', '
'], 'test') + test6suc = '
0
' + if test6res == test6suc: + print "SUCCESS: %s == %s" % (test6res, test6suc) + else: + print "FAIL: %s != %s" % (test6res, test6suc) + + test7res = addOpen(['0','

', '', '', '
', '
', 'test', '
', '
', '']) + test7suc = ['
', '
', '
', '

', '0','

', '
', '
', '
', '
', 'test', '
', '
', '
'] + if test7res == test7suc: + print "SUCCESS: %s == %s" % (test7res, test7suc) + else: + print "FAIL: %s != %s" % (test7res, test7suc) + + pass + +def jsHeader (changeCounter): + jscript = """ + + """ % changeCounter + return jscript; + +def cssHeader (): + + css = """ + """ + return css + + if __name__ == '__main__': import sys try: @@ -72,6 +241,14 @@ if __name__ == '__main__': except ValueError: print "htmldiff: highlight the differences between two html files" print "usage: " + sys.argv[0] + " a b" + test() sys.exit(1) - print textDiff(open(a).read(), open(b).read()) - + changeCounter = 0 + (diffxhtml, changeCounter) = textDiff(open(a).read(), open(b).read()) + # it is, regrettably, possible that the result of textDiff is not + # valid xhtml. I have noticed issues with nested

's for example. + # Because of this, search and replace to insert our css, js and dom + # elements: + diffxhtml = diffxhtml.replace('', cssHeader() + jsHeader(changeCounter) + + '

No changes detected.
') + print diffxhtml -- 1.8.3.1