#!/usr/bin/env python3 import re import subprocess import sys class Change(object): def __init__(self): self.commit = '' self.author_name = '' self.author_email = '' self.author_date = 0 self.subject = '' self.body = '' self.number = 0 self.change_id = '' self.reviewed_on = '' self.lustre_commit = '' self.lustre_change = '' self.lustre_change_number = 0 self.cray_bug_id = '' self.hpe_bug_id = '' self._parent = self self._rank = 0 def _find(self): if self._parent != self: self._parent = self._parent._find() return self._parent def _union(self, c2): r1 = self._find() r2 = c2._find() if r1._rank > r2._rank: r2._parent = r1 elif r1._rank < r2._rank: r1._parent = r2 elif r1 != r2: r2._parent = r1 r1._rank += 1 GIT_LOG_FIELDS = ['commit', 'author_name', 'author_email', 'author_date', 'subject', 'body'] GIT_LOG_KEYS = ['%H', '%an', '%ae', '%at', '%s', '%b'] GIT_LOG_FORMAT = '%x1f'.join(GIT_LOG_KEYS) + '%x1e' def _change_from_record(rec): change = Change() change.__dict__.update(dict(list(zip(GIT_LOG_FIELDS, rec.split('\x1f'))))) change.author_date = int(change.author_date) for line in change.body.splitlines(): # Sometimes we have 'key : value' so we strip both sides. lis = line.split(':', 1) if len(lis) == 2: key = lis[0].strip() val = lis[1].strip() if key in ['Change-Id', 'Reviewed-on', 'Lustre-commit', 'Lustre-change', 'Cray-bug-id', 'HPE-bug-id']: change.__dict__[key.replace('-', '_').lower()] = val obj = re.match(r'[A-Za-z]+://[\w\.]+/(\d+)$', change.reviewed_on) if obj: change.number = int(obj.group(1)) obj = re.match(r'[A-Za-z]+://[\w\.]+/(\d+)$', change.lustre_change) if obj: change.lustre_change_number = int(obj.group(1)) return change def _head(lis): if lis: return lis[0] else: return None class Branch(object): def __init__(self, name, paths): self.name = name self.paths = paths self.log = [] # Oldest commit is first. self.by_commit = {} # str -> change self.by_subject = {} # str -> list of changes self.by_change_id = {} # str -> list of changes self.by_number = {} # str -> list of changes def _add_change_from_record(self, rec): # TODO Handle reverted commits. change = _change_from_record(rec) self.log.append(change) assert change.commit assert change.commit not in self.by_commit self.by_commit[change.commit] = change assert change.subject lis = self.by_subject.setdefault(change.subject, []) # XXX Do we want this? # if lis: # lis[0]._union(change) lis.append(change) for bug_id in (change.cray_bug_id, change.hpe_bug_id): if bug_id and (' ' in change.subject): # Split subject in to issue and rest. issue, rest = change.subject.split(None, 1) # Make new subject using external bug id subject = ' '.join((bug_id, rest)) lis = self.by_subject.setdefault(subject, []) lis.append(change) # Equivalate by change_id. if change.change_id: lis = self.by_change_id.setdefault(change.change_id, []) if lis: lis[0]._union(change) lis.append(change) # Equivalate by number (from reviewed_on). if change.number: lis = self.by_number.setdefault(change.number, []) if lis: lis[0]._union(change) lis.append(change) def load(self): self.log = [] self.by_commit = {} self.by_subject = {} self.by_change_id = {} self.by_number = {} git_base = ['git'] # [, '--git-dir=' + self.path + '/.git'] # rc = subprocess.call(git_base + ['fetch', 'origin']) # assert rc == 0 pipe = subprocess.Popen(git_base + ['log', '--format=' + GIT_LOG_FORMAT, '--reverse', self.name ] + self.paths, stdout=subprocess.PIPE, text=True) out, _ = pipe.communicate() rc = pipe.wait() assert rc == 0 for rec in out.split('\x1e\n'): if rec: self._add_change_from_record(rec) def find_port(self, change): # Try to find a port of change in this branch. change may or # may not belong to branch. # # TODO Return oldest member of equivalence class. port = (self.by_commit.get(change.commit) or self.by_commit.get(change.lustre_commit) or self.by_commit.get(change.lustre_change) or # Handle misuse. _head(self.by_change_id.get(change.change_id)) or _head(self.by_change_id.get(change.lustre_commit)) or # ... _head(self.by_change_id.get(change.lustre_change)) or _head(self.by_number.get(change.number)) or # Do we need this? _head(self.by_number.get(change.lustre_change_number)) or _head(self.by_subject.get(change.subject))) # Do we want this? if port: return port._find() else: return None def branch_comm(b1, b2): n1 = len(b1.log) n2 = len(b2.log) i1 = 0 i2 = 0 printed = set() # commits def change_is_printed(c): return (c.commit in printed) or (c.lustre_commit in printed) def change_set_printed(c): printed.add(c.commit) if c.lustre_commit: printed.add(c.lustre_commit) # Suppress initial common commits. while i1 < n1 and i2 < n2: # XXX Should we use _find() on c1 and c2 here? # XXX Or c1 = b1.find_port(c1)? c1 = b1.log[i1] c2 = b2.log[i2] if c1.commit == c2.commit: i1 += 1 i2 += 1 continue else: break while i1 < n1 and i2 < n2: c1 = b1.log[i1] if change_is_printed(c1): i1 += 1 continue c2 = b2.log[i2] if change_is_printed(c2): i2 += 1 continue p1 = b1.find_port(c2) if p1 and change_is_printed(p1): change_set_printed(c2) i2 += 1 continue p2 = b2.find_port(c1) if p2 and change_is_printed(p2): change_set_printed(c1) i1 += 1 continue # Neither of c1 and c2 has been printed, nor has any port or either. # XXX Do we need c1._find() here? if c1 == p1 or c2 == p2: # c1 and c2 are ports of the same change. change_set_printed(c1) change_set_printed(c2) if p1: change_set_printed(p1) if p2: change_set_printed(p2) i1 += 1 i2 += 1 # c1 is common to both branches. print('\t\t%s\t%s' % (c1.commit, c1.subject)) # TODO Add a '*' if subjects different... continue if p1 and not p2: # b1 has c2, b2 does not have c1, (port of c2 must be after c1). change_set_printed(c1) i1 += 1 # c1 is unique to b1. print('%s\t\t\t%s' % (c1.commit, c1.subject)) continue if p2 and not p1: # b2 has c1, b1 does not have c2, (port of c1 must be after c2). change_set_printed(c2) i2 += 1 # c2 is unique to b2. print('\t%s\t\t%s' % (c2.commit, c2.subject)) continue # Now neither is ported or both are ported (and the order is weird). if p2: change_set_printed(c1) change_set_printed(p2) i1 += 1 # c1 is common to both branches. print('\t\t%s\t%s' % (c1.commit, c1.subject)) continue else: change_set_printed(c1) i1 += 1 # c1 is unique to b1. print('%s\t\t\t%s' % (c1.commit, c1.subject)) continue for c1 in b1.log[i1:]: if change_is_printed(c1): continue assert i2 == n2 # All commits from b2 have been printed. Therefore if c1 has # been ported to b2 then the port has already been printed. So # c1 is unique to b1 and must be printed. change_set_printed(c1) print('%s\t\t\t%s' % (c1.commit, c1.subject)) for c2 in b2.log[i2:]: if change_is_printed(c2): continue assert i1 == n1 # ... change_set_printed(c2) print('\t%s\t\t%s' % (c2.commit, c2.subject)) USAGE = """usage: '_PROGNAME_ BRANCH1 BRANCH2 [PATH]...' Compare commits to Lustre branches. Prints commits unique to BRANCH1 in column 1. Prints commits unique to BRANCH2 in column 2. Prints commits common to both branches in column 3. Prints commit subject in column 4. Skips initial common commits. The output format is inspired by comm(1). To filter commits by branch, pipe the output to awk. For example: $ ... | awk -F'\\t' '$1 != ""' # only commits unique to BRANCH1 $ ... | awk -F'\\t' '$2 != ""' # only commits unique to BRANCH2 $ ... | awk -F'\\t' '$3 != ""' # only common commits $ ... | awk -F'\\t' '$3 == ""' # exclude common commmits This assumes that both branches are in the repository that contains the current directory. To compare branches from different upstream repositories (for example 'origin/master' and 'other/b_post_cmd3') do: $ cd fs/lustre-release $ git fetch origin $ git remote add other ... $ git fetch other $ _PROGNAME_ origin/master other/b_post_cmd3""" def main(): if len(sys.argv) < 3: print(USAGE.replace('_PROGNAME_', sys.argv[0]), file=sys.stderr) sys.exit(1) paths = sys.argv[3:] b1 = Branch(sys.argv[1], paths) b1.load() b2 = Branch(sys.argv[2], paths) b2.load() branch_comm(b1, b2) if __name__ == '__main__': main()