contrib/scripts/gerrit_checkpatch.py

   1 #!/usr/bin/env python2
   2 #
   3 # GPL HEADER START
   4 #
   5 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   6 #
   7 # This program is free software; you can redistribute it and/or modify
   8 # it under the terms of the GNU General Public License version 2 only,
   9 # as published by the Free Software Foundation.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License version 2 for more details (a copy is included
  15 # in the LICENSE file that accompanied this code).
  16 #
  17 # You should have received a copy of the GNU General Public License
  18 # version 2 along with this program; If not, see
  19 # http://www.gnu.org/licenses/gpl-2.0.html
  20 #
  21 # GPL HEADER END
  22 #
  23 # Copyright (c) 2014, Intel Corporation.
  24 #
  25 # Author: John L. Hammond <john.hammond@intel.com>
  26 #
  27 """
  28 Gerrit Checkpatch Reviewer Daemon
  29 ~~~~~~ ~~~~~~~~~~ ~~~~~~~~ ~~~~~~
  30
  31 * Watch for new change revisions in a gerrit instance.
  32 * Pass new revisions through checkpatch script.
  33 * POST reviews back to gerrit based on checkpatch output.
  34 """
  35
  36 import base64
  37 import fnmatch
  38 import logging
  39 import json
  40 import os
  41 import requests
  42 import subprocess
  43 import time
  44 import urllib
  45
  46 def _getenv_list(key, default=None, sep=':'):
  47     """
  48     'PATH' => ['/bin', '/usr/bin', ...]
  49     """
  50     value = os.getenv(key)
  51     if value is None:
  52         return default
  53     else:
  54         return value.split(sep)
  55
  56 GERRIT_HOST = os.getenv('GERRIT_HOST', 'review.whamcloud.com')
  57 GERRIT_PROJECT = os.getenv('GERRIT_PROJECT', 'fs/lustre-release')
  58 GERRIT_BRANCH = os.getenv('GERRIT_BRANCH', 'master')
  59 GERRIT_AUTH_PATH = os.getenv('GERRIT_AUTH_PATH', 'GERRIT_AUTH')
  60 GERRIT_CHANGE_NUMBER = os.getenv('GERRIT_CHANGE_NUMBER', None)
  61
  62 # GERRIT_AUTH should contain a single JSON dictionary of the form:
  63 # {
  64 #     "review.example.com": {
  65 #         "gerrit/http": {
  66 #             "username": "example-checkpatch",
  67 #             "password": "1234"
  68 #         }
  69 #     }
  70 #     ...
  71 # }
  72
  73 CHECKPATCH_PATHS = _getenv_list('CHECKPATCH_PATHS', ['checkpatch.pl'])
  74 CHECKPATCH_ARGS = os.getenv('CHECKPATCH_ARGS','--show-types -').split(' ')
  75 CHECKPATCH_IGNORED_FILES = _getenv_list('CHECKPATCH_IGNORED_FILES', [
  76         'lustre/ptlrpc/wiretest.c',
  77         'lustre/utils/wiretest.c',
  78         '*.patch'])
  79 CHECKPATCH_IGNORED_KINDS = _getenv_list('CHECKPATCH_IGNORED_KINDS', [
  80         'LASSERT',
  81         'LCONSOLE',
  82         'LEADING_SPACE'])
  83 REVIEW_HISTORY_PATH = os.getenv('REVIEW_HISTORY_PATH', 'REVIEW_HISTORY')
  84 STYLE_LINK = os.getenv('STYLE_LINK',
  85         'http://wiki.lustre.org/Lustre_Coding_Style_Guidelines')
  86
  87 USE_CODE_REVIEW_SCORE = False
  88
  89 def parse_checkpatch_output(out, path_line_comments, warning_count):
  90     """
  91     Parse string output out of CHECKPATCH into path_line_comments.
  92     Increment warning_count[0] for each warning.
  93
  94     path_line_comments is { PATH: { LINE: [COMMENT, ...] }, ... }.
  95     """
  96     def add_comment(path, line, level, kind, message):
  97         """_"""
  98         logging.debug("add_comment %s %d %s %s '%s'",
  99                       path, line, level, kind, message)
 100         if kind in CHECKPATCH_IGNORED_KINDS:
 101             return
 102
 103         for pattern in CHECKPATCH_IGNORED_FILES:
 104             if fnmatch.fnmatch(path, pattern):
 105                 return
 106
 107         path_comments = path_line_comments.setdefault(path, {})
 108         line_comments = path_comments.setdefault(line, [])
 109         line_comments.append('(style) %s\n' % message)
 110         warning_count[0] += 1
 111
 112     level = None # 'ERROR', 'WARNING'
 113     kind = None # 'CODE_INDENT', 'LEADING_SPACE', ...
 114     message = None # 'code indent should use tabs where possible'
 115
 116     for line in out.splitlines():
 117         # ERROR:CODE_INDENT: code indent should use tabs where possible
 118         # #404: FILE: lustre/liblustre/dir.c:103:
 119         # +        op_data.op_hash_offset = hash_x_index(page->index, 0);$
 120         line = line.strip()
 121         if not line:
 122             level, kind, message = None, None, None
 123         elif line[0] == '#':
 124             # '#404: FILE: lustre/liblustre/dir.c:103:'
 125             tokens = line.split(':', 5)
 126             if len(tokens) != 5 or tokens[1] != ' FILE':
 127                 continue
 128
 129             path = tokens[2].strip()
 130             line_number_str = tokens[3].strip()
 131             if not line_number_str.isdigit():
 132                 continue
 133
 134             line_number = int(line_number_str)
 135
 136             if path and level and kind and message:
 137                 add_comment(path, line_number, level, kind, message)
 138         elif line[0] == '+':
 139             continue
 140         else:
 141             # ERROR:CODE_INDENT: code indent should use tabs where possible
 142             try:
 143                 level, kind, message = line.split(':', 2)
 144             except ValueError:
 145                 level, kind, message = None, None, None
 146
 147             if level != 'ERROR' and level != 'WARNING':
 148                 level, kind, message = None, None, None
 149
 150
 151 def review_input_and_score(path_line_comments, warning_count):
 152     """
 153     Convert { PATH: { LINE: [COMMENT, ...] }, ... }, [11] to a gerrit
 154     ReviewInput() and score
 155     """
 156     review_comments = {}
 157
 158     for path, line_comments in path_line_comments.iteritems():
 159         path_comments = []
 160         for line, comment_list in line_comments.iteritems():
 161             message = '\n'.join(comment_list)
 162             path_comments.append({'line': line, 'message': message})
 163         review_comments[path] = path_comments
 164
 165     if warning_count[0] > 0:
 166         score = -1
 167     else:
 168         score = +1
 169
 170     if USE_CODE_REVIEW_SCORE:
 171         code_review_score = score
 172     else:
 173         code_review_score = 0
 174
 175     if score < 0:
 176         return {
 177             'message': ('%d style warning(s).\nFor more details please see %s' %
 178                         (warning_count[0], STYLE_LINK)),
 179             'labels': {
 180                 'Code-Review': code_review_score
 181                 },
 182             'comments': review_comments,
 183             'notify': 'OWNER',
 184             }, score
 185     else:
 186         return {
 187             'message': 'Looks good to me.',
 188             'labels': {
 189                 'Code-Review': code_review_score
 190                 },
 191             'notify': 'NONE',
 192             }, score
 193
 194
 195 def _now():
 196     """_"""
 197     return long(time.time())
 198
 199
 200 class Reviewer(object):
 201     """
 202     * Poll gerrit instance for updates to changes matching project and branch.
 203     * Pipe new patches through checkpatch.
 204     * Convert checkpatch output to gerrit ReviewInput().
 205     * Post ReviewInput() to gerrit instance.
 206     * Track reviewed revisions in history_path.
 207     """
 208     def __init__(self, host, project, branch, username, password, history_path):
 209         self.host = host
 210         self.project = project
 211         self.branch = branch
 212         self.auth = requests.auth.HTTPDigestAuth(username, password)
 213         self.logger = logging.getLogger(__name__)
 214         self.history_path = history_path
 215         self.history_mode = 'rw'
 216         self.history = {}
 217         self.timestamp = 0L
 218         self.post_enabled = True
 219         self.post_interval = 10
 220         self.update_interval = 300
 221         self.request_timeout = 60
 222
 223     def _debug(self, msg, *args):
 224         """_"""
 225         self.logger.debug(msg, *args)
 226
 227     def _error(self, msg, *args):
 228         """_"""
 229         self.logger.error(msg, *args)
 230
 231     def _url(self, path):
 232         """_"""
 233         return 'http://' + self.host + '/a' + path
 234
 235     def _get(self, path):
 236         """
 237         GET path return Response.
 238         """
 239         url = self._url(path)
 240         try:
 241             res = requests.get(url, auth=self.auth,
 242                                timeout=self.request_timeout)
 243         except Exception as exc:
 244             self._error("cannot GET '%s': exception = %s", url, str(exc))
 245             return None
 246
 247         if res.status_code != requests.codes.ok:
 248             self._error("cannot GET '%s': reason = %s, status_code = %d",
 249                        url, res.reason, res.status_code)
 250             return None
 251
 252         return res
 253
 254     def _post(self, path, obj):
 255         """
 256         POST json(obj) to path, return True on success.
 257         """
 258         url = self._url(path)
 259         data = json.dumps(obj)
 260         if not self.post_enabled:
 261             self._debug("_post: disabled: url = '%s', data = '%s'", url, data)
 262             return False
 263
 264         try:
 265             res = requests.post(url, data=data,
 266                                 headers={'Content-Type': 'application/json'},
 267                                 auth=self.auth, timeout=self.request_timeout)
 268         except Exception as exc:
 269             self._error("cannot POST '%s': exception = %s", url, str(exc))
 270             return False
 271
 272         if res.status_code != requests.codes.ok:
 273             self._error("cannot POST '%s': reason = %s, status_code = %d",
 274                        url, res.reason, res.status_code)
 275             return False
 276
 277         return True
 278
 279     def load_history(self):
 280         """
 281         Load review history from history_path containing lines of the form:
 282         EPOCH      FULL_CHANGE_ID                         REVISION    SCORE
 283         1394536722 fs%2Flustre-release~master~I5cc6c23... 00e2cc75... 1
 284         1394536721 -                                      -           0
 285         1394537033 fs%2Flustre-release~master~I10be8e9... 44f7b504... 1
 286         1394537032 -                                      -           0
 287         1394537344 -                                      -           0
 288         ...
 289         """
 290         if 'r' in self.history_mode:
 291             with open(self.history_path) as history_file:
 292                 for line in history_file:
 293                     epoch, change_id, revision, score = line.split()
 294                     if change_id == '-':
 295                         self.timestamp = long(float(epoch))
 296                     else:
 297                         self.history[change_id + ' ' + revision] = score
 298
 299         self._debug("load_history: history size = %d, timestamp = %d",
 300                     len(self.history), self.timestamp)
 301
 302     def write_history(self, change_id, revision, score, epoch=-1):
 303         """
 304         Add review record to history dict and file.
 305         """
 306         if change_id != '-':
 307             self.history[change_id + ' ' + revision] = score
 308
 309         if epoch <= 0:
 310             epoch = self.timestamp
 311
 312         if 'w' in self.history_mode:
 313             with open(self.history_path, 'a') as history_file:
 314                 print >> history_file, epoch, change_id, revision, score
 315
 316     def in_history(self, change_id, revision):
 317         """
 318         Return True if change_id/revision was already reviewed.
 319         """
 320         return change_id + ' ' + revision in self.history
 321
 322     def get_change_by_id(self, change_id):
 323         """
 324         GET one change by id.
 325         """
 326         path = ('/changes/' + urllib.quote(self.project, safe='') + '~' +
 327                 urllib.quote(self.branch, safe='') + '~' + change_id +
 328                 '?o=CURRENT_REVISION')
 329         res = self._get(path)
 330         if not res:
 331             return None
 332
 333         # Gerrit uses " )]}'" to guard against XSSI.
 334         return json.loads(res.content[5:])
 335
 336     def get_changes(self, query):
 337         """
 338         GET a list of ChangeInfo()s for all changes matching query.
 339
 340         {'status':'open', '-age':'60m'} =>
 341           GET /changes/?q=project:...+status:open+-age:60m&o=CURRENT_REVISION =>
 342             [ChangeInfo()...]
 343         """
 344         query = dict(query)
 345         project = query.get('project', self.project)
 346         query['project'] = urllib.quote(project, safe='')
 347         branch = query.get('branch', self.branch)
 348         query['branch'] = urllib.quote(branch, safe='')
 349         path = ('/changes/?q=' +
 350                 '+'.join(k + ':' + v for k, v in query.iteritems()) +
 351                 '&o=CURRENT_REVISION')
 352         res = self._get(path)
 353         if not res:
 354             return []
 355
 356         # Gerrit uses " )]}'" to guard against XSSI.
 357         return json.loads(res.content[5:])
 358
 359     def decode_patch(self, content):
 360         """
 361         Decode gerrit's idea of base64.
 362
 363         The base64 encoded patch returned by gerrit isn't always
 364         padded correctly according to b64decode. Don't know why. Work
 365         around this by appending more '=' characters or truncating the
 366         content until it decodes. But do try the unmodified content
 367         first.
 368         """
 369         for i in (0, 1, 2, 3, -1, -2, -3):
 370             if i >= 0:
 371                 padded_content = content + (i * '=')
 372             else:
 373                 padded_content = content[:i]
 374
 375             try:
 376                 return base64.b64decode(padded_content)
 377             except TypeError as exc:
 378                 self._debug("decode_patch: len = %d, exception = %s",
 379                            len(padded_content), str(exc))
 380         else:
 381             return ''
 382
 383     def get_patch(self, change, revision='current'):
 384         """
 385         GET and decode the (current) patch for change.
 386         """
 387         path = '/changes/' + change['id'] + '/revisions/' + revision + '/patch'
 388         self._debug("get_patch: path = '%s'", path)
 389         res = self._get(path)
 390         if not res:
 391             return ''
 392
 393         self._debug("get_patch: len(content) = %d, content = '%s...'",
 394                    len(res.content), res.content[:20])
 395
 396         return self.decode_patch(res.content)
 397
 398     def post_review(self, change, revision, review_input):
 399         """
 400         POST review_input for the given revision of change.
 401         """
 402         path = '/changes/' + change['id'] + '/revisions/' + revision + '/review'
 403         self._debug("post_review: path = '%s'", path)
 404         return self._post(path, review_input)
 405
 406     def check_patch(self, patch):
 407         """
 408         Run each script in CHECKPATCH_PATHS on patch, return a
 409         ReviewInput() and score.
 410         """
 411         path_line_comments = {}
 412         warning_count = [0]
 413
 414         for path in CHECKPATCH_PATHS:
 415             pipe = subprocess.Popen([path] + CHECKPATCH_ARGS,
 416                                     stdin=subprocess.PIPE,
 417                                     stdout=subprocess.PIPE,
 418                                     stderr=subprocess.PIPE)
 419             out, err = pipe.communicate(patch)
 420             self._debug("check_patch: path = %s, out = '%s...', err = '%s...'",
 421                         path, out[:80], err[:80])
 422             parse_checkpatch_output(out, path_line_comments, warning_count)
 423
 424         return review_input_and_score(path_line_comments, warning_count)
 425
 426     def change_needs_review(self, change):
 427         """
 428         * Bail if the change isn't open (status is not 'NEW').
 429         * Bail if we've already reviewed the current revision.
 430         """
 431         status = change.get('status')
 432         if status != 'NEW':
 433             self._debug("change_needs_review: status = %s", status)
 434             return False
 435
 436         current_revision = change.get('current_revision')
 437         self._debug("change_needs_review: current_revision = '%s'",
 438                     current_revision)
 439         if not current_revision:
 440             return False
 441
 442         # Have we already checked this revision?
 443         if self.in_history(change['id'], current_revision):
 444             self._debug("change_needs_review: already reviewed")
 445             return False
 446
 447         return True
 448
 449     def review_change(self, change):
 450         """
 451         Review the current revision of change.
 452         * Pipe the patch through checkpatch(es).
 453         * Save results to review history.
 454         * POST review to gerrit.
 455         """
 456         self._debug("review_change: change = %s, subject = '%s'",
 457                     change['id'], change.get('subject', ''))
 458
 459         current_revision = change.get('current_revision')
 460         self._debug("change_needs_review: current_revision = '%s'",
 461                     current_revision)
 462         if not current_revision:
 463             return
 464
 465         patch = self.get_patch(change, current_revision)
 466         if not patch:
 467             self._debug("review_change: no patch")
 468             return
 469
 470         review_input, score = self.check_patch(patch)
 471         self._debug("review_change: score = %d", score)
 472         self.write_history(change['id'], current_revision, score)
 473         self.post_review(change, current_revision, review_input)
 474
 475     def update(self):
 476         """
 477         GET recently updated changes and review as needed.
 478         """
 479         new_timestamp = _now()
 480         age = new_timestamp - self.timestamp + 60 * 60 # 1h padding
 481         self._debug("update: age = %d", age)
 482
 483         open_changes = self.get_changes({'status':'open',
 484                                          '-age':str(age) + 's'})
 485         self._debug("update: got %d open_changes", len(open_changes))
 486
 487         for change in open_changes:
 488             if self.change_needs_review(change):
 489                 self.review_change(change)
 490                 # Don't POST more than every post_interval seconds.
 491                 time.sleep(self.post_interval)
 492
 493         self.timestamp = new_timestamp
 494         self.write_history('-', '-', 0)
 495
 496     def update_single_change(self, change):
 497
 498         self.load_history()
 499
 500         open_changes = self.get_changes({'status':'open',
 501                                          'change':change})
 502         self._debug("update: got %d open_changes", len(open_changes))
 503
 504         for change in open_changes:
 505             if self.change_needs_review(change):
 506                 self.review_change(change)
 507
 508     def run(self):
 509         """
 510         * Load review history.
 511         * Call update() every poll_interval seconds.
 512         """
 513
 514         if self.timestamp <= 0:
 515             self.load_history()
 516
 517         while True:
 518             self.update()
 519             time.sleep(self.update_interval)
 520
 521
 522 def main():
 523     """_"""
 524     logging.basicConfig(format='%(asctime)s %(message)s', level=logging.DEBUG)
 525
 526     with open(GERRIT_AUTH_PATH) as auth_file:
 527         auth = json.load(auth_file)
 528         username = auth[GERRIT_HOST]['gerrit/http']['username']
 529         password = auth[GERRIT_HOST]['gerrit/http']['password']
 530
 531     reviewer = Reviewer(GERRIT_HOST, GERRIT_PROJECT, GERRIT_BRANCH,
 532                         username, password, REVIEW_HISTORY_PATH)
 533
 534     if GERRIT_CHANGE_NUMBER:
 535         reviewer.update_single_change(GERRIT_CHANGE_NUMBER)
 536     else:
 537         reviewer.run()
 538
 539
 540 if __name__ == "__main__":
 541     main()