contrib/scripts/gerrit_checkpatch.py

   1 #!/usr/bin/env python
   2 #
   3 # GPL HEADER START
   4 #
   5 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   6 #
   7 # This program is free software; you can redistribute it and/or modify
   8 # it under the terms of the GNU General Public License version 2 only,
   9 # as published by the Free Software Foundation.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License version 2 for more details (a copy is included
  15 # in the LICENSE file that accompanied this code).
  16 #
  17 # You should have received a copy of the GNU General Public License
  18 # version 2 along with this program; If not, see
  19 # http://www.gnu.org/licenses/gpl-2.0.html
  20 #
  21 # GPL HEADER END
  22 #
  23 # Copyright (c) 2014, Intel Corporation.
  24 #
  25 # Author: John L. Hammond <john.hammond@intel.com>
  26 #
  27 """
  28 Gerrit Checkpatch Reviewer Daemon
  29 ~~~~~~ ~~~~~~~~~~ ~~~~~~~~ ~~~~~~
  30
  31 * Watch for new change revisions in a gerrit instance.
  32 * Pass new revisions through checkpatch script.
  33 * POST reviews back to gerrit based on checkpatch output.
  34 """
  35
  36 import base64
  37 import fnmatch
  38 import logging
  39 import json
  40 import os
  41 import requests
  42 import subprocess
  43 import time
  44 import urllib
  45
  46 def _getenv_list(key, default=None, sep=':'):
  47     """
  48     'PATH' => ['/bin', '/usr/bin', ...]
  49     """
  50     value = os.getenv(key)
  51     if value is None:
  52         return default
  53     else:
  54         return value.split(sep)
  55
  56 GERRIT_HOST = os.getenv('GERRIT_HOST', 'review.whamcloud.com')
  57 GERRIT_PROJECT = os.getenv('GERRIT_PROJECT', 'fs/lustre-release')
  58 GERRIT_BRANCH = os.getenv('GERRIT_BRANCH', 'master')
  59 GERRIT_AUTH_PATH = os.getenv('GERRIT_AUTH_PATH', 'GERRIT_AUTH')
  60 GERRIT_CHANGE_NUMBER = os.getenv('GERRIT_CHANGE_NUMBER', None)
  61
  62 # GERRIT_AUTH should contain a single JSON dictionary of the form:
  63 # {
  64 #     "review.example.com": {
  65 #         "gerrit/http": {
  66 #             "username": "example-checkpatch",
  67 #             "password": "1234"
  68 #         }
  69 #     }
  70 #     ...
  71 # }
  72
  73 CHECKPATCH_PATHS = _getenv_list('CHECKPATCH_PATHS', ['checkpatch.pl'])
  74 CHECKPATCH_ARGS = os.getenv('CHECKPATCH_ARGS','--show-types -').split(' ')
  75 CHECKPATCH_IGNORED_FILES = _getenv_list('CHECKPATCH_IGNORED_FILES', [
  76         'lustre/contrib/wireshark/packet-lustre.c',
  77         'lustre/ptlrpc/wiretest.c',
  78         'lustre/utils/wiretest.c',
  79         '*.patch'])
  80 CHECKPATCH_IGNORED_KINDS = _getenv_list('CHECKPATCH_IGNORED_KINDS', [
  81         'LASSERT',
  82         'LCONSOLE',
  83         'LEADING_SPACE'])
  84 REVIEW_HISTORY_PATH = os.getenv('REVIEW_HISTORY_PATH', 'REVIEW_HISTORY')
  85 STYLE_LINK = os.getenv('STYLE_LINK',
  86         'http://wiki.lustre.org/Lustre_Coding_Style_Guidelines')
  87
  88 USE_CODE_REVIEW_SCORE = False
  89
  90 def parse_checkpatch_output(out, path_line_comments, warning_count):
  91     """
  92     Parse string output out of CHECKPATCH into path_line_comments.
  93     Increment warning_count[0] for each warning.
  94
  95     path_line_comments is { PATH: { LINE: [COMMENT, ...] }, ... }.
  96     """
  97     def add_comment(path, line, level, kind, message):
  98         """_"""
  99         logging.debug("add_comment %s %d %s %s '%s'",
 100                       path, line, level, kind, message)
 101         if kind in CHECKPATCH_IGNORED_KINDS:
 102             return
 103
 104         for pattern in CHECKPATCH_IGNORED_FILES:
 105             if fnmatch.fnmatch(path, pattern):
 106                 return
 107
 108         path_comments = path_line_comments.setdefault(path, {})
 109         line_comments = path_comments.setdefault(line, [])
 110         line_comments.append('(style) %s\n' % message)
 111         warning_count[0] += 1
 112
 113     level = None # 'ERROR', 'WARNING'
 114     kind = None # 'CODE_INDENT', 'LEADING_SPACE', ...
 115     message = None # 'code indent should use tabs where possible'
 116
 117     for line in out.splitlines():
 118         # ERROR:CODE_INDENT: code indent should use tabs where possible
 119         # #404: FILE: lustre/liblustre/dir.c:103:
 120         # +        op_data.op_hash_offset = hash_x_index(page->index, 0);$
 121         line = line.strip()
 122         if not line:
 123             level, kind, message = None, None, None
 124         elif line[0] == '#':
 125             # '#404: FILE: lustre/liblustre/dir.c:103:'
 126             tokens = line.split(':', 5)
 127             if len(tokens) != 5 or tokens[1] != ' FILE':
 128                 continue
 129
 130             path = tokens[2].strip()
 131             line_number_str = tokens[3].strip()
 132             if not line_number_str.isdigit():
 133                 continue
 134
 135             line_number = int(line_number_str)
 136
 137             if path and level and kind and message:
 138                 add_comment(path, line_number, level, kind, message)
 139         elif line[0] == '+':
 140             continue
 141         else:
 142             # ERROR:CODE_INDENT: code indent should use tabs where possible
 143             try:
 144                 level, kind, message = line.split(':', 2)
 145             except ValueError:
 146                 level, kind, message = None, None, None
 147
 148             if level != 'ERROR' and level != 'WARNING':
 149                 level, kind, message = None, None, None
 150
 151
 152 def review_input_and_score(path_line_comments, warning_count):
 153     """
 154     Convert { PATH: { LINE: [COMMENT, ...] }, ... }, [11] to a gerrit
 155     ReviewInput() and score
 156     """
 157     review_comments = {}
 158
 159     for path, line_comments in path_line_comments.iteritems():
 160         path_comments = []
 161         for line, comment_list in line_comments.iteritems():
 162             message = '\n'.join(comment_list)
 163             path_comments.append({'line': line, 'message': message})
 164         review_comments[path] = path_comments
 165
 166     if warning_count[0] > 0:
 167         score = -1
 168     else:
 169         score = +1
 170
 171     if USE_CODE_REVIEW_SCORE:
 172         code_review_score = score
 173     else:
 174         code_review_score = 0
 175
 176     if score < 0:
 177         return {
 178             'message': ('%d style warning(s).\nFor more details please see %s' %
 179                         (warning_count[0], STYLE_LINK)),
 180             'labels': {
 181                 'Code-Review': code_review_score
 182                 },
 183             'comments': review_comments,
 184             'notify': 'OWNER',
 185             }, score
 186     else:
 187         return {
 188             'message': 'Looks good to me.',
 189             'labels': {
 190                 'Code-Review': code_review_score
 191                 },
 192             'notify': 'NONE',
 193             }, score
 194
 195
 196 def _now():
 197     """_"""
 198     return long(time.time())
 199
 200
 201 class Reviewer(object):
 202     """
 203     * Poll gerrit instance for updates to changes matching project and branch.
 204     * Pipe new patches through checkpatch.
 205     * Convert checkpatch output to gerrit ReviewInput().
 206     * Post ReviewInput() to gerrit instance.
 207     * Track reviewed revisions in history_path.
 208     """
 209     def __init__(self, host, project, branch, username, password, history_path):
 210         self.host = host
 211         self.project = project
 212         self.branch = branch
 213         self.auth = requests.auth.HTTPDigestAuth(username, password)
 214         self.logger = logging.getLogger(__name__)
 215         self.history_path = history_path
 216         self.history_mode = 'rw'
 217         self.history = {}
 218         self.timestamp = 0L
 219         self.post_enabled = True
 220         self.post_interval = 10
 221         self.update_interval = 300
 222         self.request_timeout = 60
 223
 224     def _debug(self, msg, *args):
 225         """_"""
 226         self.logger.debug(msg, *args)
 227
 228     def _error(self, msg, *args):
 229         """_"""
 230         self.logger.error(msg, *args)
 231
 232     def _url(self, path):
 233         """_"""
 234         return 'http://' + self.host + '/a' + path
 235
 236     def _get(self, path):
 237         """
 238         GET path return Response.
 239         """
 240         url = self._url(path)
 241         try:
 242             res = requests.get(url, auth=self.auth,
 243                                timeout=self.request_timeout)
 244         except Exception as exc:
 245             self._error("cannot GET '%s': exception = %s", url, str(exc))
 246             return None
 247
 248         if res.status_code != requests.codes.ok:
 249             self._error("cannot GET '%s': reason = %s, status_code = %d",
 250                        url, res.reason, res.status_code)
 251             return None
 252
 253         return res
 254
 255     def _post(self, path, obj):
 256         """
 257         POST json(obj) to path, return True on success.
 258         """
 259         url = self._url(path)
 260         data = json.dumps(obj)
 261         if not self.post_enabled:
 262             self._debug("_post: disabled: url = '%s', data = '%s'", url, data)
 263             return False
 264
 265         try:
 266             res = requests.post(url, data=data,
 267                                 headers={'Content-Type': 'application/json'},
 268                                 auth=self.auth, timeout=self.request_timeout)
 269         except Exception as exc:
 270             self._error("cannot POST '%s': exception = %s", url, str(exc))
 271             return False
 272
 273         if res.status_code != requests.codes.ok:
 274             self._error("cannot POST '%s': reason = %s, status_code = %d",
 275                        url, res.reason, res.status_code)
 276             return False
 277
 278         return True
 279
 280     def load_history(self):
 281         """
 282         Load review history from history_path containing lines of the form:
 283         EPOCH      FULL_CHANGE_ID                         REVISION    SCORE
 284         1394536722 fs%2Flustre-release~master~I5cc6c23... 00e2cc75... 1
 285         1394536721 -                                      -           0
 286         1394537033 fs%2Flustre-release~master~I10be8e9... 44f7b504... 1
 287         1394537032 -                                      -           0
 288         1394537344 -                                      -           0
 289         ...
 290         """
 291         if 'r' in self.history_mode:
 292             with open(self.history_path) as history_file:
 293                 for line in history_file:
 294                     epoch, change_id, revision, score = line.split()
 295                     if change_id == '-':
 296                         self.timestamp = long(float(epoch))
 297                     else:
 298                         self.history[change_id + ' ' + revision] = score
 299
 300         self._debug("load_history: history size = %d, timestamp = %d",
 301                     len(self.history), self.timestamp)
 302
 303     def write_history(self, change_id, revision, score, epoch=-1):
 304         """
 305         Add review record to history dict and file.
 306         """
 307         if change_id != '-':
 308             self.history[change_id + ' ' + revision] = score
 309
 310         if epoch <= 0:
 311             epoch = self.timestamp
 312
 313         if 'w' in self.history_mode:
 314             with open(self.history_path, 'a') as history_file:
 315                 print >> history_file, epoch, change_id, revision, score
 316
 317     def in_history(self, change_id, revision):
 318         """
 319         Return True if change_id/revision was already reviewed.
 320         """
 321         return change_id + ' ' + revision in self.history
 322
 323     def get_change_by_id(self, change_id):
 324         """
 325         GET one change by id.
 326         """
 327         path = ('/changes/' + urllib.quote(self.project, safe='') + '~' +
 328                 urllib.quote(self.branch, safe='') + '~' + change_id +
 329                 '?o=CURRENT_REVISION')
 330         res = self._get(path)
 331         if not res:
 332             return None
 333
 334         # Gerrit uses " )]}'" to guard against XSSI.
 335         return json.loads(res.content[5:])
 336
 337     def get_changes(self, query):
 338         """
 339         GET a list of ChangeInfo()s for all changes matching query.
 340
 341         {'status':'open', '-age':'60m'} =>
 342           GET /changes/?q=project:...+status:open+-age:60m&o=CURRENT_REVISION =>
 343             [ChangeInfo()...]
 344         """
 345         query = dict(query)
 346         project = query.get('project', self.project)
 347         query['project'] = urllib.quote(project, safe='')
 348         branch = query.get('branch', self.branch)
 349         query['branch'] = urllib.quote(branch, safe='')
 350         path = ('/changes/?q=' +
 351                 '+'.join(k + ':' + v for k, v in query.iteritems()) +
 352                 '&o=CURRENT_REVISION')
 353         res = self._get(path)
 354         if not res:
 355             return []
 356
 357         # Gerrit uses " )]}'" to guard against XSSI.
 358         return json.loads(res.content[5:])
 359
 360     def decode_patch(self, content):
 361         """
 362         Decode gerrit's idea of base64.
 363
 364         The base64 encoded patch returned by gerrit isn't always
 365         padded correctly according to b64decode. Don't know why. Work
 366         around this by appending more '=' characters or truncating the
 367         content until it decodes. But do try the unmodified content
 368         first.
 369         """
 370         for i in (0, 1, 2, 3, -1, -2, -3):
 371             if i >= 0:
 372                 padded_content = content + (i * '=')
 373             else:
 374                 padded_content = content[:i]
 375
 376             try:
 377                 return base64.b64decode(padded_content)
 378             except TypeError as exc:
 379                 self._debug("decode_patch: len = %d, exception = %s",
 380                            len(padded_content), str(exc))
 381         else:
 382             return ''
 383
 384     def get_patch(self, change, revision='current'):
 385         """
 386         GET and decode the (current) patch for change.
 387         """
 388         path = '/changes/' + change['id'] + '/revisions/' + revision + '/patch'
 389         self._debug("get_patch: path = '%s'", path)
 390         res = self._get(path)
 391         if not res:
 392             return ''
 393
 394         self._debug("get_patch: len(content) = %d, content = '%s...'",
 395                    len(res.content), res.content[:20])
 396
 397         return self.decode_patch(res.content)
 398
 399     def post_review(self, change, revision, review_input):
 400         """
 401         POST review_input for the given revision of change.
 402         """
 403         path = '/changes/' + change['id'] + '/revisions/' + revision + '/review'
 404         self._debug("post_review: path = '%s'", path)
 405         return self._post(path, review_input)
 406
 407     def check_patch(self, patch):
 408         """
 409         Run each script in CHECKPATCH_PATHS on patch, return a
 410         ReviewInput() and score.
 411         """
 412         path_line_comments = {}
 413         warning_count = [0]
 414
 415         for path in CHECKPATCH_PATHS:
 416             pipe = subprocess.Popen([path] + CHECKPATCH_ARGS,
 417                                     stdin=subprocess.PIPE,
 418                                     stdout=subprocess.PIPE,
 419                                     stderr=subprocess.PIPE)
 420             out, err = pipe.communicate(patch)
 421             self._debug("check_patch: path = %s, out = '%s...', err = '%s...'",
 422                         path, out[:80], err[:80])
 423             parse_checkpatch_output(out, path_line_comments, warning_count)
 424
 425         return review_input_and_score(path_line_comments, warning_count)
 426
 427     def change_needs_review(self, change):
 428         """
 429         * Bail if the change isn't open (status is not 'NEW').
 430         * Bail if we've already reviewed the current revision.
 431         """
 432         status = change.get('status')
 433         if status != 'NEW':
 434             self._debug("change_needs_review: status = %s", status)
 435             return False
 436
 437         current_revision = change.get('current_revision')
 438         self._debug("change_needs_review: current_revision = '%s'",
 439                     current_revision)
 440         if not current_revision:
 441             return False
 442
 443         # Have we already checked this revision?
 444         if self.in_history(change['id'], current_revision):
 445             self._debug("change_needs_review: already reviewed")
 446             return False
 447
 448         return True
 449
 450     def review_change(self, change):
 451         """
 452         Review the current revision of change.
 453         * Pipe the patch through checkpatch(es).
 454         * Save results to review history.
 455         * POST review to gerrit.
 456         """
 457         self._debug("review_change: change = %s, subject = '%s'",
 458                     change['id'], change.get('subject', ''))
 459
 460         current_revision = change.get('current_revision')
 461         self._debug("change_needs_review: current_revision = '%s'",
 462                     current_revision)
 463         if not current_revision:
 464             return
 465
 466         patch = self.get_patch(change, current_revision)
 467         if not patch:
 468             self._debug("review_change: no patch")
 469             return
 470
 471         review_input, score = self.check_patch(patch)
 472         self._debug("review_change: score = %d", score)
 473         self.write_history(change['id'], current_revision, score)
 474         self.post_review(change, current_revision, review_input)
 475
 476     def update(self):
 477         """
 478         GET recently updated changes and review as needed.
 479         """
 480         new_timestamp = _now()
 481         age = new_timestamp - self.timestamp + 60 * 60 # 1h padding
 482         self._debug("update: age = %d", age)
 483
 484         open_changes = self.get_changes({'status':'open',
 485                                          '-age':str(age) + 's'})
 486         self._debug("update: got %d open_changes", len(open_changes))
 487
 488         for change in open_changes:
 489             if self.change_needs_review(change):
 490                 self.review_change(change)
 491                 # Don't POST more than every post_interval seconds.
 492                 time.sleep(self.post_interval)
 493
 494         self.timestamp = new_timestamp
 495         self.write_history('-', '-', 0)
 496
 497     def update_single_change(self, change):
 498
 499         self.load_history()
 500
 501         open_changes = self.get_changes({'status':'open',
 502                                          'change':change})
 503         self._debug("update: got %d open_changes", len(open_changes))
 504
 505         for change in open_changes:
 506             if self.change_needs_review(change):
 507                 self.review_change(change)
 508
 509     def run(self):
 510         """
 511         * Load review history.
 512         * Call update() every poll_interval seconds.
 513         """
 514
 515         if self.timestamp <= 0:
 516             self.load_history()
 517
 518         while True:
 519             self.update()
 520             time.sleep(self.update_interval)
 521
 522
 523 def main():
 524     """_"""
 525     logging.basicConfig(format='%(asctime)s %(message)s', level=logging.DEBUG)
 526
 527     with open(GERRIT_AUTH_PATH) as auth_file:
 528         auth = json.load(auth_file)
 529         username = auth[GERRIT_HOST]['gerrit/http']['username']
 530         password = auth[GERRIT_HOST]['gerrit/http']['password']
 531
 532     reviewer = Reviewer(GERRIT_HOST, GERRIT_PROJECT, GERRIT_BRANCH,
 533                         username, password, REVIEW_HISTORY_PATH)
 534
 535     if GERRIT_CHANGE_NUMBER:
 536         reviewer.update_single_change(GERRIT_CHANGE_NUMBER)
 537     else:
 538         reviewer.run()
 539
 540
 541 if __name__ == "__main__":
 542     main()