contrib/scripts/gerrit_checkpatch.py

   1 #!/usr/bin/env python
   2 #
   3 # GPL HEADER START
   4 #
   5 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   6 #
   7 # This program is free software; you can redistribute it and/or modify
   8 # it under the terms of the GNU General Public License version 2 only,
   9 # as published by the Free Software Foundation.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License version 2 for more details (a copy is included
  15 # in the LICENSE file that accompanied this code).
  16 #
  17 # You should have received a copy of the GNU General Public License
  18 # version 2 along with this program; If not, see
  19 # http://www.gnu.org/licenses/gpl-2.0.html
  20 #
  21 # GPL HEADER END
  22 #
  23 # Copyright (c) 2014, Intel Corporation.
  24 #
  25 # Author: John L. Hammond <john.hammond@intel.com>
  26 #
  27 """
  28 Gerrit Checkpatch Reviewer Daemon
  29 ~~~~~~ ~~~~~~~~~~ ~~~~~~~~ ~~~~~~
  30
  31 * Watch for new change revisions in a gerrit instance.
  32 * Pass new revisions through checkpatch script.
  33 * POST reviews back to gerrit based on checkpatch output.
  34 """
  35
  36 import base64
  37 import fnmatch
  38 import logging
  39 import json
  40 import os
  41 import requests
  42 import subprocess
  43 import time
  44 import urllib
  45
  46 def _getenv_list(key, default=None, sep=':'):
  47     """
  48     'PATH' => ['/bin', '/usr/bin', ...]
  49     """
  50     value = os.getenv(key)
  51     if value is None:
  52         return default
  53     else:
  54         return value.split(sep)
  55
  56 GERRIT_HOST = os.getenv('GERRIT_HOST', 'review.whamcloud.com')
  57 GERRIT_PROJECT = os.getenv('GERRIT_PROJECT', 'fs/lustre-release')
  58 GERRIT_BRANCH = os.getenv('GERRIT_BRANCH', 'master')
  59 GERRIT_AUTH_PATH = os.getenv('GERRIT_AUTH_PATH', 'GERRIT_AUTH')
  60
  61 # GERRIT_AUTH should contain a single JSON dictionary of the form:
  62 # {
  63 #     "review.example.com": {
  64 #         "gerrit/http": {
  65 #             "username": "example-checkpatch",
  66 #             "password": "1234"
  67 #         }
  68 #     }
  69 #     ...
  70 # }
  71
  72 CHECKPATCH_PATHS = _getenv_list('CHECKPATCH_PATHS', ['checkpatch.pl'])
  73 CHECKPATCH_IGNORED_FILES = _getenv_list('CHECKPATCH_IGNORED_FILES', [
  74         'lustre/contrib/wireshark/packet-lustre.c',
  75         'lustre/ptlrpc/wiretest.c',
  76         'lustre/utils/wiretest.c',
  77         '*.patch'])
  78 CHECKPATCH_IGNORED_KINDS = _getenv_list('CHECKPATCH_IGNORED_KINDS', [
  79         'LEADING_SPACE'])
  80 REVIEW_HISTORY_PATH = os.getenv('REVIEW_HISTORY_PATH', 'REVIEW_HISTORY')
  81 STYLE_LINK = os.getenv('STYLE_LINK',
  82         'https://wiki.hpdd.intel.com/display/PUB/Coding+Guidelines')
  83
  84 USE_CODE_REVIEW_SCORE = False
  85
  86 def parse_checkpatch_output(out, path_line_comments, warning_count):
  87     """
  88     Parse string output out of CHECKPATCH into path_line_comments.
  89     Increment warning_count[0] for each warning.
  90
  91     path_line_comments is { PATH: { LINE: [COMMENT, ...] }, ... }.
  92     """
  93     def add_comment(path, line, level, kind, message):
  94         """_"""
  95         logging.debug("add_comment %s %d %s %s '%s'",
  96                       path, line, level, kind, message)
  97         if kind in CHECKPATCH_IGNORED_KINDS:
  98             return
  99
 100         for pattern in CHECKPATCH_IGNORED_FILES:
 101             if fnmatch.fnmatch(path, pattern):
 102                 return
 103
 104         path_comments = path_line_comments.setdefault(path, {})
 105         line_comments = path_comments.setdefault(line, [])
 106         line_comments.append('(style) ' + message)
 107         warning_count[0] += 1
 108
 109     level = None # 'ERROR', 'WARNING'
 110     kind = None # 'CODE_INDENT', 'LEADING_SPACE', ...
 111     message = None # 'code indent should use tabs where possible'
 112
 113     for line in out.splitlines():
 114         # ERROR:CODE_INDENT: code indent should use tabs where possible
 115         # #404: FILE: lustre/liblustre/dir.c:103:
 116         # +        op_data.op_hash_offset = hash_x_index(page->index, 0);$
 117         line = line.strip()
 118         if not line:
 119             level, kind, message = None, None, None
 120         elif line[0] == '#':
 121             # '#404: FILE: lustre/liblustre/dir.c:103:'
 122             tokens = line.split(':', 5)
 123             if len(tokens) != 5 or tokens[1] != ' FILE':
 124                 continue
 125
 126             path = tokens[2].strip()
 127             line_number_str = tokens[3].strip()
 128             if not line_number_str.isdigit():
 129                 continue
 130
 131             line_number = int(line_number_str)
 132
 133             if path and level and kind and message:
 134                 add_comment(path, line_number, level, kind, message)
 135         elif line[0] == '+':
 136             continue
 137         else:
 138             # ERROR:CODE_INDENT: code indent should use tabs where possible
 139             try:
 140                 level, kind, message = line.split(':', 2)
 141             except ValueError:
 142                 level, kind, message = None, None, None
 143
 144             if level != 'ERROR' and level != 'WARNING':
 145                 level, kind, message = None, None, None
 146
 147
 148 def review_input_and_score(path_line_comments, warning_count):
 149     """
 150     Convert { PATH: { LINE: [COMMENT, ...] }, ... }, [11] to a gerrit
 151     ReviewInput() and score
 152     """
 153     review_comments = {}
 154
 155     for path, line_comments in path_line_comments.iteritems():
 156         path_comments = []
 157         for line, comment_list in line_comments.iteritems():
 158             message = '\n'.join(comment_list)
 159             path_comments.append({'line': line, 'message': message})
 160         review_comments[path] = path_comments
 161
 162     if warning_count[0] > 0:
 163         score = -1
 164     else:
 165         score = +1
 166
 167     if USE_CODE_REVIEW_SCORE:
 168         code_review_score = score
 169     else:
 170         code_review_score = 0
 171
 172     if score < 0:
 173         return {
 174             'message': ('%d style warning(s).\nFor more details please see %s' %
 175                         (warning_count[0], STYLE_LINK)),
 176             'labels': {
 177                 'Code-Review': code_review_score
 178                 },
 179             'comments': review_comments,
 180             'notify': 'OWNER',
 181             }, score
 182     else:
 183         return {
 184             'message': 'Looks good to me.',
 185             'labels': {
 186                 'Code-Review': code_review_score
 187                 },
 188             'notify': 'NONE',
 189             }, score
 190
 191
 192 def _now():
 193     """_"""
 194     return long(time.time())
 195
 196
 197 class Reviewer(object):
 198     """
 199     * Poll gerrit instance for updates to changes matching project and branch.
 200     * Pipe new patches through checkpatch.
 201     * Convert checkpatch output to gerrit ReviewInput().
 202     * Post ReviewInput() to gerrit instance.
 203     * Track reviewed revisions in history_path.
 204     """
 205     def __init__(self, host, project, branch, username, password, history_path):
 206         self.host = host
 207         self.project = project
 208         self.branch = branch
 209         self.auth = requests.auth.HTTPDigestAuth(username, password)
 210         self.logger = logging.getLogger(__name__)
 211         self.history_path = history_path
 212         self.history_mode = 'rw'
 213         self.history = {}
 214         self.timestamp = 0L
 215         self.post_enabled = True
 216         self.post_interval = 10
 217         self.update_interval = 300
 218
 219     def _debug(self, msg, *args):
 220         """_"""
 221         self.logger.debug(msg, *args)
 222
 223     def _error(self, msg, *args):
 224         """_"""
 225         self.logger.error(msg, *args)
 226
 227     def _url(self, path):
 228         """_"""
 229         return 'http://' + self.host + '/a' + path
 230
 231     def _get(self, path):
 232         """
 233         GET path return Response.
 234         """
 235         url = self._url(path)
 236         try:
 237             res = requests.get(url, auth=self.auth)
 238         except requests.exceptions.RequestException as exc:
 239             self._error("cannot GET '%s': exception = %s", url, str(exc))
 240             return None
 241
 242         if res.status_code != requests.codes.ok:
 243             self._error("cannot GET '%s': reason = %s, status_code = %d",
 244                        url, res.reason, res.status_code)
 245             return None
 246
 247         return res
 248
 249     def _post(self, path, obj):
 250         """
 251         POST json(obj) to path, return True on success.
 252         """
 253         url = self._url(path)
 254         data = json.dumps(obj)
 255         if not self.post_enabled:
 256             self._debug("_post: disabled: url = '%s', data = '%s'", url, data)
 257             return False
 258
 259         try:
 260             res = requests.post(url, data=data,
 261                                 headers={'Content-Type': 'application/json'},
 262                                 auth=self.auth)
 263         except requests.exceptions.RequestException as exc:
 264             self._error("cannot POST '%s': exception = %s", url, str(exc))
 265             return False
 266
 267         if res.status_code != requests.codes.ok:
 268             self._error("cannot POST '%s': reason = %s, status_code = %d",
 269                        url, res.reason, res.status_code)
 270             return False
 271
 272         return True
 273
 274     def load_history(self):
 275         """
 276         Load review history from history_path containing lines of the form:
 277         EPOCH      FULL_CHANGE_ID                         REVISION    SCORE
 278         1394536722 fs%2Flustre-release~master~I5cc6c23... 00e2cc75... 1
 279         1394536721 -                                      -           0
 280         1394537033 fs%2Flustre-release~master~I10be8e9... 44f7b504... 1
 281         1394537032 -                                      -           0
 282         1394537344 -                                      -           0
 283         ...
 284         """
 285         if 'r' in self.history_mode:
 286             with open(self.history_path) as history_file:
 287                 for line in history_file:
 288                     epoch, change_id, revision, score = line.split()
 289                     if change_id == '-':
 290                         self.timestamp = long(float(epoch))
 291                     else:
 292                         self.history[change_id + ' ' + revision] = score
 293
 294         self._debug("load_history: history size = %d, timestamp = %d",
 295                     len(self.history), self.timestamp)
 296
 297     def write_history(self, change_id, revision, score, epoch=-1):
 298         """
 299         Add review record to history dict and file.
 300         """
 301         if change_id != '-':
 302             self.history[change_id + ' ' + revision] = score
 303
 304         if epoch <= 0:
 305             epoch = self.timestamp
 306
 307         if 'w' in self.history_mode:
 308             with open(self.history_path, 'a') as history_file:
 309                 print >> history_file, epoch, change_id, revision, score
 310
 311     def in_history(self, change_id, revision):
 312         """
 313         Return True if change_id/revision was already reviewed.
 314         """
 315         return change_id + ' ' + revision in self.history
 316
 317     def get_changes(self, query):
 318         """
 319         GET a list of ChangeInfo()s for all changes matching query.
 320
 321         {'status':'open', '-age':'60m'} =>
 322           GET /changes/?q=project:...+status:open+-age:60m&o=CURRENT_REVISION =>
 323             [ChangeInfo()...]
 324         """
 325         query = dict(query)
 326         project = query.get('project', self.project)
 327         query['project'] = urllib.quote(project, safe='')
 328         branch = query.get('branch', self.branch)
 329         query['branch'] = urllib.quote(branch, safe='')
 330         path = ('/changes/?q=' +
 331                 '+'.join(k + ':' + v for k, v in query.iteritems()) +
 332                 '&o=CURRENT_REVISION')
 333         res = self._get(path)
 334         if not res:
 335             return None
 336
 337         # Gerrit uses " )]}'" to guard against XSSI.
 338         return json.loads(res.content[5:])
 339
 340     def decode_patch(self, content):
 341         """
 342         Decode gerrit's idea of base64.
 343
 344         The base64 encoded patch returned by gerrit isn't always
 345         padded correctly according to b64decode. Don't know why. Work
 346         around this by appending more '=' characters or truncating the
 347         content until it decodes. But do try the unmodified content
 348         first.
 349         """
 350         for i in (0, 1, 2, 3, -1, -2, -3):
 351             if i >= 0:
 352                 padded_content = content + (i * '=')
 353             else:
 354                 padded_content = content[:i]
 355
 356             try:
 357                 return base64.b64decode(padded_content)
 358             except TypeError as exc:
 359                 self._debug("decode_patch: len = %d, exception = %s",
 360                            len(padded_content), str(exc))
 361         else:
 362             return ''
 363
 364     def get_patch(self, change, revision='current'):
 365         """
 366         GET and decode the (current) patch for change.
 367         """
 368         path = '/changes/' + change['id'] + '/revisions/' + revision + '/patch'
 369         self._debug("get_patch: path = '%s'", path)
 370         res = self._get(path)
 371         if not res:
 372             return ''
 373
 374         self._debug("get_patch: len(content) = %d, content = '%s...'",
 375                    len(res.content), res.content[:20])
 376
 377         return self.decode_patch(res.content)
 378
 379     def set_review(self, change, revision, review_input):
 380         """
 381         POST review_input for the given revision of change.
 382         """
 383         path = '/changes/' + change['id'] + '/revisions/' + revision + '/review'
 384         self._debug("set_review: path = '%s'", path)
 385         return self._post(path, review_input)
 386
 387     def check_patch(self, patch):
 388         """
 389         Run each script in CHECKPATCH_PATHS on patch, return a
 390         ReviewInput() and score.
 391         """
 392         path_line_comments = {}
 393         warning_count = [0]
 394
 395         for path in CHECKPATCH_PATHS:
 396             pipe = subprocess.Popen([path, '--show-types', '-'],
 397                                     stdin=subprocess.PIPE,
 398                                     stdout=subprocess.PIPE,
 399                                     stderr=subprocess.PIPE)
 400             out, err = pipe.communicate(patch)
 401             self._debug("check_patch: path = %s, out = '%s...', err = '%s...'",
 402                         path, out[:80], err[:80])
 403             parse_checkpatch_output(out, path_line_comments, warning_count)
 404
 405         return review_input_and_score(path_line_comments, warning_count)
 406
 407     def review_change(self, change, force=False):
 408         """
 409         Review the current revision of change.
 410         * Bail if the change isn't open (status is not 'NEW').
 411         * GET the current revision from gerrit.
 412         * Bail if we've already reviewed it (unless force is True).
 413         * Pipe the patch through checkpatch(es).
 414         * Save results to review history.
 415         * POST review to gerrit.
 416         """
 417         self._debug("review_change: change = %s, subject = '%s'",
 418                     change['id'], change.get('subject', ''))
 419
 420         status = change.get('status')
 421         if status != 'NEW':
 422             self._debug("review_change: status = %s", status)
 423             return False
 424
 425         current_revision = change.get('current_revision')
 426         self._debug("review_change: current_revision = '%s'", current_revision)
 427         if not current_revision:
 428             return False
 429
 430         # Have we already checked this revision?
 431         if self.in_history(change['id'], current_revision) and not force:
 432             self._debug("review_change: already reviewed")
 433             return False
 434
 435         patch = self.get_patch(change, current_revision)
 436         if not patch:
 437             self._debug("review_change: no patch")
 438             return False
 439
 440         review_input, score = self.check_patch(patch)
 441         self._debug("review_change: score = %d", score)
 442         self.write_history(change['id'], current_revision, score)
 443         self.set_review(change, current_revision, review_input)
 444         # Don't POST more than every post_interval seconds.
 445         time.sleep(self.post_interval)
 446
 447     def update(self):
 448         """
 449         GET recently updated changes and review as needed.
 450         """
 451         new_timestamp = _now()
 452         age = new_timestamp - self.timestamp + 60 * 60 # 1h padding
 453         self._debug("update: age = %d", age)
 454
 455         open_changes = self.get_changes({'status':'open',
 456                                          '-age':str(age) + 's'})
 457         self._debug("update: got %d open_changes", len(open_changes))
 458
 459         for change in open_changes:
 460             self.review_change(change)
 461
 462         self.timestamp = new_timestamp
 463         self.write_history('-', '-', 0)
 464
 465     def run(self):
 466         """
 467         * Load review history.
 468         * Call update() every poll_interval seconds.
 469         """
 470
 471         if self.timestamp <= 0:
 472             self.load_history()
 473
 474         while True:
 475             self.update()
 476             time.sleep(self.update_interval)
 477
 478
 479 def main():
 480     """_"""
 481     logging.basicConfig(level=logging.DEBUG)
 482
 483     with open(GERRIT_AUTH_PATH) as auth_file:
 484         auth = json.load(auth_file)
 485         username = auth[GERRIT_HOST]['gerrit/http']['username']
 486         password = auth[GERRIT_HOST]['gerrit/http']['password']
 487
 488     reviewer = Reviewer(GERRIT_HOST, GERRIT_PROJECT, GERRIT_BRANCH,
 489                         username, password, REVIEW_HISTORY_PATH)
 490     reviewer.run()
 491
 492
 493 if __name__ == "__main__":
 494     main()