contrib/scripts/gerrit_checkpatch.py

   1 #!/usr/bin/env python
   2 #
   3 # GPL HEADER START
   4 #
   5 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   6 #
   7 # This program is free software; you can redistribute it and/or modify
   8 # it under the terms of the GNU General Public License version 2 only,
   9 # as published by the Free Software Foundation.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License version 2 for more details (a copy is included
  15 # in the LICENSE file that accompanied this code).
  16 #
  17 # You should have received a copy of the GNU General Public License
  18 # version 2 along with this program; If not, see
  19 # http://www.gnu.org/licenses/gpl-2.0.html
  20 #
  21 # GPL HEADER END
  22 #
  23 # Copyright (c) 2014, Intel Corporation.
  24 #
  25 # Author: John L. Hammond <john.hammond@intel.com>
  26 #
  27 """
  28 Gerrit Checkpatch Reviewer Daemon
  29 ~~~~~~ ~~~~~~~~~~ ~~~~~~~~ ~~~~~~
  30
  31 * Watch for new change revisions in a gerrit instance.
  32 * Pass new revisions through checkpatch script.
  33 * POST reviews back to gerrit based on checkpatch output.
  34 """
  35
  36 import base64
  37 import fnmatch
  38 import logging
  39 import json
  40 import os
  41 import requests
  42 import subprocess
  43 import time
  44 import urllib
  45
  46 def _getenv_list(key, default=None, sep=':'):
  47     """
  48     'PATH' => ['/bin', '/usr/bin', ...]
  49     """
  50     value = os.getenv(key)
  51     if value is None:
  52         return default
  53     else:
  54         return value.split(sep)
  55
  56 GERRIT_HOST = os.getenv('GERRIT_HOST', 'review.whamcloud.com')
  57 GERRIT_PROJECT = os.getenv('GERRIT_PROJECT', 'fs/lustre-release')
  58 GERRIT_BRANCH = os.getenv('GERRIT_BRANCH', 'master')
  59 GERRIT_AUTH_PATH = os.getenv('GERRIT_AUTH_PATH', 'GERRIT_AUTH')
  60
  61 # GERRIT_AUTH should contain a single JSON dictionary of the form:
  62 # {
  63 #     "review.example.com": {
  64 #         "gerrit/http": {
  65 #             "username": "example-checkpatch",
  66 #             "password": "1234"
  67 #         }
  68 #     }
  69 #     ...
  70 # }
  71
  72 CHECKPATCH_PATHS = _getenv_list('CHECKPATCH_PATHS', ['checkpatch.pl'])
  73 CHECKPATCH_IGNORED_FILES = _getenv_list('CHECKPATCH_IGNORED_FILES', [
  74         'lustre/contrib/wireshark/packet-lustre.c',
  75         'lustre/ptlrpc/wiretest.c',
  76         'lustre/utils/wiretest.c',
  77         '*.patch'])
  78 CHECKPATCH_IGNORED_KINDS = _getenv_list('CHECKPATCH_IGNORED_KINDS', [
  79         'LEADING_SPACE'])
  80 REVIEW_HISTORY_PATH = os.getenv('REVIEW_HISTORY_PATH', 'REVIEW_HISTORY')
  81 STYLE_LINK = os.getenv('STYLE_LINK',
  82         'https://wiki.hpdd.intel.com/display/PUB/Coding+Guidelines')
  83
  84 USE_CODE_REVIEW_SCORE = False
  85
  86 def parse_checkpatch_output(out, path_line_comments, warning_count):
  87     """
  88     Parse string output out of CHECKPATCH into path_line_comments.
  89     Increment warning_count[0] for each warning.
  90
  91     path_line_comments is { PATH: { LINE: [COMMENT, ...] }, ... }.
  92     """
  93     def add_comment(path, line, level, kind, message):
  94         """_"""
  95         logging.debug("add_comment %s %d %s %s '%s'",
  96                       path, line, level, kind, message)
  97         if kind in CHECKPATCH_IGNORED_KINDS:
  98             return
  99
 100         for pattern in CHECKPATCH_IGNORED_FILES:
 101             if fnmatch.fnmatch(path, pattern):
 102                 return
 103
 104         path_comments = path_line_comments.setdefault(path, {})
 105         line_comments = path_comments.setdefault(line, [])
 106         line_comments.append('(style) ' + message)
 107         warning_count[0] += 1
 108
 109     level = None # 'ERROR', 'WARNING'
 110     kind = None # 'CODE_INDENT', 'LEADING_SPACE', ...
 111     message = None # 'code indent should use tabs where possible'
 112
 113     for line in out.splitlines():
 114         # ERROR:CODE_INDENT: code indent should use tabs where possible
 115         # #404: FILE: lustre/liblustre/dir.c:103:
 116         # +        op_data.op_hash_offset = hash_x_index(page->index, 0);$
 117         line = line.strip()
 118         if not line:
 119             level, kind, message = None, None, None
 120         elif line[0] == '#':
 121             # '#404: FILE: lustre/liblustre/dir.c:103:'
 122             tokens = line.split(':', 5)
 123             if len(tokens) != 5 or tokens[1] != ' FILE':
 124                 continue
 125
 126             path = tokens[2].strip()
 127             line_number_str = tokens[3].strip()
 128             if not line_number_str.isdigit():
 129                 continue
 130
 131             line_number = int(line_number_str)
 132
 133             if path and level and kind and message:
 134                 add_comment(path, line_number, level, kind, message)
 135         elif line[0] == '+':
 136             continue
 137         else:
 138             # ERROR:CODE_INDENT: code indent should use tabs where possible
 139             try:
 140                 level, kind, message = line.split(':', 2)
 141             except ValueError:
 142                 level, kind, message = None, None, None
 143
 144             if level != 'ERROR' and level != 'WARNING':
 145                 level, kind, message = None, None, None
 146
 147
 148 def review_input_and_score(path_line_comments, warning_count):
 149     """
 150     Convert { PATH: { LINE: [COMMENT, ...] }, ... }, [11] to a gerrit
 151     ReviewInput() and score
 152     """
 153     review_comments = {}
 154
 155     for path, line_comments in path_line_comments.iteritems():
 156         path_comments = []
 157         for line, comment_list in line_comments.iteritems():
 158             message = '\n'.join(comment_list)
 159             path_comments.append({'line': line, 'message': message})
 160         review_comments[path] = path_comments
 161
 162     if warning_count[0] > 0:
 163         score = -1
 164     else:
 165         score = +1
 166
 167     if USE_CODE_REVIEW_SCORE:
 168         code_review_score = score
 169     else:
 170         code_review_score = 0
 171
 172     if score < 0:
 173         return {
 174             'message': ('%d style warning(s).\nFor more details please see %s' %
 175                         (warning_count[0], STYLE_LINK)),
 176             'labels': {
 177                 'Code-Review': code_review_score
 178                 },
 179             'comments': review_comments
 180             }, score
 181     else:
 182         return {
 183             'message': 'Looks good to me.',
 184             'labels': {
 185                 'Code-Review': code_review_score
 186                 }
 187             }, score
 188
 189
 190 def _now():
 191     """_"""
 192     return long(time.time())
 193
 194
 195 class Reviewer(object):
 196     """
 197     * Poll gerrit instance for updates to changes matching project and branch.
 198     * Pipe new patches through checkpatch.
 199     * Convert checkpatch output to gerrit ReviewInput().
 200     * Post ReviewInput() to gerrit instance.
 201     * Track reviewed revisions in history_path.
 202     """
 203     def __init__(self, host, project, branch, username, password, history_path):
 204         self.host = host
 205         self.project = project
 206         self.branch = branch
 207         self.auth = requests.auth.HTTPDigestAuth(username, password)
 208         self.logger = logging.getLogger(__name__)
 209         self.history_path = history_path
 210         self.history_mode = 'rw'
 211         self.history = {}
 212         self.timestamp = 0L
 213         self.post_enabled = True
 214         self.post_interval = 10
 215         self.update_interval = 300
 216
 217     def _debug(self, msg, *args):
 218         """_"""
 219         self.logger.debug(msg, *args)
 220
 221     def _error(self, msg, *args):
 222         """_"""
 223         self.logger.error(msg, *args)
 224
 225     def _url(self, path):
 226         """_"""
 227         return 'http://' + self.host + '/a' + path
 228
 229     def _get(self, path):
 230         """
 231         GET path return Response.
 232         """
 233         url = self._url(path)
 234         try:
 235             res = requests.get(url, auth=self.auth)
 236         except requests.exceptions.RequestException as exc:
 237             self._error("cannot GET '%s': exception = %s", url, str(exc))
 238             return None
 239
 240         if res.status_code != requests.codes.ok:
 241             self._error("cannot GET '%s': reason = %s, status_code = %d",
 242                        url, res.reason, res.status_code)
 243             return None
 244
 245         return res
 246
 247     def _post(self, path, obj):
 248         """
 249         POST json(obj) to path, return True on success.
 250         """
 251         url = self._url(path)
 252         data = json.dumps(obj)
 253         if not self.post_enabled:
 254             self._debug("_post: disabled: url = '%s', data = '%s'", url, data)
 255             return False
 256
 257         try:
 258             res = requests.post(url, data=data,
 259                                 headers={'Content-Type': 'application/json'},
 260                                 auth=self.auth)
 261         except requests.exceptions.RequestException as exc:
 262             self._error("cannot POST '%s': exception = %s", url, str(exc))
 263             return False
 264
 265         if res.status_code != requests.codes.ok:
 266             self._error("cannot POST '%s': reason = %s, status_code = %d",
 267                        url, res.reason, res.status_code)
 268             return False
 269
 270         return True
 271
 272     def load_history(self):
 273         """
 274         Load review history from history_path containing lines of the form:
 275         EPOCH      FULL_CHANGE_ID                         REVISION    SCORE
 276         1394536722 fs%2Flustre-release~master~I5cc6c23... 00e2cc75... 1
 277         1394536721 -                                      -           0
 278         1394537033 fs%2Flustre-release~master~I10be8e9... 44f7b504... 1
 279         1394537032 -                                      -           0
 280         1394537344 -                                      -           0
 281         ...
 282         """
 283         if 'r' in self.history_mode:
 284             with open(self.history_path) as history_file:
 285                 for line in history_file:
 286                     epoch, change_id, revision, score = line.split()
 287                     if change_id == '-':
 288                         self.timestamp = long(float(epoch))
 289                     else:
 290                         self.history[change_id + ' ' + revision] = score
 291
 292         self._debug("load_history: history size = %d, timestamp = %d",
 293                     len(self.history), self.timestamp)
 294
 295     def write_history(self, change_id, revision, score, epoch=-1):
 296         """
 297         Add review record to history dict and file.
 298         """
 299         if change_id != '-':
 300             self.history[change_id + ' ' + revision] = score
 301
 302         if epoch <= 0:
 303             epoch = self.timestamp
 304
 305         if 'w' in self.history_mode:
 306             with open(self.history_path, 'a') as history_file:
 307                 print >> history_file, epoch, change_id, revision, score
 308
 309     def in_history(self, change_id, revision):
 310         """
 311         Return True if change_id/revision was already reviewed.
 312         """
 313         return change_id + ' ' + revision in self.history
 314
 315     def get_changes(self, query):
 316         """
 317         GET a list of ChangeInfo()s for all changes matching query.
 318
 319         {'status':'open', '-age':'60m'} =>
 320           GET /changes/?q=project:...+status:open+-age:60m&o=CURRENT_REVISION =>
 321             [ChangeInfo()...]
 322         """
 323         query = dict(query)
 324         project = query.get('project', self.project)
 325         query['project'] = urllib.quote(project, safe='')
 326         branch = query.get('branch', self.branch)
 327         query['branch'] = urllib.quote(branch, safe='')
 328         path = ('/changes/?q=' +
 329                 '+'.join(k + ':' + v for k, v in query.iteritems()) +
 330                 '&o=CURRENT_REVISION')
 331         res = self._get(path)
 332         if not res:
 333             return None
 334
 335         # Gerrit uses " )]}'" to guard against XSSI.
 336         return json.loads(res.content[5:])
 337
 338     def decode_patch(self, content):
 339         """
 340         Decode gerrit's idea of base64.
 341
 342         The base64 encoded patch returned by gerrit isn't always
 343         padded correctly according to b64decode. Don't know why. Work
 344         around this by appending more '=' characters or truncating the
 345         content until it decodes. But do try the unmodified content
 346         first.
 347         """
 348         for i in (0, 1, 2, 3, -1, -2, -3):
 349             if i >= 0:
 350                 padded_content = content + (i * '=')
 351             else:
 352                 padded_content = content[:i]
 353
 354             try:
 355                 return base64.b64decode(padded_content)
 356             except TypeError as exc:
 357                 self._debug("decode_patch: len = %d, exception = %s",
 358                            len(padded_content), str(exc))
 359         else:
 360             return ''
 361
 362     def get_patch(self, change, revision='current'):
 363         """
 364         GET and decode the (current) patch for change.
 365         """
 366         path = '/changes/' + change['id'] + '/revisions/' + revision + '/patch'
 367         self._debug("get_patch: path = '%s'", path)
 368         res = self._get(path)
 369         if not res:
 370             return ''
 371
 372         self._debug("get_patch: len(content) = %d, content = '%s...'",
 373                    len(res.content), res.content[:20])
 374
 375         return self.decode_patch(res.content)
 376
 377     def set_review(self, change, revision, review_input):
 378         """
 379         POST review_input for the given revision of change.
 380         """
 381         path = '/changes/' + change['id'] + '/revisions/' + revision + '/review'
 382         self._debug("set_review: path = '%s'", path)
 383         return self._post(path, review_input)
 384
 385     def check_patch(self, patch):
 386         """
 387         Run each script in CHECKPATCH_PATHS on patch, return a
 388         ReviewInput() and score.
 389         """
 390         path_line_comments = {}
 391         warning_count = [0]
 392
 393         for path in CHECKPATCH_PATHS:
 394             pipe = subprocess.Popen([path, '--show-types', '-'],
 395                                     stdin=subprocess.PIPE,
 396                                     stdout=subprocess.PIPE,
 397                                     stderr=subprocess.PIPE)
 398             out, err = pipe.communicate(patch)
 399             self._debug("check_patch: path = %s, out = '%s...', err = '%s...'",
 400                         path, out[:80], err[:80])
 401             parse_checkpatch_output(out, path_line_comments, warning_count)
 402
 403         return review_input_and_score(path_line_comments, warning_count)
 404
 405     def review_change(self, change, force=False):
 406         """
 407         Review the current revision of change.
 408         * Bail if the change isn't open (status is not 'NEW').
 409         * GET the current revision from gerrit.
 410         * Bail if we've already reviewed it (unless force is True).
 411         * Pipe the patch through checkpatch(es).
 412         * Save results to review history.
 413         * POST review to gerrit.
 414         """
 415         self._debug("review_change: change = %s, subject = '%s'",
 416                     change['id'], change.get('subject', ''))
 417
 418         status = change.get('status')
 419         if status != 'NEW':
 420             self._debug("review_change: status = %s", status)
 421             return False
 422
 423         current_revision = change.get('current_revision')
 424         self._debug("review_change: current_revision = '%s'", current_revision)
 425         if not current_revision:
 426             return False
 427
 428         # Have we already checked this revision?
 429         if self.in_history(change['id'], current_revision) and not force:
 430             self._debug("review_change: already reviewed")
 431             return False
 432
 433         patch = self.get_patch(change, current_revision)
 434         if not patch:
 435             self._debug("review_change: no patch")
 436             return False
 437
 438         review_input, score = self.check_patch(patch)
 439         self._debug("review_change: score = %d", score)
 440         self.write_history(change['id'], current_revision, score)
 441         self.set_review(change, current_revision, review_input)
 442         # Don't POST more than every post_interval seconds.
 443         time.sleep(self.post_interval)
 444
 445     def update(self):
 446         """
 447         GET recently updated changes and review as needed.
 448         """
 449         new_timestamp = _now()
 450         age = new_timestamp - self.timestamp + 60 * 60 # 1h padding
 451         self._debug("update: age = %d", age)
 452
 453         open_changes = self.get_changes({'status':'open',
 454                                          '-age':str(age) + 's'})
 455         self._debug("update: got %d open_changes", len(open_changes))
 456
 457         for change in open_changes:
 458             self.review_change(change)
 459
 460         self.timestamp = new_timestamp
 461         self.write_history('-', '-', 0)
 462
 463     def run(self):
 464         """
 465         * Load review history.
 466         * Call update() every poll_interval seconds.
 467         """
 468
 469         if self.timestamp <= 0:
 470             self.load_history()
 471
 472         while True:
 473             self.update()
 474             time.sleep(self.update_interval)
 475
 476
 477 def main():
 478     """_"""
 479     logging.basicConfig(level=logging.DEBUG)
 480
 481     with open(GERRIT_AUTH_PATH) as auth_file:
 482         auth = json.load(auth_file)
 483         username = auth[GERRIT_HOST]['gerrit/http']['username']
 484         password = auth[GERRIT_HOST]['gerrit/http']['password']
 485
 486     reviewer = Reviewer(GERRIT_HOST, GERRIT_PROJECT, GERRIT_BRANCH,
 487                         username, password, REVIEW_HISTORY_PATH)
 488     reviewer.run()
 489
 490
 491 if __name__ == "__main__":
 492     main()