contrib/scripts/gerrit_checkpatch.py

   1 #!/usr/bin/env python
   2 #
   3 # GPL HEADER START
   4 #
   5 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   6 #
   7 # This program is free software; you can redistribute it and/or modify
   8 # it under the terms of the GNU General Public License version 2 only,
   9 # as published by the Free Software Foundation.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License version 2 for more details (a copy is included
  15 # in the LICENSE file that accompanied this code).
  16 #
  17 # You should have received a copy of the GNU General Public License
  18 # version 2 along with this program; If not, see
  19 # http://www.gnu.org/licenses/gpl-2.0.html
  20 #
  21 # GPL HEADER END
  22 #
  23 # Copyright (c) 2014, Intel Corporation.
  24 #
  25 # Author: John L. Hammond <john.hammond@intel.com>
  26 #
  27 """
  28 Gerrit Checkpatch Reviewer Daemon
  29 ~~~~~~ ~~~~~~~~~~ ~~~~~~~~ ~~~~~~
  30
  31 * Watch for new change revisions in a gerrit instance.
  32 * Pass new revisions through checkpatch script.
  33 * POST reviews back to gerrit based on checkpatch output.
  34 """
  35
  36 import base64
  37 import fnmatch
  38 import logging
  39 import json
  40 import os
  41 import requests
  42 import subprocess
  43 import time
  44 import urllib
  45
  46 def _getenv_list(key, default=None, sep=':'):
  47     """
  48     'PATH' => ['/bin', '/usr/bin', ...]
  49     """
  50     value = os.getenv(key)
  51     if value is None:
  52         return default
  53     else:
  54         return value.split(sep)
  55
  56 GERRIT_HOST = os.getenv('GERRIT_HOST', 'review.whamcloud.com')
  57 GERRIT_PROJECT = os.getenv('GERRIT_PROJECT', 'fs/lustre-release')
  58 GERRIT_BRANCH = os.getenv('GERRIT_BRANCH', 'master')
  59 GERRIT_AUTH_PATH = os.getenv('GERRIT_AUTH_PATH', 'GERRIT_AUTH')
  60
# The file named by GERRIT_AUTH_PATH should contain a single JSON dictionary
# of the form:
  62 # {
  63 #     "review.example.com": {
  64 #         "gerrit/http": {
  65 #             "username": "example-checkpatch",
  66 #             "password": "1234"
  67 #         }
  68 #     }
  69 #     ...
  70 # }
  71
  72 CHECKPATCH_PATHS = _getenv_list('CHECKPATCH_PATHS', ['checkpatch.pl'])
  73 CHECKPATCH_IGNORED_FILES = _getenv_list('CHECKPATCH_IGNORED_FILES', [
  74         'lustre/contrib/wireshark/packet-lustre.c',
  75         'lustre/ptlrpc/wiretest.c',
  76         'lustre/utils/wiretest.c',
  77         '*.patch'])
  78 CHECKPATCH_IGNORED_KINDS = _getenv_list('CHECKPATCH_IGNORED_KINDS', [
  79         'LASSERT',
  80         'LCONSOLE',
  81         'LEADING_SPACE'])
  82 REVIEW_HISTORY_PATH = os.getenv('REVIEW_HISTORY_PATH', 'REVIEW_HISTORY')
  83 STYLE_LINK = os.getenv('STYLE_LINK',
  84         'https://wiki.hpdd.intel.com/display/PUB/Coding+Guidelines')
  85
  86 USE_CODE_REVIEW_SCORE = False
  87
  88 def parse_checkpatch_output(out, path_line_comments, warning_count):
  89     """
  90     Parse string output out of CHECKPATCH into path_line_comments.
  91     Increment warning_count[0] for each warning.
  92
  93     path_line_comments is { PATH: { LINE: [COMMENT, ...] }, ... }.
  94     """
  95     def add_comment(path, line, level, kind, message):
  96         """_"""
  97         logging.debug("add_comment %s %d %s %s '%s'",
  98                       path, line, level, kind, message)
  99         if kind in CHECKPATCH_IGNORED_KINDS:
 100             return
 101
 102         for pattern in CHECKPATCH_IGNORED_FILES:
 103             if fnmatch.fnmatch(path, pattern):
 104                 return
 105
 106         path_comments = path_line_comments.setdefault(path, {})
 107         line_comments = path_comments.setdefault(line, [])
 108         line_comments.append('(style) ' + message)
 109         warning_count[0] += 1
 110
 111     level = None # 'ERROR', 'WARNING'
 112     kind = None # 'CODE_INDENT', 'LEADING_SPACE', ...
 113     message = None # 'code indent should use tabs where possible'
 114
 115     for line in out.splitlines():
 116         # ERROR:CODE_INDENT: code indent should use tabs where possible
 117         # #404: FILE: lustre/liblustre/dir.c:103:
 118         # +        op_data.op_hash_offset = hash_x_index(page->index, 0);$
 119         line = line.strip()
 120         if not line:
 121             level, kind, message = None, None, None
 122         elif line[0] == '#':
 123             # '#404: FILE: lustre/liblustre/dir.c:103:'
 124             tokens = line.split(':', 5)
 125             if len(tokens) != 5 or tokens[1] != ' FILE':
 126                 continue
 127
 128             path = tokens[2].strip()
 129             line_number_str = tokens[3].strip()
 130             if not line_number_str.isdigit():
 131                 continue
 132
 133             line_number = int(line_number_str)
 134
 135             if path and level and kind and message:
 136                 add_comment(path, line_number, level, kind, message)
 137         elif line[0] == '+':
 138             continue
 139         else:
 140             # ERROR:CODE_INDENT: code indent should use tabs where possible
 141             try:
 142                 level, kind, message = line.split(':', 2)
 143             except ValueError:
 144                 level, kind, message = None, None, None
 145
 146             if level != 'ERROR' and level != 'WARNING':
 147                 level, kind, message = None, None, None
 148
 149
 150 def review_input_and_score(path_line_comments, warning_count):
 151     """
 152     Convert { PATH: { LINE: [COMMENT, ...] }, ... }, [11] to a gerrit
 153     ReviewInput() and score
 154     """
 155     review_comments = {}
 156
 157     for path, line_comments in path_line_comments.iteritems():
 158         path_comments = []
 159         for line, comment_list in line_comments.iteritems():
 160             message = '\n'.join(comment_list)
 161             path_comments.append({'line': line, 'message': message})
 162         review_comments[path] = path_comments
 163
 164     if warning_count[0] > 0:
 165         score = -1
 166     else:
 167         score = +1
 168
 169     if USE_CODE_REVIEW_SCORE:
 170         code_review_score = score
 171     else:
 172         code_review_score = 0
 173
 174     if score < 0:
 175         return {
 176             'message': ('%d style warning(s).\nFor more details please see %s' %
 177                         (warning_count[0], STYLE_LINK)),
 178             'labels': {
 179                 'Code-Review': code_review_score
 180                 },
 181             'comments': review_comments,
 182             'notify': 'OWNER',
 183             }, score
 184     else:
 185         return {
 186             'message': 'Looks good to me.',
 187             'labels': {
 188                 'Code-Review': code_review_score
 189                 },
 190             'notify': 'NONE',
 191             }, score
 192
 193
 194 def _now():
 195     """_"""
 196     return long(time.time())
 197
 198
 199 class Reviewer(object):
 200     """
 201     * Poll gerrit instance for updates to changes matching project and branch.
 202     * Pipe new patches through checkpatch.
 203     * Convert checkpatch output to gerrit ReviewInput().
 204     * Post ReviewInput() to gerrit instance.
 205     * Track reviewed revisions in history_path.
 206     """
 207     def __init__(self, host, project, branch, username, password, history_path):
 208         self.host = host
 209         self.project = project
 210         self.branch = branch
 211         self.auth = requests.auth.HTTPDigestAuth(username, password)
 212         self.logger = logging.getLogger(__name__)
 213         self.history_path = history_path
 214         self.history_mode = 'rw'
 215         self.history = {}
 216         self.timestamp = 0L
 217         self.post_enabled = True
 218         self.post_interval = 10
 219         self.update_interval = 300
 220         self.request_timeout = 60
 221
 222     def _debug(self, msg, *args):
 223         """_"""
 224         self.logger.debug(msg, *args)
 225
 226     def _error(self, msg, *args):
 227         """_"""
 228         self.logger.error(msg, *args)
 229
 230     def _url(self, path):
 231         """_"""
 232         return 'http://' + self.host + '/a' + path
 233
 234     def _get(self, path):
 235         """
 236         GET path return Response.
 237         """
 238         url = self._url(path)
 239         try:
 240             res = requests.get(url, auth=self.auth,
 241                                timeout=self.request_timeout)
 242         except Exception as exc:
 243             self._error("cannot GET '%s': exception = %s", url, str(exc))
 244             return None
 245
 246         if res.status_code != requests.codes.ok:
 247             self._error("cannot GET '%s': reason = %s, status_code = %d",
 248                        url, res.reason, res.status_code)
 249             return None
 250
 251         return res
 252
 253     def _post(self, path, obj):
 254         """
 255         POST json(obj) to path, return True on success.
 256         """
 257         url = self._url(path)
 258         data = json.dumps(obj)
 259         if not self.post_enabled:
 260             self._debug("_post: disabled: url = '%s', data = '%s'", url, data)
 261             return False
 262
 263         try:
 264             res = requests.post(url, data=data,
 265                                 headers={'Content-Type': 'application/json'},
 266                                 auth=self.auth, timeout=self.request_timeout)
 267         except Exception as exc:
 268             self._error("cannot POST '%s': exception = %s", url, str(exc))
 269             return False
 270
 271         if res.status_code != requests.codes.ok:
 272             self._error("cannot POST '%s': reason = %s, status_code = %d",
 273                        url, res.reason, res.status_code)
 274             return False
 275
 276         return True
 277
 278     def load_history(self):
 279         """
 280         Load review history from history_path containing lines of the form:
 281         EPOCH      FULL_CHANGE_ID                         REVISION    SCORE
 282         1394536722 fs%2Flustre-release~master~I5cc6c23... 00e2cc75... 1
 283         1394536721 -                                      -           0
 284         1394537033 fs%2Flustre-release~master~I10be8e9... 44f7b504... 1
 285         1394537032 -                                      -           0
 286         1394537344 -                                      -           0
 287         ...
 288         """
 289         if 'r' in self.history_mode:
 290             with open(self.history_path) as history_file:
 291                 for line in history_file:
 292                     epoch, change_id, revision, score = line.split()
 293                     if change_id == '-':
 294                         self.timestamp = long(float(epoch))
 295                     else:
 296                         self.history[change_id + ' ' + revision] = score
 297
 298         self._debug("load_history: history size = %d, timestamp = %d",
 299                     len(self.history), self.timestamp)
 300
 301     def write_history(self, change_id, revision, score, epoch=-1):
 302         """
 303         Add review record to history dict and file.
 304         """
 305         if change_id != '-':
 306             self.history[change_id + ' ' + revision] = score
 307
 308         if epoch <= 0:
 309             epoch = self.timestamp
 310
 311         if 'w' in self.history_mode:
 312             with open(self.history_path, 'a') as history_file:
 313                 print >> history_file, epoch, change_id, revision, score
 314
 315     def in_history(self, change_id, revision):
 316         """
 317         Return True if change_id/revision was already reviewed.
 318         """
 319         return change_id + ' ' + revision in self.history
 320
 321     def get_change_by_id(self, change_id):
 322         """
 323         GET one change by id.
 324         """
 325         path = ('/changes/' + urllib.quote(self.project, safe='') + '~' +
 326                 urllib.quote(self.branch, safe='') + '~' + change_id +
 327                 '?o=CURRENT_REVISION')
 328         res = self._get(path)
 329         if not res:
 330             return None
 331
 332         # Gerrit uses " )]}'" to guard against XSSI.
 333         return json.loads(res.content[5:])
 334
 335     def get_changes(self, query):
 336         """
 337         GET a list of ChangeInfo()s for all changes matching query.
 338
 339         {'status':'open', '-age':'60m'} =>
 340           GET /changes/?q=project:...+status:open+-age:60m&o=CURRENT_REVISION =>
 341             [ChangeInfo()...]
 342         """
 343         query = dict(query)
 344         project = query.get('project', self.project)
 345         query['project'] = urllib.quote(project, safe='')
 346         branch = query.get('branch', self.branch)
 347         query['branch'] = urllib.quote(branch, safe='')
 348         path = ('/changes/?q=' +
 349                 '+'.join(k + ':' + v for k, v in query.iteritems()) +
 350                 '&o=CURRENT_REVISION')
 351         res = self._get(path)
 352         if not res:
 353             return []
 354
 355         # Gerrit uses " )]}'" to guard against XSSI.
 356         return json.loads(res.content[5:])
 357
 358     def decode_patch(self, content):
 359         """
 360         Decode gerrit's idea of base64.
 361
 362         The base64 encoded patch returned by gerrit isn't always
 363         padded correctly according to b64decode. Don't know why. Work
 364         around this by appending more '=' characters or truncating the
 365         content until it decodes. But do try the unmodified content
 366         first.
 367         """
 368         for i in (0, 1, 2, 3, -1, -2, -3):
 369             if i >= 0:
 370                 padded_content = content + (i * '=')
 371             else:
 372                 padded_content = content[:i]
 373
 374             try:
 375                 return base64.b64decode(padded_content)
 376             except TypeError as exc:
 377                 self._debug("decode_patch: len = %d, exception = %s",
 378                            len(padded_content), str(exc))
 379         else:
 380             return ''
 381
 382     def get_patch(self, change, revision='current'):
 383         """
 384         GET and decode the (current) patch for change.
 385         """
 386         path = '/changes/' + change['id'] + '/revisions/' + revision + '/patch'
 387         self._debug("get_patch: path = '%s'", path)
 388         res = self._get(path)
 389         if not res:
 390             return ''
 391
 392         self._debug("get_patch: len(content) = %d, content = '%s...'",
 393                    len(res.content), res.content[:20])
 394
 395         return self.decode_patch(res.content)
 396
 397     def post_review(self, change, revision, review_input):
 398         """
 399         POST review_input for the given revision of change.
 400         """
 401         path = '/changes/' + change['id'] + '/revisions/' + revision + '/review'
 402         self._debug("post_review: path = '%s'", path)
 403         return self._post(path, review_input)
 404
 405     def check_patch(self, patch):
 406         """
 407         Run each script in CHECKPATCH_PATHS on patch, return a
 408         ReviewInput() and score.
 409         """
 410         path_line_comments = {}
 411         warning_count = [0]
 412
 413         for path in CHECKPATCH_PATHS:
 414             pipe = subprocess.Popen([path, '--show-types', '-'],
 415                                     stdin=subprocess.PIPE,
 416                                     stdout=subprocess.PIPE,
 417                                     stderr=subprocess.PIPE)
 418             out, err = pipe.communicate(patch)
 419             self._debug("check_patch: path = %s, out = '%s...', err = '%s...'",
 420                         path, out[:80], err[:80])
 421             parse_checkpatch_output(out, path_line_comments, warning_count)
 422
 423         return review_input_and_score(path_line_comments, warning_count)
 424
 425     def change_needs_review(self, change):
 426         """
 427         * Bail if the change isn't open (status is not 'NEW').
 428         * Bail if we've already reviewed the current revision.
 429         """
 430         status = change.get('status')
 431         if status != 'NEW':
 432             self._debug("change_needs_review: status = %s", status)
 433             return False
 434
 435         current_revision = change.get('current_revision')
 436         self._debug("change_needs_review: current_revision = '%s'",
 437                     current_revision)
 438         if not current_revision:
 439             return False
 440
 441         # Have we already checked this revision?
 442         if self.in_history(change['id'], current_revision):
 443             self._debug("change_needs_review: already reviewed")
 444             return False
 445
 446         return True
 447
 448     def review_change(self, change):
 449         """
 450         Review the current revision of change.
 451         * Pipe the patch through checkpatch(es).
 452         * Save results to review history.
 453         * POST review to gerrit.
 454         """
 455         self._debug("review_change: change = %s, subject = '%s'",
 456                     change['id'], change.get('subject', ''))
 457
 458         current_revision = change.get('current_revision')
 459         self._debug("change_needs_review: current_revision = '%s'",
 460                     current_revision)
 461         if not current_revision:
 462             return
 463
 464         patch = self.get_patch(change, current_revision)
 465         if not patch:
 466             self._debug("review_change: no patch")
 467             return
 468
 469         review_input, score = self.check_patch(patch)
 470         self._debug("review_change: score = %d", score)
 471         self.write_history(change['id'], current_revision, score)
 472         self.post_review(change, current_revision, review_input)
 473
 474     def update(self):
 475         """
 476         GET recently updated changes and review as needed.
 477         """
 478         new_timestamp = _now()
 479         age = new_timestamp - self.timestamp + 60 * 60 # 1h padding
 480         self._debug("update: age = %d", age)
 481
 482         open_changes = self.get_changes({'status':'open',
 483                                          '-age':str(age) + 's'})
 484         self._debug("update: got %d open_changes", len(open_changes))
 485
 486         for change in open_changes:
 487             if self.change_needs_review(change):
 488                 self.review_change(change)
 489                 # Don't POST more than every post_interval seconds.
 490                 time.sleep(self.post_interval)
 491
 492         self.timestamp = new_timestamp
 493         self.write_history('-', '-', 0)
 494
 495     def run(self):
 496         """
 497         * Load review history.
 498         * Call update() every poll_interval seconds.
 499         """
 500
 501         if self.timestamp <= 0:
 502             self.load_history()
 503
 504         while True:
 505             self.update()
 506             time.sleep(self.update_interval)
 507
 508
 509 def main():
 510     """_"""
 511     logging.basicConfig(format='%(asctime)s %(message)s', level=logging.DEBUG)
 512
 513     with open(GERRIT_AUTH_PATH) as auth_file:
 514         auth = json.load(auth_file)
 515         username = auth[GERRIT_HOST]['gerrit/http']['username']
 516         password = auth[GERRIT_HOST]['gerrit/http']['password']
 517
 518     reviewer = Reviewer(GERRIT_HOST, GERRIT_PROJECT, GERRIT_BRANCH,
 519                         username, password, REVIEW_HISTORY_PATH)
 520     reviewer.run()
 521
 522
 523 if __name__ == "__main__":
 524     main()