contrib/scripts/gerrit_checkpatch.py

   1 #!/usr/bin/env python3
   2 # SPDX-License-Identifier: GPL-2.0-only
   3 #
   4 # Copyright (c) 2014, Intel Corporation.
   5 #
   6 # Author: John L. Hammond <john.hammond@intel.com>
   7 #
   8 """
   9 Gerrit Checkpatch Reviewer Daemon
  10 ~~~~~~ ~~~~~~~~~~ ~~~~~~~~ ~~~~~~
  11
  12 * Watch for new change revisions in a gerrit instance.
  13 * Pass new revisions through checkpatch script.
  14 * POST reviews back to gerrit based on checkpatch output.
  15 """
  16
  17 from __future__ import print_function
  18 import base64
  19 import fnmatch
  20 import logging
  21 import json
  22 import os
  23 import requests
  24 import subprocess
  25 import time
  26 from six.moves.urllib.parse import quote
  27
  28 def _getenv_list(key, default=None, sep=':'):
  29     """
  30     'PATH' => ['/bin', '/usr/bin', ...]
  31     """
  32     value = os.getenv(key)
  33     if value is None:
  34         return default
  35     else:
  36         return value.split(sep)
  37
  38 GERRIT_HOST = os.getenv('GERRIT_HOST', 'review.whamcloud.com')
  39 GERRIT_PROJECT = os.getenv('GERRIT_PROJECT', 'fs/lustre-release')
  40 GERRIT_BRANCH = os.getenv('GERRIT_BRANCH', 'master')
  41 GERRIT_AUTH_PATH = os.getenv('GERRIT_AUTH_PATH', 'GERRIT_AUTH')
  42 GERRIT_CHANGE_NUMBER = os.getenv('GERRIT_CHANGE_NUMBER', None)
  43
  44 # GERRIT_AUTH should contain a single JSON dictionary of the form:
  45 # {
  46 #     "review.example.com": {
  47 #         "gerrit/http": {
  48 #             "username": "example-checkpatch",
  49 #             "password": "1234"
  50 #         }
  51 #     }
  52 #     ...
  53 # }
  54
  55 CHECKPATCH_PATHS = _getenv_list('CHECKPATCH_PATHS', ['checkpatch.pl'])
  56 CHECKPATCH_ARGS = os.getenv('CHECKPATCH_ARGS','--show-types -').split(' ')
  57 CHECKPATCH_IGNORED_FILES = _getenv_list('CHECKPATCH_IGNORED_FILES', [
  58         'lustre/ptlrpc/wiretest.c',
  59         'lustre/utils/wiretest.c',
  60         '*.patch'])
  61 CHECKPATCH_IGNORED_KINDS = _getenv_list('CHECKPATCH_IGNORED_KINDS', [
  62         'LASSERT',
  63         'LCONSOLE',
  64         'LEADING_SPACE'])
  65 REVIEW_HISTORY_PATH = os.getenv('REVIEW_HISTORY_PATH', 'REVIEW_HISTORY')
  66 STYLE_LINK = os.getenv('STYLE_LINK',
  67         'http://wiki.lustre.org/Lustre_Coding_Style_Guidelines')
  68
  69 USE_CODE_REVIEW_SCORE = False
  70
  71 def parse_checkpatch_output(out, path_line_comments, warning_count):
  72     """
  73     Parse string output out of CHECKPATCH into path_line_comments.
  74     Increment warning_count[0] for each warning.
  75
  76     path_line_comments is { PATH: { LINE: [COMMENT, ...] }, ... }.
  77     """
  78     def add_comment(path, line, level, kind, message):
  79         """_"""
  80         logging.debug("add_comment %s %d %s %s '%s'",
  81                       path, line, level, kind, message)
  82         if kind in CHECKPATCH_IGNORED_KINDS:
  83             return
  84
  85         for pattern in CHECKPATCH_IGNORED_FILES:
  86             if fnmatch.fnmatch(path, pattern):
  87                 return
  88
  89         path_comments = path_line_comments.setdefault(path, {})
  90         line_comments = path_comments.setdefault(line, [])
  91         line_comments.append('(style) %s\n' % message)
  92         warning_count[0] += 1
  93
  94     level = None # 'ERROR', 'WARNING'
  95     kind = None # 'CODE_INDENT', 'LEADING_SPACE', ...
  96     message = None # 'code indent should use tabs where possible'
  97
  98     for line in out.splitlines():
  99         # ERROR:CODE_INDENT: code indent should use tabs where possible
 100         # #404: FILE: lustre/liblustre/dir.c:103:
 101         # +        op_data.op_hash_offset = hash_x_index(page->index, 0);$
 102         line = line.strip()
 103         if not line:
 104             level, kind, message = None, None, None
 105         elif line[0] == '#':
 106             # '#404: FILE: lustre/liblustre/dir.c:103:'
 107             tokens = line.split(':', 5)
 108             if len(tokens) != 5 or tokens[1] != ' FILE':
 109                 continue
 110
 111             path = tokens[2].strip()
 112             line_number_str = tokens[3].strip()
 113             if not line_number_str.isdigit():
 114                 continue
 115
 116             line_number = int(line_number_str)
 117
 118             if path and level and kind and message:
 119                 add_comment(path, line_number, level, kind, message)
 120         elif line[0] == '+':
 121             continue
 122         else:
 123             # ERROR:CODE_INDENT: code indent should use tabs where possible
 124             try:
 125                 level, kind, message = line.split(':', 2)
 126             except ValueError:
 127                 level, kind, message = None, None, None
 128
 129             if level not in ('ERROR', 'WARNING'):
 130                 level, kind, message = None, None, None
 131
 132
 133 def review_input_and_score(path_line_comments, warning_count):
 134     """
 135     Convert { PATH: { LINE: [COMMENT, ...] }, ... }, [11] to a gerrit
 136     ReviewInput() and score
 137     """
 138     review_comments = {}
 139
 140     for path, line_comments in list(path_line_comments.items()):
 141         path_comments = []
 142         for line, comment_list in list(line_comments.items()):
 143             message = '\n'.join(comment_list)
 144             path_comments.append({'line': line, 'message': message})
 145         review_comments[path] = path_comments
 146
 147     if warning_count[0] > 0:
 148         score = -1
 149     else:
 150         score = +1
 151
 152     if USE_CODE_REVIEW_SCORE:
 153         code_review_score = score
 154     else:
 155         code_review_score = 0
 156
 157     if score < 0:
 158         return {
 159             'message': ('%d style warning(s).\nFor more details please see %s' %
 160                         (warning_count[0], STYLE_LINK)),
 161             'labels': {
 162                 'Code-Review': code_review_score
 163                 },
 164             'comments': review_comments,
 165             'notify': 'OWNER',
 166             }, score
 167     else:
 168         return {
 169             'message': 'Looks good to me.',
 170             'labels': {
 171                 'Code-Review': code_review_score
 172                 },
 173             'notify': 'NONE',
 174             }, score
 175
 176
 177 def _now():
 178     """_"""
 179     return int(time.time())
 180
 181
 182 class Reviewer(object):
 183     """
 184     * Poll gerrit instance for updates to changes matching project and branch.
 185     * Pipe new patches through checkpatch.
 186     * Convert checkpatch output to gerrit ReviewInput().
 187     * Post ReviewInput() to gerrit instance.
 188     * Track reviewed revisions in history_path.
 189     """
 190     def __init__(self, host, project, branch, username, password, history_path):
 191         self.host = host
 192         self.project = project
 193         self.branch = branch
 194         self.auth = requests.auth.HTTPDigestAuth(username, password)
 195         self.logger = logging.getLogger(__name__)
 196         self.history_path = history_path
 197         self.history_mode = 'rw'
 198         self.history = {}
 199         self.timestamp = 0
 200         self.post_enabled = True
 201         self.post_interval = 10
 202         self.update_interval = 300
 203         self.request_timeout = 60
 204
 205     def _debug(self, msg, *args):
 206         """_"""
 207         self.logger.debug(msg, *args)
 208
 209     def _error(self, msg, *args):
 210         """_"""
 211         self.logger.error(msg, *args)
 212
 213     def _url(self, path):
 214         """_"""
 215         return 'http://' + self.host + '/a' + path
 216
 217     def _get(self, path):
 218         """
 219         GET path return Response.
 220         """
 221         url = self._url(path)
 222         try:
 223             res = requests.get(url, auth=self.auth,
 224                                timeout=self.request_timeout)
 225         except Exception as exc:
 226             self._error("cannot GET '%s': exception = %s", url, str(exc))
 227             return None
 228
 229         if res.status_code != requests.codes.ok:
 230             self._error("cannot GET '%s': reason = %s, status_code = %d",
 231                        url, res.reason, res.status_code)
 232             return None
 233
 234         return res
 235
 236     def _post(self, path, obj):
 237         """
 238         POST json(obj) to path, return True on success.
 239         """
 240         url = self._url(path)
 241         data = json.dumps(obj)
 242         if not self.post_enabled:
 243             self._debug("_post: disabled: url = '%s', data = '%s'", url, data)
 244             return False
 245
 246         try:
 247             res = requests.post(url, data=data,
 248                                 headers={'Content-Type': 'application/json'},
 249                                 auth=self.auth, timeout=self.request_timeout)
 250         except Exception as exc:
 251             self._error("cannot POST '%s': exception = %s", url, str(exc))
 252             return False
 253
 254         if res.status_code != requests.codes.ok:
 255             self._error("cannot POST '%s': reason = %s, status_code = %d",
 256                        url, res.reason, res.status_code)
 257             return False
 258
 259         return True
 260
 261     def load_history(self):
 262         """
 263         Load review history from history_path containing lines of the form:
 264         EPOCH      FULL_CHANGE_ID                         REVISION    SCORE
 265         1394536722 fs%2Flustre-release~master~I5cc6c23... 00e2cc75... 1
 266         1394536721 -                                      -           0
 267         1394537033 fs%2Flustre-release~master~I10be8e9... 44f7b504... 1
 268         1394537032 -                                      -           0
 269         1394537344 -                                      -           0
 270         ...
 271         """
 272         if 'r' in self.history_mode:
 273             with open(self.history_path) as history_file:
 274                 for line in history_file:
 275                     epoch, change_id, revision, score = line.split()
 276                     if change_id == '-':
 277                         self.timestamp = int(float(epoch))
 278                     else:
 279                         self.history[change_id + ' ' + revision] = score
 280
 281         self._debug("load_history: history size = %d, timestamp = %d",
 282                     len(self.history), self.timestamp)
 283
 284     def write_history(self, change_id, revision, score, epoch=-1):
 285         """
 286         Add review record to history dict and file.
 287         """
 288         if change_id != '-':
 289             self.history[change_id + ' ' + revision] = score
 290
 291         if epoch <= 0:
 292             epoch = self.timestamp
 293
 294         if 'w' in self.history_mode:
 295             with open(self.history_path, 'a') as history_file:
 296                 print(epoch, change_id, revision, score, file=history_file)
 297
 298     def in_history(self, change_id, revision):
 299         """
 300         Return True if change_id/revision was already reviewed.
 301         """
 302         return change_id + ' ' + revision in self.history
 303
 304     def get_change_by_id(self, change_id):
 305         """
 306         GET one change by id.
 307         """
 308         path = ('/changes/' + quote(self.project, safe='') + '~' +
 309                 quote(self.branch, safe='') + '~' + change_id +
 310                 '?o=CURRENT_REVISION')
 311         res = self._get(path)
 312         if not res:
 313             return None
 314
 315         # Gerrit uses " )]}'" to guard against XSSI.
 316         return json.loads(res.content[5:])
 317
 318     def get_changes(self, query):
 319         """
 320         GET a list of ChangeInfo()s for all changes matching query.
 321
 322         {'status':'open', '-age':'60m'} =>
 323           GET /changes/?q=project:...+status:open+-age:60m&o=CURRENT_REVISION =>
 324             [ChangeInfo()...]
 325         """
 326         query = dict(query)
 327         project = query.get('project', self.project)
 328         query['project'] = quote(project, safe='')
 329         branch = query.get('branch', self.branch)
 330         query['branch'] = quote(branch, safe='')
 331         path = ('/changes/?q=' +
 332                 '+'.join(k + ':' + v for k, v in list(query.items())) +
 333                 '&o=CURRENT_REVISION')
 334         res = self._get(path)
 335         if not res:
 336             return []
 337
 338         # Gerrit uses " )]}'" to guard against XSSI.
 339         return json.loads(res.content[5:])
 340
 341     def decode_patch(self, content):
 342         """
 343         Decode gerrit's idea of base64.
 344
 345         The base64 encoded patch returned by gerrit isn't always
 346         padded correctly according to b64decode. Don't know why. Work
 347         around this by appending more '=' characters or truncating the
 348         content until it decodes. But do try the unmodified content
 349         first.
 350         """
 351         for i in (0, 1, 2, 3, -1, -2, -3):
 352             if i >= 0:
 353                 padded_content = content + (i * '=')
 354             else:
 355                 padded_content = content[:i]
 356
 357             try:
 358                 return base64.b64decode(padded_content)
 359             except TypeError as exc:
 360                 self._debug("decode_patch: len = %d, exception = %s",
 361                            len(padded_content), str(exc))
 362         else:
 363             return ''
 364
 365     def get_patch(self, change, revision='current'):
 366         """
 367         GET and decode the (current) patch for change.
 368         """
 369         path = '/changes/' + change['id'] + '/revisions/' + revision + '/patch'
 370         self._debug("get_patch: path = '%s'", path)
 371         res = self._get(path)
 372         if not res:
 373             return ''
 374
 375         self._debug("get_patch: len(content) = %d, content = '%s...'",
 376                    len(res.content), res.content[:20])
 377
 378         return self.decode_patch(res.content)
 379
 380     def post_review(self, change, revision, review_input):
 381         """
 382         POST review_input for the given revision of change.
 383         """
 384         path = '/changes/' + change['id'] + '/revisions/' + revision + '/review'
 385         self._debug("post_review: path = '%s'", path)
 386         return self._post(path, review_input)
 387
 388     def check_patch(self, patch):
 389         """
 390         Run each script in CHECKPATCH_PATHS on patch, return a
 391         ReviewInput() and score.
 392         """
 393         path_line_comments = {}
 394         warning_count = [0]
 395
 396         for path in CHECKPATCH_PATHS:
 397             pipe = subprocess.Popen([path] + CHECKPATCH_ARGS,
 398                                     stdin=subprocess.PIPE,
 399                                     stdout=subprocess.PIPE,
 400                                     stderr=subprocess.PIPE)
 401             out, err = pipe.communicate(patch)
 402             self._debug("check_patch: path = %s, out = '%s...', err = '%s...'",
 403                         path, out[:80], err[:80])
 404             parse_checkpatch_output(out, path_line_comments, warning_count)
 405
 406         return review_input_and_score(path_line_comments, warning_count)
 407
 408     def change_needs_review(self, change):
 409         """
 410         * Bail if the change isn't open (status is not 'NEW').
 411         * Bail if we've already reviewed the current revision.
 412         """
 413         status = change.get('status')
 414         if status != 'NEW':
 415             self._debug("change_needs_review: status = %s", status)
 416             return False
 417
 418         current_revision = change.get('current_revision')
 419         self._debug("change_needs_review: current_revision = '%s'",
 420                     current_revision)
 421         if not current_revision:
 422             return False
 423
 424         # Have we already checked this revision?
 425         if self.in_history(change['id'], current_revision):
 426             self._debug("change_needs_review: already reviewed")
 427             return False
 428
 429         return True
 430
 431     def review_change(self, change):
 432         """
 433         Review the current revision of change.
 434         * Pipe the patch through checkpatch(es).
 435         * Save results to review history.
 436         * POST review to gerrit.
 437         """
 438         self._debug("review_change: change = %s, subject = '%s'",
 439                     change['id'], change.get('subject', ''))
 440
 441         current_revision = change.get('current_revision')
 442         self._debug("change_needs_review: current_revision = '%s'",
 443                     current_revision)
 444         if not current_revision:
 445             return
 446
 447         patch = self.get_patch(change, current_revision)
 448         if not patch:
 449             self._debug("review_change: no patch")
 450             return
 451
 452         review_input, score = self.check_patch(patch)
 453         self._debug("review_change: score = %d", score)
 454         self.write_history(change['id'], current_revision, score)
 455         self.post_review(change, current_revision, review_input)
 456
 457     def update(self):
 458         """
 459         GET recently updated changes and review as needed.
 460         """
 461         new_timestamp = _now()
 462         age = new_timestamp - self.timestamp + 60 * 60 # 1h padding
 463         self._debug("update: age = %d", age)
 464
 465         open_changes = self.get_changes({'status':'open',
 466                                          '-age':str(age) + 's'})
 467         self._debug("update: got %d open_changes", len(open_changes))
 468
 469         for change in open_changes:
 470             if self.change_needs_review(change):
 471                 self.review_change(change)
 472                 # Don't POST more than every post_interval seconds.
 473                 time.sleep(self.post_interval)
 474
 475         self.timestamp = new_timestamp
 476         self.write_history('-', '-', 0)
 477
 478     def update_single_change(self, change):
 479
 480         self.load_history()
 481
 482         open_changes = self.get_changes({'status':'open',
 483                                          'change':change})
 484         self._debug("update: got %d open_changes", len(open_changes))
 485
 486         for change in open_changes:
 487             if self.change_needs_review(change):
 488                 self.review_change(change)
 489
 490     def run(self):
 491         """
 492         * Load review history.
 493         * Call update() every poll_interval seconds.
 494         """
 495
 496         if self.timestamp <= 0:
 497             self.load_history()
 498
 499         while True:
 500             self.update()
 501             time.sleep(self.update_interval)
 502
 503
 504 def main():
 505     """_"""
 506     logging.basicConfig(format='%(asctime)s %(message)s', level=logging.DEBUG)
 507
 508     with open(GERRIT_AUTH_PATH) as auth_file:
 509         auth = json.load(auth_file)
 510         username = auth[GERRIT_HOST]['gerrit/http']['username']
 511         password = auth[GERRIT_HOST]['gerrit/http']['password']
 512
 513     reviewer = Reviewer(GERRIT_HOST, GERRIT_PROJECT, GERRIT_BRANCH,
 514                         username, password, REVIEW_HISTORY_PATH)
 515
 516     if GERRIT_CHANGE_NUMBER:
 517         reviewer.update_single_change(GERRIT_CHANGE_NUMBER)
 518     else:
 519         reviewer.run()
 520
 521
 522 if __name__ == "__main__":
 523     main()