Whamcloud - gitweb
LU-17000 utils: handle_yaml_no_op() has wrong signature
[fs/lustre-release.git] / contrib / scripts / gerrit_checkpatch.py
1 #!/usr/bin/env python3
2 # SPDX-License-Identifier: GPL-2.0-only
3 #
4 # Copyright (c) 2014, Intel Corporation.
5 #
6 # Author: John L. Hammond <john.hammond@intel.com>
7 #
8 """
9 Gerrit Checkpatch Reviewer Daemon
10 ~~~~~~ ~~~~~~~~~~ ~~~~~~~~ ~~~~~~
11
12 * Watch for new change revisions in a gerrit instance.
13 * Pass new revisions through checkpatch script.
14 * POST reviews back to gerrit based on checkpatch output.
15 """
16
17 from __future__ import print_function
18 import base64
19 import fnmatch
20 import logging
21 import json
22 import os
23 import requests
24 import subprocess
25 import time
26 from six.moves.urllib.parse import quote
27
28 def _getenv_list(key, default=None, sep=':'):
29     """
30     'PATH' => ['/bin', '/usr/bin', ...]
31     """
32     value = os.getenv(key)
33     if value is None:
34         return default
35     else:
36         return value.split(sep)
37
# Gerrit connection settings; each one can be overridden from the environment.
GERRIT_HOST = os.environ.get('GERRIT_HOST', 'review.whamcloud.com')
GERRIT_PROJECT = os.environ.get('GERRIT_PROJECT', 'fs/lustre-release')
GERRIT_BRANCH = os.environ.get('GERRIT_BRANCH', 'master')
GERRIT_AUTH_PATH = os.environ.get('GERRIT_AUTH_PATH', 'GERRIT_AUTH')
# When set, main() reviews only this change instead of polling.
GERRIT_CHANGE_NUMBER = os.environ.get('GERRIT_CHANGE_NUMBER')

# The file at GERRIT_AUTH_PATH should contain a single JSON dictionary,
# keyed by gerrit host name, of the form:
# {
#     "review.example.com": {
#         "gerrit/http": {
#             "username": "example-checkpatch",
#             "password": "1234"
#         }
#     },
#     ...
# }

# Checkpatch scripts to run, their arguments, and what to filter out of
# their output.
CHECKPATCH_PATHS = _getenv_list('CHECKPATCH_PATHS', ['checkpatch.pl'])
CHECKPATCH_ARGS = os.environ.get('CHECKPATCH_ARGS', '--show-types -').split(' ')
CHECKPATCH_IGNORED_FILES = _getenv_list(
    'CHECKPATCH_IGNORED_FILES',
    [
        'lustre/ptlrpc/wiretest.c',
        'lustre/utils/wiretest.c',
        '*.patch',
    ])
CHECKPATCH_IGNORED_KINDS = _getenv_list(
    'CHECKPATCH_IGNORED_KINDS',
    [
        'LASSERT',
        'LCONSOLE',
        'LEADING_SPACE',
    ])

# File in which reviewed revisions and poll timestamps are recorded.
REVIEW_HISTORY_PATH = os.environ.get('REVIEW_HISTORY_PATH', 'REVIEW_HISTORY')
# Link included in the review message when style warnings are found.
STYLE_LINK = os.environ.get(
    'STYLE_LINK', 'http://wiki.lustre.org/Lustre_Coding_Style_Guidelines')

# When False the posted Code-Review label is always 0.
USE_CODE_REVIEW_SCORE = False
70
def parse_checkpatch_output(out, path_line_comments, warning_count):
    """
    Parse the output 'out' of CHECKPATCH into path_line_comments.
    Increment warning_count[0] for each warning recorded.

    out may be text or bytes (as returned by a subprocess pipe); bytes
    are decoded as UTF-8 with undecodable sequences replaced.

    path_line_comments is { PATH: { LINE: [COMMENT, ...] }, ... }.

    Warnings whose kind is listed in CHECKPATCH_IGNORED_KINDS or whose
    path matches a pattern in CHECKPATCH_IGNORED_FILES are dropped.
    """
    def add_comment(path, line, level, kind, message):
        """Record one warning unless its kind or path is ignored."""
        logging.debug("add_comment %s %d %s %s '%s'",
                      path, line, level, kind, message)
        if kind in CHECKPATCH_IGNORED_KINDS:
            return

        if any(fnmatch.fnmatch(path, pattern)
               for pattern in CHECKPATCH_IGNORED_FILES):
            return

        path_comments = path_line_comments.setdefault(path, {})
        line_comments = path_comments.setdefault(line, [])
        line_comments.append('(style) %s\n' % message)
        warning_count[0] += 1

    # Popen.communicate() returns bytes on Python 3. Without decoding, the
    # str comparisons below never match and str-separator split() raises.
    if isinstance(out, bytes):
        out = out.decode('utf-8', 'replace')

    level = None # 'ERROR', 'WARNING'
    kind = None # 'CODE_INDENT', 'LEADING_SPACE', ...
    message = None # 'code indent should use tabs where possible'

    for line in out.splitlines():
        # ERROR:CODE_INDENT: code indent should use tabs where possible
        # #404: FILE: lustre/liblustre/dir.c:103:
        # +        op_data.op_hash_offset = hash_x_index(page->index, 0);$
        line = line.strip()
        if not line:
            # A blank line ends the current report.
            level, kind, message = None, None, None
        elif line[0] == '#':
            # '#404: FILE: lustre/liblustre/dir.c:103:'
            tokens = line.split(':', 5)
            if len(tokens) != 5 or tokens[1] != ' FILE':
                continue

            path = tokens[2].strip()
            line_number_str = tokens[3].strip()
            if not line_number_str.isdigit():
                continue

            line_number = int(line_number_str)

            # Only meaningful when preceded by a valid header line.
            if path and level and kind and message:
                add_comment(path, line_number, level, kind, message)
        elif line[0] == '+':
            # Quoted patch content, nothing to parse.
            continue
        else:
            # ERROR:CODE_INDENT: code indent should use tabs where possible
            try:
                level, kind, message = line.split(':', 2)
            except ValueError:
                level, kind, message = None, None, None

            if level not in ('ERROR', 'WARNING'):
                level, kind, message = None, None, None
            else:
                # Drop the blank that follows the second ':' so the
                # comment does not read '(style)  message'.
                message = message.strip()
131
132
def review_input_and_score(path_line_comments, warning_count):
    """
    Build a gerrit ReviewInput() and a score from the parsed warnings.

    path_line_comments is { PATH: { LINE: [COMMENT, ...] }, ... } and
    warning_count is a one element list such as [11].

    Returns (review_input, score) where score is -1 if there was at
    least one warning and +1 otherwise. The 'Code-Review' label carries
    the score only when USE_CODE_REVIEW_SCORE is set, else 0.
    """
    # One inline comment per (path, line); comments sharing a line are
    # joined with newlines.
    review_comments = {
        path: [{'line': line, 'message': '\n'.join(comment_list)}
               for line, comment_list in line_comments.items()]
        for path, line_comments in path_line_comments.items()
    }

    total = warning_count[0]
    score = -1 if total > 0 else +1
    labels = {'Code-Review': score if USE_CODE_REVIEW_SCORE else 0}

    if score >= 0:
        # Clean patch: no inline comments, nobody is notified.
        review_input = {
            'message': 'Looks good to me.',
            'labels': labels,
            'notify': 'NONE',
        }
        return review_input, score

    summary = ('%d style warning(s).\nFor more details please see %s' %
               (total, STYLE_LINK))
    review_input = {
        'message': summary,
        'labels': labels,
        'comments': review_comments,
        'notify': 'OWNER',
    }
    return review_input, score
175
176
177 def _now():
178     """_"""
179     return int(time.time())
180
181
class Reviewer(object):
    """
    * Poll gerrit instance for updates to changes matching project and branch.
    * Pipe new patches through checkpatch.
    * Convert checkpatch output to gerrit ReviewInput().
    * Post ReviewInput() to gerrit instance.
    * Track reviewed revisions in history_path.
    """
    def __init__(self, host, project, branch, username, password, history_path):
        """
        Store connection settings and defaults. Performs no I/O.
        """
        self.host = host
        self.project = project
        self.branch = branch
        self.auth = requests.auth.HTTPDigestAuth(username, password)
        self.logger = logging.getLogger(__name__)
        self.history_path = history_path
        # 'r' enables load_history(), 'w' enables appending in write_history().
        self.history_mode = 'rw'
        # 'CHANGE_ID REVISION' => score of the review already made.
        self.history = {}
        # Epoch of the last completed update().
        self.timestamp = 0
        # When False, _post() only logs what it would have sent.
        self.post_enabled = True
        # Seconds to sleep after each reviewed change in update().
        self.post_interval = 10
        # Seconds to sleep between update() calls in run().
        self.update_interval = 300
        # Timeout in seconds passed to every HTTP request.
        self.request_timeout = 60

    def _debug(self, msg, *args):
        """Log msg % args at debug level."""
        self.logger.debug(msg, *args)

    def _error(self, msg, *args):
        """Log msg % args at error level."""
        self.logger.error(msg, *args)

    def _url(self, path):
        """Return the authenticated ('/a') REST URL for path."""
        # NOTE(review): plain http is used here -- confirm whether https
        # is intended for this gerrit instance.
        return 'http://' + self.host + '/a' + path

    def _get(self, path):
        """
        GET path, return the Response or None on any failure.
        """
        url = self._url(path)
        try:
            res = requests.get(url, auth=self.auth,
                               timeout=self.request_timeout)
        except Exception as exc:
            # Boundary of the daemon loop: log and carry on polling.
            self._error("cannot GET '%s': exception = %s", url, str(exc))
            return None

        if res.status_code != requests.codes.ok:
            self._error("cannot GET '%s': reason = %s, status_code = %d",
                        url, res.reason, res.status_code)
            return None

        return res

    def _post(self, path, obj):
        """
        POST json(obj) to path, return True on success.

        Returns False without sending anything when post_enabled is unset.
        """
        url = self._url(path)
        data = json.dumps(obj)
        if not self.post_enabled:
            self._debug("_post: disabled: url = '%s', data = '%s'", url, data)
            return False

        try:
            res = requests.post(url, data=data,
                                headers={'Content-Type': 'application/json'},
                                auth=self.auth, timeout=self.request_timeout)
        except Exception as exc:
            # Boundary of the daemon loop: log and carry on polling.
            self._error("cannot POST '%s': exception = %s", url, str(exc))
            return False

        if res.status_code != requests.codes.ok:
            self._error("cannot POST '%s': reason = %s, status_code = %d",
                        url, res.reason, res.status_code)
            return False

        return True

    def _load_json(self, res):
        """
        Decode the JSON body of a gerrit Response.
        """
        # Gerrit uses " )]}'" to guard against XSSI; skip those 5 bytes.
        return json.loads(res.content[5:])

    def load_history(self):
        """
        Load review history from history_path containing lines of the form:
        EPOCH      FULL_CHANGE_ID                         REVISION    SCORE
        1394536722 fs%2Flustre-release~master~I5cc6c23... 00e2cc75... 1
        1394536721 -                                      -           0
        1394537033 fs%2Flustre-release~master~I10be8e9... 44f7b504... 1
        1394537032 -                                      -           0
        1394537344 -                                      -           0
        ...

        Lines with a '-' change id only update the timestamp. Blank or
        malformed lines are skipped rather than aborting the load.
        """
        if 'r' in self.history_mode:
            with open(self.history_path) as history_file:
                for line in history_file:
                    fields = line.split()
                    if len(fields) != 4:
                        if fields:
                            self._error("load_history: bad line '%s'",
                                        line.rstrip())
                        continue

                    epoch, change_id, revision, score = fields
                    if change_id == '-':
                        try:
                            self.timestamp = int(float(epoch))
                        except ValueError:
                            self._error("load_history: bad epoch '%s'", epoch)
                    else:
                        self.history[change_id + ' ' + revision] = score

        self._debug("load_history: history size = %d, timestamp = %d",
                    len(self.history), self.timestamp)

    def write_history(self, change_id, revision, score, epoch=-1):
        """
        Add review record to history dict and file.

        A non-positive epoch means 'use self.timestamp'. Nothing is
        written to the file unless history_mode contains 'w'.
        """
        if change_id != '-':
            self.history[change_id + ' ' + revision] = score

        if epoch <= 0:
            epoch = self.timestamp

        if 'w' in self.history_mode:
            with open(self.history_path, 'a') as history_file:
                print(epoch, change_id, revision, score, file=history_file)

    def in_history(self, change_id, revision):
        """
        Return True if change_id/revision was already reviewed.
        """
        return change_id + ' ' + revision in self.history

    def get_change_by_id(self, change_id):
        """
        GET one change by id, return its ChangeInfo() or None on failure.
        """
        path = ('/changes/' + quote(self.project, safe='') + '~' +
                quote(self.branch, safe='') + '~' + change_id +
                '?o=CURRENT_REVISION')
        res = self._get(path)
        if res is None:
            return None

        return self._load_json(res)

    def get_changes(self, query):
        """
        GET a list of ChangeInfo()s for all changes matching query.

        {'status':'open', '-age':'60m'} =>
          GET /changes/?q=project:...+status:open+-age:60m&o=CURRENT_REVISION =>
            [ChangeInfo()...]

        'project' and 'branch' default to this reviewer's own. Returns []
        when the request fails.
        """
        # Work on a copy so the caller's dict is left untouched.
        query = dict(query)
        project = query.get('project', self.project)
        query['project'] = quote(project, safe='')
        branch = query.get('branch', self.branch)
        query['branch'] = quote(branch, safe='')
        path = ('/changes/?q=' +
                '+'.join(k + ':' + v for k, v in query.items()) +
                '&o=CURRENT_REVISION')
        res = self._get(path)
        if res is None:
            return []

        return self._load_json(res)

    def decode_patch(self, content):
        """
        Decode gerrit's idea of base64.

        The base64 encoded patch returned by gerrit isn't always
        padded correctly according to b64decode. Don't know why. Work
        around this by appending more '=' characters or truncating the
        content until it decodes. But do try the unmodified content
        first.

        content may be bytes or text. Returns the decoded bytes, or an
        empty (false) value if no variant decodes.
        """
        # The padding must match the type of content: bytes + str raises
        # TypeError on Python 3.
        pad = b'=' if isinstance(content, bytes) else '='

        for i in (0, 1, 2, 3, -1, -2, -3):
            if i >= 0:
                padded_content = content + (i * pad)
            else:
                padded_content = content[:i]

            try:
                return base64.b64decode(padded_content)
            except (TypeError, ValueError) as exc:
                # Python 2 raises TypeError on bad padding; Python 3
                # raises binascii.Error, a subclass of ValueError.
                self._debug("decode_patch: len = %d, exception = %s",
                            len(padded_content), str(exc))

        return b''

    def get_patch(self, change, revision='current'):
        """
        GET and decode the (current) patch for change.

        Returns a false value when the patch cannot be fetched or decoded.
        """
        path = '/changes/' + change['id'] + '/revisions/' + revision + '/patch'
        self._debug("get_patch: path = '%s'", path)
        res = self._get(path)
        if res is None:
            return ''

        self._debug("get_patch: len(content) = %d, content = '%s...'",
                    len(res.content), res.content[:20])

        return self.decode_patch(res.content)

    def post_review(self, change, revision, review_input):
        """
        POST review_input for the given revision of change.
        """
        path = '/changes/' + change['id'] + '/revisions/' + revision + '/review'
        self._debug("post_review: path = '%s'", path)
        return self._post(path, review_input)

    def check_patch(self, patch):
        """
        Run each script in CHECKPATCH_PATHS on patch, return a
        ReviewInput() and score.
        """
        path_line_comments = {}
        warning_count = [0]

        for path in CHECKPATCH_PATHS:
            pipe = subprocess.Popen([path] + CHECKPATCH_ARGS,
                                    stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE)
            out, err = pipe.communicate(patch)
            # The pipes yield bytes on Python 3 but the parser compares
            # against text, so decode before parsing.
            if isinstance(out, bytes):
                out = out.decode('utf-8', 'replace')
            if isinstance(err, bytes):
                err = err.decode('utf-8', 'replace')
            self._debug("check_patch: path = %s, out = '%s...', err = '%s...'",
                        path, out[:80], err[:80])
            parse_checkpatch_output(out, path_line_comments, warning_count)

        return review_input_and_score(path_line_comments, warning_count)

    def change_needs_review(self, change):
        """
        * Bail if the change isn't open (status is not 'NEW').
        * Bail if the change has no current revision.
        * Bail if we've already reviewed the current revision.
        """
        status = change.get('status')
        if status != 'NEW':
            self._debug("change_needs_review: status = %s", status)
            return False

        current_revision = change.get('current_revision')
        self._debug("change_needs_review: current_revision = '%s'",
                    current_revision)
        if not current_revision:
            return False

        # Have we already checked this revision?
        if self.in_history(change['id'], current_revision):
            self._debug("change_needs_review: already reviewed")
            return False

        return True

    def review_change(self, change):
        """
        Review the current revision of change.
        * Pipe the patch through checkpatch(es).
        * Save results to review history.
        * POST review to gerrit.
        """
        self._debug("review_change: change = %s, subject = '%s'",
                    change['id'], change.get('subject', ''))

        current_revision = change.get('current_revision')
        self._debug("review_change: current_revision = '%s'",
                    current_revision)
        if not current_revision:
            return

        patch = self.get_patch(change, current_revision)
        if not patch:
            self._debug("review_change: no patch")
            return

        review_input, score = self.check_patch(patch)
        self._debug("review_change: score = %d", score)
        # History is recorded before posting, so a revision is not
        # reviewed again even if the POST fails.
        self.write_history(change['id'], current_revision, score)
        self.post_review(change, current_revision, review_input)

    def update(self):
        """
        GET recently updated changes and review as needed.
        """
        new_timestamp = _now()
        age = new_timestamp - self.timestamp + 60 * 60 # 1h padding
        self._debug("update: age = %d", age)

        open_changes = self.get_changes({'status':'open',
                                         '-age':str(age) + 's'})
        self._debug("update: got %d open_changes", len(open_changes))

        for change in open_changes:
            if self.change_needs_review(change):
                self.review_change(change)
                # Don't POST more than every post_interval seconds.
                time.sleep(self.post_interval)

        self.timestamp = new_timestamp
        # A '-' record only stores the new timestamp.
        self.write_history('-', '-', 0)

    def update_single_change(self, change):
        """
        Load review history, then review the open change(s) matching the
        gerrit query 'change:<change>' if they still need a review.
        """
        self.load_history()

        open_changes = self.get_changes({'status':'open',
                                         'change':change})
        self._debug("update_single_change: got %d open_changes",
                    len(open_changes))

        for open_change in open_changes:
            if self.change_needs_review(open_change):
                self.review_change(open_change)

    def run(self):
        """
        * Load review history.
        * Call update() every update_interval seconds.
        """

        if self.timestamp <= 0:
            self.load_history()

        while True:
            self.update()
            time.sleep(self.update_interval)
502
503
def main():
    """
    Configure logging, read the credentials for GERRIT_HOST from the
    file at GERRIT_AUTH_PATH, then either review the single change
    GERRIT_CHANGE_NUMBER or poll forever.
    """
    logging.basicConfig(format='%(asctime)s %(message)s', level=logging.DEBUG)

    with open(GERRIT_AUTH_PATH) as auth_file:
        credentials = json.load(auth_file)[GERRIT_HOST]['gerrit/http']
        username = credentials['username']
        password = credentials['password']

    reviewer = Reviewer(GERRIT_HOST, GERRIT_PROJECT, GERRIT_BRANCH,
                        username, password, REVIEW_HISTORY_PATH)

    if not GERRIT_CHANGE_NUMBER:
        # No specific change requested: run as a polling daemon.
        reviewer.run()
        return

    reviewer.update_single_change(GERRIT_CHANGE_NUMBER)


if __name__ == "__main__":
    main()