Whamcloud - gitweb
LU-17705 ptlrpc: replace synchronize_rcu() with rcu_barrier()
[fs/lustre-release.git] / contrib / scripts / gerrit_checkpatch.py
1 #!/usr/bin/env python3
2 #
3 # GPL HEADER START
4 #
5 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 #
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License version 2 only,
9 # as published by the Free Software Foundation.
10 #
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 # General Public License version 2 for more details (a copy is included
15 # in the LICENSE file that accompanied this code).
16 #
17 # You should have received a copy of the GNU General Public License
18 # version 2 along with this program; If not, see
19 # http://www.gnu.org/licenses/gpl-2.0.html
20 #
21 # GPL HEADER END
22 #
23 # Copyright (c) 2014, Intel Corporation.
24 #
25 # Author: John L. Hammond <john.hammond@intel.com>
26 #
27 """
28 Gerrit Checkpatch Reviewer Daemon
29 ~~~~~~ ~~~~~~~~~~ ~~~~~~~~ ~~~~~~
30
31 * Watch for new change revisions in a gerrit instance.
32 * Pass new revisions through checkpatch script.
33 * POST reviews back to gerrit based on checkpatch output.
34 """
35
36 from __future__ import print_function
37 import base64
38 import fnmatch
39 import logging
40 import json
41 import os
42 import requests
43 import subprocess
44 import time
45 from six.moves.urllib.parse import quote
46
47 def _getenv_list(key, default=None, sep=':'):
48     """
49     'PATH' => ['/bin', '/usr/bin', ...]
50     """
51     value = os.getenv(key)
52     if value is None:
53         return default
54     else:
55         return value.split(sep)
56
# Gerrit instance, project and branch whose changes are reviewed.  Each
# setting can be overridden through the environment variable of the same name.
GERRIT_HOST = os.getenv('GERRIT_HOST', 'review.whamcloud.com')
GERRIT_PROJECT = os.getenv('GERRIT_PROJECT', 'fs/lustre-release')
GERRIT_BRANCH = os.getenv('GERRIT_BRANCH', 'master')
# Path of the JSON credentials file read by main() (format described below).
GERRIT_AUTH_PATH = os.getenv('GERRIT_AUTH_PATH', 'GERRIT_AUTH')
# When set, main() reviews only this change instead of polling forever.
GERRIT_CHANGE_NUMBER = os.getenv('GERRIT_CHANGE_NUMBER', None)

# GERRIT_AUTH should contain a single JSON dictionary of the form:
# {
#     "review.example.com": {
#         "gerrit/http": {
#             "username": "example-checkpatch",
#             "password": "1234"
#         }
#     }
#     ...
# }

# Colon-separated list of checkpatch scripts; each one is run on every patch.
CHECKPATCH_PATHS = _getenv_list('CHECKPATCH_PATHS', ['checkpatch.pl'])
# Split on any run of whitespace so that doubled or trailing spaces in the
# environment value do not turn into empty-string arguments.
CHECKPATCH_ARGS = os.getenv('CHECKPATCH_ARGS', '--show-types -').split()
# fnmatch patterns of files whose checkpatch messages are dropped.
CHECKPATCH_IGNORED_FILES = _getenv_list('CHECKPATCH_IGNORED_FILES', [
        'lustre/ptlrpc/wiretest.c',
        'lustre/utils/wiretest.c',
        '*.patch'])
# checkpatch message types (as printed by --show-types) that are dropped.
CHECKPATCH_IGNORED_KINDS = _getenv_list('CHECKPATCH_IGNORED_KINDS', [
        'LASSERT',
        'LCONSOLE',
        'LEADING_SPACE'])
# File in which reviewed revisions and poll timestamps are recorded.
REVIEW_HISTORY_PATH = os.getenv('REVIEW_HISTORY_PATH', 'REVIEW_HISTORY')
# Link appended to the review message when style warnings were found.
STYLE_LINK = os.getenv('STYLE_LINK',
        'http://wiki.lustre.org/Lustre_Coding_Style_Guidelines')

# When False the posted Code-Review label is always 0, whatever the score.
USE_CODE_REVIEW_SCORE = False
89
def parse_checkpatch_output(out, path_line_comments, warning_count):
    """
    Parse string output out of CHECKPATCH into path_line_comments.
    Increment warning_count[0] for each warning.

    out may be text or the raw bytes read from a subprocess pipe; bytes
    are decoded as UTF-8 (undecodable bytes are replaced) before parsing.

    path_line_comments is { PATH: { LINE: [COMMENT, ...] }, ... }.

    Messages whose kind is in CHECKPATCH_IGNORED_KINDS, or whose path
    matches a pattern in CHECKPATCH_IGNORED_FILES, are not recorded and
    not counted.
    """
    def add_comment(path, line, level, kind, message):
        """Record one message for path:line unless it is filtered out."""
        logging.debug("add_comment %s %d %s %s '%s'",
                      path, line, level, kind, message)
        if kind in CHECKPATCH_IGNORED_KINDS:
            return

        for pattern in CHECKPATCH_IGNORED_FILES:
            if fnmatch.fnmatch(path, pattern):
                return

        path_comments = path_line_comments.setdefault(path, {})
        line_comments = path_comments.setdefault(line, [])
        line_comments.append('(style) %s\n' % message)
        warning_count[0] += 1

    # subprocess pipes yield bytes on Python 3; indexing and splitting bytes
    # with str arguments below would never match or would raise TypeError.
    if isinstance(out, bytes):
        out = out.decode('utf-8', 'replace')

    # State carried from a message header line to the '#...FILE:' line
    # that follows it.
    level = None # 'ERROR', 'WARNING'
    kind = None # 'CODE_INDENT', 'LEADING_SPACE', ...
    message = None # 'code indent should use tabs where possible'

    for line in out.splitlines():
        # ERROR:CODE_INDENT: code indent should use tabs where possible
        # #404: FILE: lustre/liblustre/dir.c:103:
        # +        op_data.op_hash_offset = hash_x_index(page->index, 0);$
        line = line.strip()
        if not line:
            # A blank line ends the current message.
            level, kind, message = None, None, None
        elif line[0] == '#':
            # '#404: FILE: lustre/liblustre/dir.c:103:'
            tokens = line.split(':', 5)
            if len(tokens) != 5 or tokens[1] != ' FILE':
                continue

            path = tokens[2].strip()
            line_number_str = tokens[3].strip()
            if not line_number_str.isdigit():
                continue

            line_number = int(line_number_str)

            # Only attach a comment when a valid header preceded this line.
            if path and level and kind and message:
                add_comment(path, line_number, level, kind, message)
        elif line[0] == '+':
            # Quoted patch content, not part of the report structure.
            continue
        else:
            # ERROR:CODE_INDENT: code indent should use tabs where possible
            try:
                level, kind, message = line.split(':', 2)
            except ValueError:
                level, kind, message = None, None, None

            if level not in ('ERROR', 'WARNING'):
                level, kind, message = None, None, None
150
151
def review_input_and_score(path_line_comments, warning_count):
    """
    Build a gerrit ReviewInput() and a score from parsed checkpatch results.

    path_line_comments is { PATH: { LINE: [COMMENT, ...] }, ... } and
    warning_count is a one element list such as [11].

    Return (review_input, score): score is -1 when there is at least one
    warning and +1 otherwise.  The Code-Review label carries the score
    only when USE_CODE_REVIEW_SCORE is set, else it is 0.
    """
    # One gerrit comment per (path, line); that line's messages are joined.
    review_comments = {
        path: [{'line': line_no, 'message': '\n'.join(texts)}
               for line_no, texts in by_line.items()]
        for path, by_line in path_line_comments.items()
    }

    score = -1 if warning_count[0] > 0 else +1
    labels = {'Code-Review': score if USE_CODE_REVIEW_SCORE else 0}

    if score >= 0:
        clean_review = {
            'message': 'Looks good to me.',
            'labels': labels,
            'notify': 'NONE',
        }
        return clean_review, score

    summary = ('%d style warning(s).\nFor more details please see %s' %
               (warning_count[0], STYLE_LINK))
    warning_review = {
        'message': summary,
        'labels': labels,
        'comments': review_comments,
        'notify': 'OWNER',
    }
    return warning_review, score
194
195
196 def _now():
197     """_"""
198     return int(time.time())
199
200
class Reviewer(object):
    """
    * Poll gerrit instance for updates to changes matching project and branch.
    * Pipe new patches through checkpatch.
    * Convert checkpatch output to gerrit ReviewInput().
    * Post ReviewInput() to gerrit instance.
    * Track reviewed revisions in history_path.
    """
    def __init__(self, host, project, branch, username, password, history_path):
        """
        Store connection settings; no network or file access happens here.
        """
        self.host = host
        self.project = project
        self.branch = branch
        self.auth = requests.auth.HTTPDigestAuth(username, password)
        self.logger = logging.getLogger(__name__)
        self.history_path = history_path
        # 'r' lets load_history() read the file, 'w' lets write_history()
        # append to it.
        self.history_mode = 'rw'
        # Maps 'CHANGE_ID REVISION' to the score recorded for that revision.
        self.history = {}
        # Epoch of the last completed update(); run() loads the history
        # file while this is still <= 0.
        self.timestamp = 0
        # When False, _post() only logs what it would have sent.
        self.post_enabled = True
        # Seconds to sleep after each review in update().
        self.post_interval = 10
        # Seconds to sleep between update() calls in run().
        self.update_interval = 300
        # Timeout in seconds for each HTTP request.
        self.request_timeout = 60

    def _debug(self, msg, *args):
        """Log msg % args at DEBUG level."""
        self.logger.debug(msg, *args)

    def _error(self, msg, *args):
        """Log msg % args at ERROR level."""
        self.logger.error(msg, *args)

    def _url(self, path):
        """Return the authenticated ('/a' prefixed) URL for REST path."""
        return 'http://' + self.host + '/a' + path

    def _get(self, path):
        """
        GET path return Response.

        Return None, after logging the reason, when the request raises or
        the status is not OK.
        """
        url = self._url(path)
        try:
            res = requests.get(url, auth=self.auth,
                               timeout=self.request_timeout)
        except Exception as exc:
            self._error("cannot GET '%s': exception = %s", url, str(exc))
            return None

        if res.status_code != requests.codes.ok:
            self._error("cannot GET '%s': reason = %s, status_code = %d",
                       url, res.reason, res.status_code)
            return None

        return res

    def _post(self, path, obj):
        """
        POST json(obj) to path, return True on success.

        Return False when posting is disabled, the request raises, or the
        status is not OK.
        """
        url = self._url(path)
        data = json.dumps(obj)
        if not self.post_enabled:
            self._debug("_post: disabled: url = '%s', data = '%s'", url, data)
            return False

        try:
            res = requests.post(url, data=data,
                                headers={'Content-Type': 'application/json'},
                                auth=self.auth, timeout=self.request_timeout)
        except Exception as exc:
            self._error("cannot POST '%s': exception = %s", url, str(exc))
            return False

        if res.status_code != requests.codes.ok:
            self._error("cannot POST '%s': reason = %s, status_code = %d",
                       url, res.reason, res.status_code)
            return False

        return True

    def load_history(self):
        """
        Load review history from history_path containing lines of the form:
        EPOCH      FULL_CHANGE_ID                         REVISION    SCORE
        1394536722 fs%2Flustre-release~master~I5cc6c23... 00e2cc75... 1
        1394536721 -                                      -           0
        1394537033 fs%2Flustre-release~master~I10be8e9... 44f7b504... 1
        1394537032 -                                      -           0
        1394537344 -                                      -           0
        ...

        Lines with '-' as change id carry the timestamp of an update();
        all other lines record a reviewed revision.  Lines that do not
        have exactly four fields are skipped.
        """
        if 'r' in self.history_mode:
            with open(self.history_path) as history_file:
                for line in history_file:
                    fields = line.split()
                    if len(fields) != 4:
                        # A blank or truncated record must not keep the
                        # daemon from starting.
                        if fields:
                            self._error("load_history: skipping malformed "
                                        "line '%s'", line.rstrip())
                        continue

                    epoch, change_id, revision, score = fields
                    if change_id == '-':
                        self.timestamp = int(float(epoch))
                    else:
                        self.history[change_id + ' ' + revision] = score

        self._debug("load_history: history size = %d, timestamp = %d",
                    len(self.history), self.timestamp)

    def write_history(self, change_id, revision, score, epoch=-1):
        """
        Add review record to history dict and file.

        A change_id of '-' writes a timestamp-only record.  When epoch is
        not positive the current self.timestamp is recorded instead.
        """
        if change_id != '-':
            self.history[change_id + ' ' + revision] = score

        if epoch <= 0:
            epoch = self.timestamp

        if 'w' in self.history_mode:
            with open(self.history_path, 'a') as history_file:
                print(epoch, change_id, revision, score, file=history_file)

    def in_history(self, change_id, revision):
        """
        Return True if change_id/revision was already reviewed.
        """
        return change_id + ' ' + revision in self.history

    def get_change_by_id(self, change_id):
        """
        GET one change by id.

        Return the decoded ChangeInfo(), or None when the GET failed.
        """
        path = ('/changes/' + quote(self.project, safe='') + '~' +
                quote(self.branch, safe='') + '~' + change_id +
                '?o=CURRENT_REVISION')
        res = self._get(path)
        if not res:
            return None

        # Gerrit uses " )]}'" to guard against XSSI.
        return json.loads(res.content[5:])

    def get_changes(self, query):
        """
        GET a list of ChangeInfo()s for all changes matching query.

        {'status':'open', '-age':'60m'} =>
          GET /changes/?q=project:...+status:open+-age:60m&o=CURRENT_REVISION =>
            [ChangeInfo()...]

        'project' and 'branch' default to this reviewer's own; the caller's
        dict is not modified.  Return [] when the GET failed.
        """
        query = dict(query)
        project = query.get('project', self.project)
        query['project'] = quote(project, safe='')
        branch = query.get('branch', self.branch)
        query['branch'] = quote(branch, safe='')
        path = ('/changes/?q=' +
                '+'.join(k + ':' + v for k, v in query.items()) +
                '&o=CURRENT_REVISION')
        res = self._get(path)
        if not res:
            return []

        # Gerrit uses " )]}'" to guard against XSSI.
        return json.loads(res.content[5:])

    def decode_patch(self, content):
        """
        Decode gerrit's idea of base64.

        The base64 encoded patch returned by gerrit isn't always
        padded correctly according to b64decode. Don't know why. Work
        around this by appending more '=' characters or truncating the
        content until it decodes. But do try the unmodified content
        first.

        content may be bytes or text (text is assumed to be ASCII).
        Return the decoded bytes, or an empty bytes object when no
        variant decodes.
        """
        # Work on bytes throughout: a response body is bytes, and mixing
        # it with a str pad raises TypeError on Python 3.
        if not isinstance(content, bytes):
            content = content.encode('ascii')

        for i in (0, 1, 2, 3, -1, -2, -3):
            if i >= 0:
                padded_content = content + (i * b'=')
            else:
                padded_content = content[:i]

            try:
                return base64.b64decode(padded_content)
            except (TypeError, ValueError) as exc:
                # Python 2 raises TypeError on bad padding; Python 3 raises
                # binascii.Error, which is a ValueError.
                self._debug("decode_patch: len = %d, exception = %s",
                           len(padded_content), str(exc))

        return b''

    def get_patch(self, change, revision='current'):
        """
        GET and decode the (current) patch for change.

        Return an empty value when the GET or the decoding failed.
        """
        path = '/changes/' + change['id'] + '/revisions/' + revision + '/patch'
        self._debug("get_patch: path = '%s'", path)
        res = self._get(path)
        if not res:
            return ''

        self._debug("get_patch: len(content) = %d, content = '%s...'",
                   len(res.content), res.content[:20])

        return self.decode_patch(res.content)

    def post_review(self, change, revision, review_input):
        """
        POST review_input for the given revision of change.

        Return True on success, as reported by _post().
        """
        path = '/changes/' + change['id'] + '/revisions/' + revision + '/review'
        self._debug("post_review: path = '%s'", path)
        return self._post(path, review_input)

    def check_patch(self, patch):
        """
        Run each script in CHECKPATCH_PATHS on patch, return a
        ReviewInput() and score.

        patch is fed to each script on stdin; text is encoded as UTF-8
        first.  Script output is decoded as UTF-8 before it is parsed.
        """
        path_line_comments = {}
        warning_count = [0]

        # The child's stdin is a binary pipe.
        if not isinstance(patch, bytes):
            patch = patch.encode('utf-8')

        for path in CHECKPATCH_PATHS:
            pipe = subprocess.Popen([path] + CHECKPATCH_ARGS,
                                    stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE)
            out, err = pipe.communicate(patch)
            # communicate() returns bytes but the parser works on text.
            if isinstance(out, bytes):
                out = out.decode('utf-8', 'replace')
            if isinstance(err, bytes):
                err = err.decode('utf-8', 'replace')
            self._debug("check_patch: path = %s, out = '%s...', err = '%s...'",
                        path, out[:80], err[:80])
            parse_checkpatch_output(out, path_line_comments, warning_count)

        return review_input_and_score(path_line_comments, warning_count)

    def change_needs_review(self, change):
        """
        * Bail if the change isn't open (status is not 'NEW').
        * Bail if the change has no current revision.
        * Bail if we've already reviewed the current revision.
        """
        status = change.get('status')
        if status != 'NEW':
            self._debug("change_needs_review: status = %s", status)
            return False

        current_revision = change.get('current_revision')
        self._debug("change_needs_review: current_revision = '%s'",
                    current_revision)
        if not current_revision:
            return False

        # Have we already checked this revision?
        if self.in_history(change['id'], current_revision):
            self._debug("change_needs_review: already reviewed")
            return False

        return True

    def review_change(self, change):
        """
        Review the current revision of change.
        * Pipe the patch through checkpatch(es).
        * Save results to review history.
        * POST review to gerrit.

        Nothing is recorded or posted when the change has no current
        revision or its patch could not be fetched.
        """
        self._debug("review_change: change = %s, subject = '%s'",
                    change['id'], change.get('subject', ''))

        current_revision = change.get('current_revision')
        self._debug("review_change: current_revision = '%s'",
                    current_revision)
        if not current_revision:
            return

        patch = self.get_patch(change, current_revision)
        if not patch:
            self._debug("review_change: no patch")
            return

        review_input, score = self.check_patch(patch)
        self._debug("review_change: score = %d", score)
        # The revision is recorded before posting, so it is not reviewed
        # again even when the POST below fails.
        self.write_history(change['id'], current_revision, score)
        self.post_review(change, current_revision, review_input)

    def update(self):
        """
        GET recently updated changes and review as needed.

        Afterwards advance self.timestamp and append a timestamp record
        to the history.
        """
        new_timestamp = _now()
        age = new_timestamp - self.timestamp + 60 * 60 # 1h padding
        self._debug("update: age = %d", age)

        open_changes = self.get_changes({'status':'open',
                                         '-age':str(age) + 's'})
        self._debug("update: got %d open_changes", len(open_changes))

        for change in open_changes:
            if self.change_needs_review(change):
                self.review_change(change)
                # Don't POST more than every post_interval seconds.
                time.sleep(self.post_interval)

        self.timestamp = new_timestamp
        self.write_history('-', '-', 0)

    def update_single_change(self, change):
        """
        Load the review history, then review the open change(s) matching
        the 'change:' query term given by change, as needed.

        Unlike update() this neither sleeps between reviews nor writes a
        timestamp record.
        """
        self.load_history()

        open_changes = self.get_changes({'status':'open',
                                         'change':change})
        self._debug("update: got %d open_changes", len(open_changes))

        for open_change in open_changes:
            if self.change_needs_review(open_change):
                self.review_change(open_change)

    def run(self):
        """
        * Load review history.
        * Call update() every update_interval seconds.

        Never returns.
        """

        if self.timestamp <= 0:
            self.load_history()

        while True:
            self.update()
            time.sleep(self.update_interval)
521
522
def main():
    """
    Configure logging, read the gerrit credentials for GERRIT_HOST from
    GERRIT_AUTH_PATH and start reviewing: only GERRIT_CHANGE_NUMBER when
    it is set, otherwise poll forever.
    """
    logging.basicConfig(format='%(asctime)s %(message)s', level=logging.DEBUG)

    with open(GERRIT_AUTH_PATH) as auth_file:
        credentials = json.load(auth_file)[GERRIT_HOST]['gerrit/http']
        username = credentials['username']
        password = credentials['password']

    reviewer = Reviewer(GERRIT_HOST, GERRIT_PROJECT, GERRIT_BRANCH,
                        username, password, REVIEW_HISTORY_PATH)

    if not GERRIT_CHANGE_NUMBER:
        reviewer.run()
    else:
        reviewer.update_single_change(GERRIT_CHANGE_NUMBER)


if __name__ == "__main__":
    main()