Whamcloud - gitweb
LU-4438 utils: handle ct_setup() errors
[fs/lustre-release.git] / contrib / scripts / gerrit_checkpatch.py
1 #!/usr/bin/env python
2 #
3 # GPL HEADER START
4 #
5 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 #
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License version 2 only,
9 # as published by the Free Software Foundation.
10 #
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 # General Public License version 2 for more details (a copy is included
15 # in the LICENSE file that accompanied this code).
16 #
17 # You should have received a copy of the GNU General Public License
18 # version 2 along with this program; If not, see
19 # http://www.gnu.org/licenses/gpl-2.0.html
20 #
21 # GPL HEADER END
22 #
23 # Copyright (c) 2014, Intel Corporation.
24 #
25 # Author: John L. Hammond <john.hammond@intel.com>
26 #
27 """
28 Gerrit Checkpatch Reviewer Daemon
29 ~~~~~~ ~~~~~~~~~~ ~~~~~~~~ ~~~~~~
30
31 * Watch for new change revisions in a gerrit instance.
32 * Pass new revisions through checkpatch script.
33 * POST reviews back to gerrit based on checkpatch output.
34 """
35
36 import base64
37 import fnmatch
38 import logging
39 import json
40 import os
41 import requests
42 import subprocess
43 import time
44 import urllib
45
def _getenv_list(key, default=None, sep=':'):
    """
    Read environment variable key and split it on sep.

    'PATH' => ['/bin', '/usr/bin', ...]

    Return default (unsplit, as given) when key is not set at all.
    """
    raw = os.environ.get(key)
    return default if raw is None else raw.split(sep)
55
# Gerrit instance, project and branch to watch.  Each value may be
# overridden through the environment variable of the same name.
GERRIT_HOST = os.environ.get('GERRIT_HOST', 'review.whamcloud.com')
GERRIT_PROJECT = os.environ.get('GERRIT_PROJECT', 'fs/lustre-release')
GERRIT_BRANCH = os.environ.get('GERRIT_BRANCH', 'master')

# Path of the credentials file read by main().
GERRIT_AUTH_PATH = os.environ.get('GERRIT_AUTH_PATH', 'GERRIT_AUTH')

# GERRIT_AUTH should contain a single JSON dictionary of the form:
# {
#     "review.example.com": {
#         "gerrit/http": {
#             "username": "example-checkpatch",
#             "password": "1234"
#         }
#     }
#     ...
# }

# Checkpatch scripts to run on every patch (':' separated in the
# environment).
CHECKPATCH_PATHS = _getenv_list('CHECKPATCH_PATHS', ['checkpatch.pl'])

# fnmatch() patterns of files whose checkpatch reports are dropped.
CHECKPATCH_IGNORED_FILES = _getenv_list(
    'CHECKPATCH_IGNORED_FILES',
    [
        'lustre/contrib/wireshark/packet-lustre.c',
        'lustre/ptlrpc/wiretest.c',
        'lustre/utils/wiretest.c',
        '*.patch',
    ])

# Checkpatch message kinds (as printed by --show-types) that are dropped.
CHECKPATCH_IGNORED_KINDS = _getenv_list(
    'CHECKPATCH_IGNORED_KINDS',
    [
        'LEADING_SPACE',
    ])

# File used to remember which revisions were already reviewed.
REVIEW_HISTORY_PATH = os.environ.get('REVIEW_HISTORY_PATH', 'REVIEW_HISTORY')

# Link quoted in the review message when style warnings were found.
STYLE_LINK = os.environ.get(
    'STYLE_LINK',
    'https://wiki.hpdd.intel.com/display/PUB/Coding+Guidelines')

# When False the posted Code-Review label is always 0, whatever the score.
USE_CODE_REVIEW_SCORE = False
85
def parse_checkpatch_output(out, path_line_comments, warning_count):
    """
    Parse string output out of CHECKPATCH into path_line_comments.
    Increment warning_count[0] for each warning.

    path_line_comments is { PATH: { LINE: [COMMENT, ...] }, ... }.

    The output is read as blank-line separated reports of the form:

        ERROR:CODE_INDENT: code indent should use tabs where possible
        #404: FILE: lustre/liblustre/dir.c:103:
        +        op_data.op_hash_offset = hash_x_index(page->index, 0);$

    Reports whose kind is listed in CHECKPATCH_IGNORED_KINDS, or whose
    path matches a pattern in CHECKPATCH_IGNORED_FILES, are neither
    recorded nor counted.  Reports without a '#...: FILE: PATH:LINE:'
    line produce no comment.
    """
    def add_comment(path, line, level, kind, message):
        """Record message for path:line unless kind or path is ignored."""
        logging.debug("add_comment %s %d %s %s '%s'",
                      path, line, level, kind, message)
        if kind in CHECKPATCH_IGNORED_KINDS:
            return

        if any(fnmatch.fnmatch(path, pattern)
               for pattern in CHECKPATCH_IGNORED_FILES):
            return

        path_comments = path_line_comments.setdefault(path, {})
        path_comments.setdefault(line, []).append('(style) ' + message)
        warning_count[0] += 1

    # State of the report currently being read.
    level = None # 'ERROR', 'WARNING'
    kind = None # 'CODE_INDENT', 'LEADING_SPACE', ...
    message = None # 'code indent should use tabs where possible'

    for line in out.splitlines():
        line = line.strip()
        if not line:
            # A blank line ends the current report.
            level, kind, message = None, None, None
        elif line.startswith('#'):
            # '#404: FILE: lustre/liblustre/dir.c:103:'
            tokens = line.split(':', 5)
            if len(tokens) != 5 or tokens[1] != ' FILE':
                continue

            path = tokens[2].strip()
            line_number_str = tokens[3].strip()
            if not line_number_str.isdigit():
                continue

            if path and level and kind and message:
                add_comment(path, int(line_number_str), level, kind, message)
        elif line.startswith('+'):
            # Quoted patch line, nothing to extract.
            continue
        else:
            # 'ERROR:CODE_INDENT: code indent should use tabs where possible'
            fields = line.split(':', 2)
            if len(fields) == 3 and fields[0] in ('ERROR', 'WARNING'):
                level, kind = fields[0], fields[1]
                # Drop the blank that follows 'LEVEL:KIND:' so that the
                # posted comment does not read '(style)  message'.
                message = fields[2].strip()
            else:
                level, kind, message = None, None, None
146
147
def review_input_and_score(path_line_comments, warning_count):
    """
    Convert { PATH: { LINE: [COMMENT, ...] }, ... }, [11] to a gerrit
    ReviewInput() and score.

    Return (review_input, score) where score is -1 if warning_count[0]
    is positive and +1 otherwise.  The 'Code-Review' label carries score
    only when USE_CODE_REVIEW_SCORE is set, else 0.  Inline comments are
    included only in a negative review; the comments for one line are
    joined with newlines and each file's comments are ordered by line.
    """
    review_comments = {}

    # items() rather than iteritems() so this runs on Python 2 and 3.
    for path, line_comments in path_line_comments.items():
        review_comments[path] = [
            {'line': line, 'message': '\n'.join(comment_list)}
            for line, comment_list in sorted(line_comments.items())]

    score = -1 if warning_count[0] > 0 else +1
    code_review_score = score if USE_CODE_REVIEW_SCORE else 0

    if score < 0:
        return {
            'message': ('%d style warning(s).\nFor more details please see %s' %
                        (warning_count[0], STYLE_LINK)),
            'labels': {
                'Code-Review': code_review_score
                },
            'comments': review_comments
            }, score

    return {
        'message': 'Looks good to me.',
        'labels': {
            'Code-Review': code_review_score
            }
        }, score
188
189
def _now():
    """
    Return the current time as whole seconds since the epoch.

    int() is used instead of long(): long does not exist on Python 3, and
    on Python 2 int() promotes to long by itself when needed.
    """
    return int(time.time())
193
194
class Reviewer(object):
    """
    * Poll gerrit instance for updates to changes matching project and branch.
    * Pipe new patches through checkpatch.
    * Convert checkpatch output to gerrit ReviewInput().
    * Post ReviewInput() to gerrit instance.
    * Track reviewed revisions in history_path.
    """
    def __init__(self, host, project, branch, username, password, history_path):
        self.host = host
        self.project = project
        self.branch = branch
        self.auth = requests.auth.HTTPDigestAuth(username, password)
        self.logger = logging.getLogger(__name__)
        self.history_path = history_path
        # 'r': load_history() reads the file, 'w': write_history() appends.
        self.history_mode = 'rw'
        # { 'FULL_CHANGE_ID REVISION': SCORE }
        self.history = {}
        # Epoch of the last completed update(), 0 until known.
        self.timestamp = 0
        self.post_enabled = True
        # Seconds to sleep after each review.
        self.post_interval = 10
        # Seconds to sleep between two update() calls in run().
        self.update_interval = 300

    def _debug(self, msg, *args):
        """Log msg % args at DEBUG level."""
        self.logger.debug(msg, *args)

    def _error(self, msg, *args):
        """Log msg % args at ERROR level."""
        self.logger.error(msg, *args)

    def _url(self, path):
        """Return the authenticated ('/a') REST URL for path."""
        return 'http://' + self.host + '/a' + path

    @staticmethod
    def _history_key(change_id, revision):
        """Return the key used in self.history for change_id/revision."""
        return change_id + ' ' + revision

    def _get(self, path):
        """
        GET path return Response.

        Return None (after logging) on a request exception or on any
        status other than 200.
        """
        url = self._url(path)
        try:
            res = requests.get(url, auth=self.auth)
        except requests.exceptions.RequestException as exc:
            self._error("cannot GET '%s': exception = %s", url, str(exc))
            return None

        if res.status_code != requests.codes.ok:
            self._error("cannot GET '%s': reason = %s, status_code = %d",
                       url, res.reason, res.status_code)
            return None

        return res

    def _post(self, path, obj):
        """
        POST json(obj) to path, return True on success.

        Return False without sending anything when post_enabled is unset.
        """
        url = self._url(path)
        data = json.dumps(obj)
        if not self.post_enabled:
            self._debug("_post: disabled: url = '%s', data = '%s'", url, data)
            return False

        try:
            res = requests.post(url, data=data,
                                headers={'Content-Type': 'application/json'},
                                auth=self.auth)
        except requests.exceptions.RequestException as exc:
            self._error("cannot POST '%s': exception = %s", url, str(exc))
            return False

        if res.status_code != requests.codes.ok:
            self._error("cannot POST '%s': reason = %s, status_code = %d",
                       url, res.reason, res.status_code)
            return False

        return True

    def load_history(self):
        """
        Load review history from history_path containing lines of the form:
        EPOCH      FULL_CHANGE_ID                         REVISION    SCORE
        1394536722 fs%2Flustre-release~master~I5cc6c23... 00e2cc75... 1
        1394536721 -                                      -           0
        1394537033 fs%2Flustre-release~master~I10be8e9... 44f7b504... 1
        1394537032 -                                      -           0
        1394537344 -                                      -           0
        ...

        Lines with change id '-' set the timestamp, all others are added
        to the history dict.  A missing file is treated as an empty
        history; blank lines are ignored and malformed lines are logged
        and skipped rather than aborting the daemon.
        """
        if 'r' in self.history_mode:
            if not os.path.exists(self.history_path):
                # First run: write_history() will create the file.
                self._debug("load_history: '%s' does not exist",
                            self.history_path)
            else:
                with open(self.history_path) as history_file:
                    for line in history_file:
                        if not line.strip():
                            continue

                        try:
                            epoch, change_id, revision, score = line.split()
                            if change_id == '-':
                                timestamp = int(float(epoch))
                        except ValueError:
                            self._error("load_history: bad line '%s'",
                                        line.rstrip())
                            continue

                        if change_id == '-':
                            self.timestamp = timestamp
                        else:
                            key = self._history_key(change_id, revision)
                            self.history[key] = score

        self._debug("load_history: history size = %d, timestamp = %d",
                    len(self.history), self.timestamp)

    def write_history(self, change_id, revision, score, epoch=-1):
        """
        Add review record to history dict and file.

        A change_id of '-' only records a timestamp line.  self.timestamp
        is written when epoch is not positive.
        """
        if change_id != '-':
            self.history[self._history_key(change_id, revision)] = score

        if epoch <= 0:
            epoch = self.timestamp

        if 'w' in self.history_mode:
            with open(self.history_path, 'a') as history_file:
                # Same output as the Python 2 only 'print >> file' form.
                history_file.write('%s %s %s %s\n' %
                                   (epoch, change_id, revision, score))

    def in_history(self, change_id, revision):
        """
        Return True if change_id/revision was already reviewed.
        """
        return self._history_key(change_id, revision) in self.history

    def get_changes(self, query):
        """
        GET a list of ChangeInfo()s for all changes matching query.

        {'status':'open', '-age':'60m'} =>
          GET /changes/?q=project:...+status:open+-age:60m&o=CURRENT_REVISION =>
            [ChangeInfo()...]

        project and branch default to those of the reviewer.  Return None
        if the request fails or the response is not valid JSON.
        """
        try:
            from urllib import quote # Python 2
        except ImportError:
            from urllib.parse import quote # Python 3

        query = dict(query)
        project = query.get('project', self.project)
        query['project'] = quote(project, safe='')
        branch = query.get('branch', self.branch)
        query['branch'] = quote(branch, safe='')
        path = ('/changes/?q=' +
                '+'.join(k + ':' + v for k, v in query.items()) +
                '&o=CURRENT_REVISION')
        res = self._get(path)
        if not res:
            return None

        # Gerrit prefixes JSON responses with ")]}'" and a newline
        # (5 characters) to guard against XSSI.
        try:
            return json.loads(res.content[5:])
        except ValueError as exc:
            self._error("get_changes: cannot parse response: %s", str(exc))
            return None

    def decode_patch(self, content):
        """
        Decode gerrit's idea of base64.

        The base64 encoded patch returned by gerrit isn't always
        padded correctly according to b64decode. Don't know why. Work
        around this by appending more '=' characters or truncating the
        content until it decodes. But do try the unmodified content
        first.

        Return '' if no variant decodes.
        """
        # Pad with the same string type as content (bytes on Python 3).
        pad = b'=' if isinstance(content, bytes) else '='

        for i in (0, 1, 2, 3, -1, -2, -3):
            if i >= 0:
                padded_content = content + (i * pad)
            else:
                padded_content = content[:i]

            try:
                return base64.b64decode(padded_content)
            except (TypeError, ValueError) as exc:
                # Python 2 raises TypeError on bad padding, Python 3
                # raises binascii.Error which is a ValueError.
                self._debug("decode_patch: len = %d, exception = %s",
                           len(padded_content), str(exc))

        return ''

    def get_patch(self, change, revision='current'):
        """
        GET and decode the (current) patch for change.

        Return '' if the patch cannot be fetched or decoded.
        """
        path = '/changes/' + change['id'] + '/revisions/' + revision + '/patch'
        self._debug("get_patch: path = '%s'", path)
        res = self._get(path)
        if not res:
            return ''

        self._debug("get_patch: len(content) = %d, content = '%s...'",
                   len(res.content), res.content[:20])

        return self.decode_patch(res.content)

    def set_review(self, change, revision, review_input):
        """
        POST review_input for the given revision of change.

        Return True if the POST succeeded.
        """
        path = '/changes/' + change['id'] + '/revisions/' + revision + '/review'
        self._debug("set_review: path = '%s'", path)
        return self._post(path, review_input)

    def check_patch(self, patch):
        """
        Run each script in CHECKPATCH_PATHS on patch, return a
        ReviewInput() and score.

        The patch is fed to each script on stdin; the comments of all
        scripts are accumulated into a single review.
        """
        path_line_comments = {}
        warning_count = [0]

        for path in CHECKPATCH_PATHS:
            pipe = subprocess.Popen([path, '--show-types', '-'],
                                    stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE)
            out, err = pipe.communicate(patch)
            if not isinstance(out, str):
                # Pipes yield bytes on Python 3, the parser expects text.
                out = out.decode('utf-8', 'replace')

            self._debug("check_patch: path = %s, out = '%s...', err = '%s...'",
                        path, out[:80], err[:80])
            parse_checkpatch_output(out, path_line_comments, warning_count)

        return review_input_and_score(path_line_comments, warning_count)

    def review_change(self, change, force=False):
        """
        Review the current revision of change.
        * Bail if the change isn't open (status is not 'NEW').
        * GET the current revision from gerrit.
        * Bail if we've already reviewed it (unless force is True).
        * Pipe the patch through checkpatch(es).
        * Save results to review history.
        * POST review to gerrit.

        Return True if a review was posted, False otherwise.
        """
        self._debug("review_change: change = %s, subject = '%s'",
                    change['id'], change.get('subject', ''))

        status = change.get('status')
        if status != 'NEW':
            self._debug("review_change: status = %s", status)
            return False

        current_revision = change.get('current_revision')
        self._debug("review_change: current_revision = '%s'", current_revision)
        if not current_revision:
            return False

        # Have we already checked this revision?
        if self.in_history(change['id'], current_revision) and not force:
            self._debug("review_change: already reviewed")
            return False

        patch = self.get_patch(change, current_revision)
        if not patch:
            self._debug("review_change: no patch")
            return False

        review_input, score = self.check_patch(patch)
        self._debug("review_change: score = %d", score)
        # The revision is recorded before the POST, so a failed POST is
        # not retried on the next update.
        self.write_history(change['id'], current_revision, score)
        posted = self.set_review(change, current_revision, review_input)
        # Don't POST more than every post_interval seconds.
        time.sleep(self.post_interval)
        return posted

    def update(self):
        """
        GET recently updated changes and review as needed.

        If the list of changes cannot be fetched the timestamp is left
        untouched, so that the next update covers the same period again.
        """
        new_timestamp = _now()
        age = new_timestamp - self.timestamp + 60 * 60 # 1h padding
        self._debug("update: age = %d", age)

        open_changes = self.get_changes({'status':'open',
                                         '-age':str(age) + 's'})
        if open_changes is None:
            # get_changes() already logged the reason.
            self._debug("update: cannot get open_changes")
            return

        self._debug("update: got %d open_changes", len(open_changes))

        for change in open_changes:
            self.review_change(change)

        self.timestamp = new_timestamp
        self.write_history('-', '-', 0)

    def run(self):
        """
        * Load review history.
        * Call update() every update_interval seconds.
        """

        if self.timestamp <= 0:
            self.load_history()

        while True:
            self.update()
            time.sleep(self.update_interval)
475
476
def main():
    """
    Enable debug logging, read the credentials for GERRIT_HOST from
    GERRIT_AUTH_PATH and run a Reviewer until interrupted.
    """
    logging.basicConfig(level=logging.DEBUG)

    with open(GERRIT_AUTH_PATH) as auth_file:
        http_auth = json.load(auth_file)[GERRIT_HOST]['gerrit/http']
        username, password = http_auth['username'], http_auth['password']

    Reviewer(GERRIT_HOST, GERRIT_PROJECT, GERRIT_BRANCH,
             username, password, REVIEW_HISTORY_PATH).run()


# Only start the daemon when executed as a script.
if __name__ == '__main__':
    main()