Whamcloud - gitweb
5058debed272c2969b8203fe6ca007d298896a87
[fs/lustre-release.git] / contrib / scripts / gerrit_checkpatch.py
1 #!/usr/bin/env python
2 #
3 # GPL HEADER START
4 #
5 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 #
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License version 2 only,
9 # as published by the Free Software Foundation.
10 #
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 # General Public License version 2 for more details (a copy is included
15 # in the LICENSE file that accompanied this code).
16 #
17 # You should have received a copy of the GNU General Public License
18 # version 2 along with this program; If not, see
19 # http://www.gnu.org/licenses/gpl-2.0.html
20 #
21 # GPL HEADER END
22 #
23 # Copyright (c) 2014, Intel Corporation.
24 #
25 # Author: John L. Hammond <john.hammond@intel.com>
26 #
27 """
28 Gerrit Checkpatch Reviewer Daemon
29 ~~~~~~ ~~~~~~~~~~ ~~~~~~~~ ~~~~~~
30
31 * Watch for new change revisions in a gerrit instance.
32 * Pass new revisions through checkpatch script.
33 * POST reviews back to gerrit based on checkpatch output.
34 """
35
36 import base64
37 import fnmatch
38 import logging
39 import json
40 import os
41 import requests
42 import subprocess
43 import time
44 import urllib
45
def _getenv_list(key, default=None, sep=':'):
    """
    Read environment variable key and split its value on sep.

    'PATH' => ['/bin', '/usr/bin', ...]

    When key is not set at all, default is returned as given (it is
    not split).
    """
    raw = os.getenv(key)
    return default if raw is None else raw.split(sep)
55
# Gerrit connection settings. Each one can be overridden by an
# environment variable of the same name.
GERRIT_HOST = os.environ.get('GERRIT_HOST', 'review.whamcloud.com')
GERRIT_PROJECT = os.environ.get('GERRIT_PROJECT', 'fs/lustre-release')
GERRIT_BRANCH = os.environ.get('GERRIT_BRANCH', 'master')
# Path of the JSON credentials file read by main().
GERRIT_AUTH_PATH = os.environ.get('GERRIT_AUTH_PATH', 'GERRIT_AUTH')
60
61 # GERRIT_AUTH should contain a single JSON dictionary of the form:
62 # {
63 #     "review.example.com": {
64 #         "gerrit/http": {
65 #             "username": "example-checkpatch",
66 #             "password": "1234"
67 #         }
68 #     }
69 #     ...
70 # }
71
# Checkpatch scripts run on every patch (':'-separated when given via
# the environment).
CHECKPATCH_PATHS = _getenv_list('CHECKPATCH_PATHS', ['checkpatch.pl'])

# fnmatch patterns; warnings for matching paths are dropped.
CHECKPATCH_IGNORED_FILES = _getenv_list(
    'CHECKPATCH_IGNORED_FILES',
    [
        'lustre/contrib/wireshark/packet-lustre.c',
        'lustre/ptlrpc/wiretest.c',
        'lustre/utils/wiretest.c',
        '*.patch',
    ])

# Checkpatch message kinds (as printed by --show-types) that are dropped.
CHECKPATCH_IGNORED_KINDS = _getenv_list(
    'CHECKPATCH_IGNORED_KINDS',
    [
        'LEADING_SPACE',
    ])

# File in which reviewed revisions and update timestamps are recorded.
REVIEW_HISTORY_PATH = os.getenv('REVIEW_HISTORY_PATH', 'REVIEW_HISTORY')

# Link appended to the review message when style warnings were found.
STYLE_LINK = os.getenv(
    'STYLE_LINK',
    'https://wiki.hpdd.intel.com/display/PUB/Coding+Guidelines')

# When False the 'Code-Review' label is always posted as 0.
USE_CODE_REVIEW_SCORE = False
85
def parse_checkpatch_output(out, path_line_comments, warning_count):
    """
    Parse the text output of a checkpatch run and collect its messages.

    out is the checkpatch output as one string. Collected messages are
    added in place to path_line_comments, which has the form
    { PATH: { LINE: [COMMENT, ...] }, ... }, and warning_count[0] is
    incremented once per collected message.

    The output is expected to consist of groups such as:

        ERROR:CODE_INDENT: code indent should use tabs where possible
        #404: FILE: lustre/liblustre/dir.c:103:
        +        op_data.op_hash_offset = hash_x_index(page->index, 0);$

    separated by blank lines.
    """
    def add_comment(path, line, level, kind, message):
        """Record message for path:line unless kind or path is ignored."""
        logging.debug("add_comment %s %d %s %s '%s'",
                      path, line, level, kind, message)
        if kind in CHECKPATCH_IGNORED_KINDS:
            return

        if any(fnmatch.fnmatch(path, pattern)
               for pattern in CHECKPATCH_IGNORED_FILES):
            return

        comments = path_line_comments.setdefault(path, {}).setdefault(line, [])
        comments.append('(style) ' + message)
        warning_count[0] += 1

    # Most recently seen 'LEVEL:KIND: message' header, for example
    # 'ERROR', 'CODE_INDENT', ' code indent should use tabs ...'.
    level = kind = message = None

    for raw_line in out.splitlines():
        text = raw_line.strip()

        if not text:
            # A blank line ends the current message group.
            level = kind = message = None
            continue

        if text[0] == '+':
            # Quoted patch content, nothing to record.
            continue

        if text[0] != '#':
            # Candidate header line 'LEVEL:KIND: message'. Anything
            # that is not an ERROR or WARNING header clears the state.
            fields = text.split(':', 2)
            if len(fields) == 3 and fields[0] in ('ERROR', 'WARNING'):
                level, kind, message = fields
            else:
                level = kind = message = None
            continue

        # Location line: '#404: FILE: lustre/liblustre/dir.c:103:'
        tokens = text.split(':', 5)
        if len(tokens) != 5 or tokens[1] != ' FILE':
            continue

        path = tokens[2].strip()
        number = tokens[3].strip()
        if not number.isdigit():
            continue

        if path and level and kind and message:
            add_comment(path, int(number), level, kind, message)
146
147
def review_input_and_score(path_line_comments, warning_count):
    """
    Convert collected checkpatch comments into a gerrit ReviewInput()
    and a score.

    path_line_comments is { PATH: { LINE: [COMMENT, ...] }, ... } and
    warning_count is a one element list such as [11].

    Returns a (review_input, score) tuple. score is -1 when
    warning_count[0] is greater than zero and +1 otherwise. The
    'Code-Review' label carries score only when USE_CODE_REVIEW_SCORE
    is set; otherwise it is 0. Inline comments are included only in the
    negative review.
    """
    review_comments = {}

    # dict.items() rather than the Python 2 only iteritems(), so this
    # function behaves the same on Python 2 and Python 3.
    for path, line_comments in path_line_comments.items():
        # All comments for one line are merged into a single message.
        review_comments[path] = [
            {'line': line, 'message': '\n'.join(comment_list)}
            for line, comment_list in line_comments.items()]

    score = -1 if warning_count[0] > 0 else +1
    code_review_score = score if USE_CODE_REVIEW_SCORE else 0

    if score < 0:
        return {
            'message': ('%d style warning(s).\nFor more details please see %s' %
                        (warning_count[0], STYLE_LINK)),
            'labels': {
                'Code-Review': code_review_score
                },
            'comments': review_comments,
            'notify': 'OWNER',
            }, score

    return {
        'message': 'Looks good to me.',
        'labels': {
            'Code-Review': code_review_score
            },
        'notify': 'NONE',
        }, score
190
191
def _now():
    """Return the current time as whole seconds since the epoch."""
    # int() instead of long(): long does not exist on Python 3, and on
    # Python 2 int() promotes to long by itself when needed.
    return int(time.time())
195
196
class Reviewer(object):
    """
    * Poll gerrit instance for updates to changes matching project and branch.
    * Pipe new patches through checkpatch.
    * Convert checkpatch output to gerrit ReviewInput().
    * Post ReviewInput() to gerrit instance.
    * Track reviewed revisions in history_path.
    """
    def __init__(self, host, project, branch, username, password, history_path):
        """
        host, project and branch select the gerrit instance and the
        changes to watch; username and password are used for HTTP
        digest authentication; history_path names the review history
        file.
        """
        self.host = host
        self.project = project
        self.branch = branch
        self.auth = requests.auth.HTTPDigestAuth(username, password)
        self.logger = logging.getLogger(__name__)
        self.history_path = history_path
        # 'r' enables load_history(), 'w' enables writes in write_history().
        self.history_mode = 'rw'
        # Maps 'CHANGE_ID REVISION' to the score recorded for it.
        self.history = {}
        # Epoch of the last completed update(). A plain 0 is used since
        # the 0L literal is a syntax error on Python 3.
        self.timestamp = 0
        # When False, _post() only logs what it would have sent.
        self.post_enabled = True
        # Seconds to sleep after each review / between update() calls.
        self.post_interval = 10
        self.update_interval = 300

    def _debug(self, msg, *args):
        """Log msg % args at DEBUG level."""
        self.logger.debug(msg, *args)

    def _error(self, msg, *args):
        """Log msg % args at ERROR level."""
        self.logger.error(msg, *args)

    def _url(self, path):
        """Return the authenticated ('/a' prefixed) REST URL for path."""
        # NOTE(review): plain http is used here; confirm whether the
        # gerrit instance should be reached over https instead.
        return 'http://' + self.host + '/a' + path

    def _get(self, path):
        """
        GET path, return the Response, or None on any failure (request
        exception or a status other than 200). Failures are logged.
        """
        url = self._url(path)
        try:
            res = requests.get(url, auth=self.auth)
        except requests.exceptions.RequestException as exc:
            self._error("cannot GET '%s': exception = %s", url, str(exc))
            return None

        if res.status_code != requests.codes.ok:
            self._error("cannot GET '%s': reason = %s, status_code = %d",
                        url, res.reason, res.status_code)
            return None

        return res

    def _post(self, path, obj):
        """
        POST json(obj) to path, return True on success.

        Returns False when posting is disabled, when the request raises
        or when the status is not 200. Failures are logged.
        """
        url = self._url(path)
        data = json.dumps(obj)
        if not self.post_enabled:
            self._debug("_post: disabled: url = '%s', data = '%s'", url, data)
            return False

        try:
            res = requests.post(url, data=data,
                                headers={'Content-Type': 'application/json'},
                                auth=self.auth)
        except requests.exceptions.RequestException as exc:
            self._error("cannot POST '%s': exception = %s", url, str(exc))
            return False

        if res.status_code != requests.codes.ok:
            self._error("cannot POST '%s': reason = %s, status_code = %d",
                        url, res.reason, res.status_code)
            return False

        return True

    def load_history(self):
        """
        Load review history from history_path containing lines of the form:
        EPOCH      FULL_CHANGE_ID                         REVISION    SCORE
        1394536722 fs%2Flustre-release~master~I5cc6c23... 00e2cc75... 1
        1394536721 -                                      -           0
        1394537033 fs%2Flustre-release~master~I10be8e9... 44f7b504... 1
        1394537032 -                                      -           0
        1394537344 -                                      -           0
        ...

        Lines whose change id is '-' record the time of an update() and
        set self.timestamp; all other lines mark a revision as reviewed.

        A missing history file is treated as an empty history. Blank
        lines are ignored and malformed lines are logged and skipped.
        Other I/O errors are raised.
        """
        import errno

        if 'r' in self.history_mode:
            try:
                history_file = open(self.history_path)
            except IOError as exc:
                if exc.errno != errno.ENOENT:
                    raise
                # Nothing has been recorded yet (for example first run).
                self._debug("load_history: no history file '%s'",
                            self.history_path)
            else:
                with history_file:
                    for line in history_file:
                        self._load_history_line(line)

        self._debug("load_history: history size = %d, timestamp = %d",
                    len(self.history), self.timestamp)

    def _load_history_line(self, line):
        """Apply one line of the history file to history / timestamp."""
        fields = line.split()
        if not fields:
            return

        if len(fields) != 4:
            self._error("load_history: skipping malformed line '%s'",
                        line.rstrip())
            return

        epoch, change_id, revision, score = fields
        if change_id != '-':
            self.history[change_id + ' ' + revision] = score
            return

        try:
            # int() promotes to long on Python 2 when needed.
            self.timestamp = int(float(epoch))
        except ValueError:
            self._error("load_history: skipping malformed line '%s'",
                        line.rstrip())

    def write_history(self, change_id, revision, score, epoch=-1):
        """
        Add review record to history dict and file.

        A change_id of '-' records only a timestamp line. When epoch is
        not positive, self.timestamp is written instead.
        """
        if change_id != '-':
            self.history[change_id + ' ' + revision] = score

        if epoch <= 0:
            epoch = self.timestamp

        if 'w' in self.history_mode:
            with open(self.history_path, 'a') as history_file:
                # Same output as the Python 2 'print >> file, a, b, c, d'
                # statement, but valid on Python 3 as well.
                history_file.write('%s %s %s %s\n' %
                                   (epoch, change_id, revision, score))

    def in_history(self, change_id, revision):
        """
        Return True if change_id/revision was already reviewed.
        """
        return change_id + ' ' + revision in self.history

    def get_changes(self, query):
        """
        GET a list of ChangeInfo()s for all changes matching query.

        {'status':'open', '-age':'60m'} =>
          GET /changes/?q=project:...+status:open+-age:60m&o=CURRENT_REVISION =>
            [ChangeInfo()...]

        'project' and 'branch' default to the reviewer's own and are
        URL-quoted; other values are used as given. Returns None when
        the GET fails.
        """
        try:
            from urllib import quote  # Python 2
        except ImportError:
            from urllib.parse import quote  # Python 3

        query = dict(query)
        query['project'] = quote(query.get('project', self.project), safe='')
        query['branch'] = quote(query.get('branch', self.branch), safe='')
        path = ('/changes/?q=' +
                '+'.join(k + ':' + v for k, v in query.items()) +
                '&o=CURRENT_REVISION')
        res = self._get(path)
        if not res:
            return None

        # Gerrit uses " )]}'" to guard against XSSI.
        return json.loads(res.content[5:])

    def decode_patch(self, content):
        """
        Decode gerrit's idea of base64.

        The base64 encoded patch returned by gerrit isn't always
        padded correctly according to b64decode. Don't know why. Work
        around this by appending more '=' characters or truncating the
        content until it decodes. But do try the unmodified content
        first.

        Returns the decoded patch, or '' if no variant decodes.
        """
        # Pad with the same string type as content so that bytes
        # content (as returned by requests) also works on Python 3.
        pad = b'=' if isinstance(content, bytes) else '='

        for i in (0, 1, 2, 3, -1, -2, -3):
            if i >= 0:
                padded_content = content + (i * pad)
            else:
                padded_content = content[:i]

            try:
                return base64.b64decode(padded_content)
            except (TypeError, ValueError) as exc:
                # Python 2 raises TypeError, Python 3 raises
                # binascii.Error (a ValueError) on bad padding.
                self._debug("decode_patch: len = %d, exception = %s",
                            len(padded_content), str(exc))

        return ''

    def get_patch(self, change, revision='current'):
        """
        GET and decode the (current) patch for change.

        Returns '' when the patch cannot be fetched or decoded.
        """
        path = '/changes/' + change['id'] + '/revisions/' + revision + '/patch'
        self._debug("get_patch: path = '%s'", path)
        res = self._get(path)
        if not res:
            return ''

        self._debug("get_patch: len(content) = %d, content = '%s...'",
                    len(res.content), res.content[:20])

        return self.decode_patch(res.content)

    def set_review(self, change, revision, review_input):
        """
        POST review_input for the given revision of change.

        Returns True if the POST succeeded.
        """
        path = '/changes/' + change['id'] + '/revisions/' + revision + '/review'
        self._debug("set_review: path = '%s'", path)
        return self._post(path, review_input)

    def check_patch(self, patch):
        """
        Run each script in CHECKPATCH_PATHS on patch, return a
        ReviewInput() and score.

        The patch is fed to each script on stdin; the comments of all
        scripts are accumulated into one review.
        """
        path_line_comments = {}
        warning_count = [0]

        for path in CHECKPATCH_PATHS:
            pipe = subprocess.Popen([path, '--show-types', '-'],
                                    stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE)
            out, err = pipe.communicate(patch)
            self._debug("check_patch: path = %s, out = '%s...', err = '%s...'",
                        path, out[:80], err[:80])
            # On Python 3 the pipe yields bytes while the parser works
            # on text. On Python 2 out is already a str.
            if not isinstance(out, str):
                out = out.decode('utf-8', 'replace')
            parse_checkpatch_output(out, path_line_comments, warning_count)

        return review_input_and_score(path_line_comments, warning_count)

    def review_change(self, change, force=False):
        """
        Review the current revision of change.
        * Bail if the change isn't open (status is not 'NEW').
        * GET the current revision from gerrit.
        * Bail if we've already reviewed it (unless force is True).
        * Pipe the patch through checkpatch(es).
        * Save results to review history.
        * POST review to gerrit.

        Returns True if the revision was checked, False if it was
        skipped.
        """
        self._debug("review_change: change = %s, subject = '%s'",
                    change['id'], change.get('subject', ''))

        status = change.get('status')
        if status != 'NEW':
            self._debug("review_change: status = %s", status)
            return False

        current_revision = change.get('current_revision')
        self._debug("review_change: current_revision = '%s'", current_revision)
        if not current_revision:
            return False

        # Have we already checked this revision?
        if self.in_history(change['id'], current_revision) and not force:
            self._debug("review_change: already reviewed")
            return False

        patch = self.get_patch(change, current_revision)
        if not patch:
            self._debug("review_change: no patch")
            return False

        review_input, score = self.check_patch(patch)
        self._debug("review_change: score = %d", score)
        # NOTE(review): the history is written before the POST and the
        # POST result is not checked, so a revision whose POST fails is
        # not retried. Confirm this is intended.
        self.write_history(change['id'], current_revision, score)
        self.set_review(change, current_revision, review_input)
        # Don't POST more than every post_interval seconds.
        time.sleep(self.post_interval)
        return True

    def update(self):
        """
        GET recently updated changes and review as needed.

        If the list of changes cannot be fetched, nothing is reviewed
        and the timestamp is left alone so that the next update()
        covers the same period again.
        """
        new_timestamp = _now()
        age = new_timestamp - self.timestamp + 60 * 60 # 1h padding
        self._debug("update: age = %d", age)

        open_changes = self.get_changes({'status':'open',
                                         '-age':str(age) + 's'})
        if open_changes is None:
            # get_changes() returns None when the GET failed; without
            # this check len(None) below would kill the daemon.
            self._error("update: cannot get open changes")
            return

        self._debug("update: got %d open_changes", len(open_changes))

        for change in open_changes:
            self.review_change(change)

        self.timestamp = new_timestamp
        self.write_history('-', '-', 0)

    def run(self):
        """
        * Load review history.
        * Call update() every update_interval seconds.

        Does not return.
        """

        if self.timestamp <= 0:
            self.load_history()

        while True:
            self.update()
            time.sleep(self.update_interval)
477
478
def main():
    """
    Enable debug logging, read the gerrit credentials for GERRIT_HOST
    from the JSON file at GERRIT_AUTH_PATH and run a Reviewer.
    """
    logging.basicConfig(level=logging.DEBUG)

    with open(GERRIT_AUTH_PATH) as auth_file:
        credentials = json.load(auth_file)[GERRIT_HOST]['gerrit/http']

    reviewer = Reviewer(GERRIT_HOST, GERRIT_PROJECT, GERRIT_BRANCH,
                        credentials['username'], credentials['password'],
                        REVIEW_HISTORY_PATH)
    reviewer.run()


if __name__ == "__main__":
    main()