Whamcloud - gitweb
LU-11297 lnet: MR Routing Feature
[fs/lustre-release.git] / contrib / scripts / gerrit_checkpatch.py
1 #!/usr/bin/env python2
2 #
3 # GPL HEADER START
4 #
5 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 #
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License version 2 only,
9 # as published by the Free Software Foundation.
10 #
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 # General Public License version 2 for more details (a copy is included
15 # in the LICENSE file that accompanied this code).
16 #
17 # You should have received a copy of the GNU General Public License
18 # version 2 along with this program; If not, see
19 # http://www.gnu.org/licenses/gpl-2.0.html
20 #
21 # GPL HEADER END
22 #
23 # Copyright (c) 2014, Intel Corporation.
24 #
25 # Author: John L. Hammond <john.hammond@intel.com>
26 #
27 """
28 Gerrit Checkpatch Reviewer Daemon
29 ~~~~~~ ~~~~~~~~~~ ~~~~~~~~ ~~~~~~
30
31 * Watch for new change revisions in a gerrit instance.
32 * Pass new revisions through checkpatch script.
33 * POST reviews back to gerrit based on checkpatch output.
34 """
35
36 import base64
37 import fnmatch
38 import logging
39 import json
40 import os
41 import requests
42 import subprocess
43 import time
44 import urllib
45
def _getenv_list(key, default=None, sep=':'):
    """
    Read environment variable key and split it into a list on sep.

    'PATH' => ['/bin', '/usr/bin', ...]

    Returns default (unsplit, exactly as passed) when key is not set
    in the environment.
    """
    raw = os.getenv(key)
    return default if raw is None else raw.split(sep)
55
# Gerrit connection settings. Each one may be overridden through the
# environment variable of the same name.
GERRIT_HOST = os.getenv('GERRIT_HOST', 'review.whamcloud.com')
GERRIT_PROJECT = os.getenv('GERRIT_PROJECT', 'fs/lustre-release')
GERRIT_BRANCH = os.getenv('GERRIT_BRANCH', 'master')
GERRIT_AUTH_PATH = os.getenv('GERRIT_AUTH_PATH', 'GERRIT_AUTH')
# When set, main() reviews only this change instead of polling forever.
GERRIT_CHANGE_NUMBER = os.getenv('GERRIT_CHANGE_NUMBER', None)

# The file named by GERRIT_AUTH_PATH should contain a single JSON
# dictionary of the form:
# {
#     "review.example.com": {
#         "gerrit/http": {
#             "username": "example-checkpatch",
#             "password": "1234"
#         }
#     }
#     ...
# }

# Colon separated list of checkpatch scripts; each one is run on every patch.
CHECKPATCH_PATHS = _getenv_list('CHECKPATCH_PATHS', ['checkpatch.pl'])
# Split on any run of whitespace so that an empty value or repeated
# spaces never produce empty-string arguments for the checkpatch script.
CHECKPATCH_ARGS = os.getenv('CHECKPATCH_ARGS', '--show-types -').split()
# fnmatch patterns of paths whose warnings are dropped.
CHECKPATCH_IGNORED_FILES = _getenv_list('CHECKPATCH_IGNORED_FILES', [
        'lustre/ptlrpc/wiretest.c',
        'lustre/utils/wiretest.c',
        '*.patch'])
# checkpatch message types (as printed with --show-types) that are dropped.
CHECKPATCH_IGNORED_KINDS = _getenv_list('CHECKPATCH_IGNORED_KINDS', [
        'LASSERT',
        'LCONSOLE',
        'LEADING_SPACE'])
REVIEW_HISTORY_PATH = os.getenv('REVIEW_HISTORY_PATH', 'REVIEW_HISTORY')
# Link appended to the review message when style warnings were found.
STYLE_LINK = os.getenv('STYLE_LINK',
        'http://wiki.lustre.org/Lustre_Coding_Style_Guidelines')

# When False the Code-Review label is always posted as 0; the computed
# score is then only recorded in the review history.
USE_CODE_REVIEW_SCORE = False
88
def parse_checkpatch_output(out, path_line_comments, warning_count,
                            ignored_kinds=None, ignored_files=None):
    """
    Parse string output out of CHECKPATCH into path_line_comments.
    Increment warning_count[0] for each warning.

    path_line_comments is { PATH: { LINE: [COMMENT, ...] }, ... }.

    ignored_kinds is a collection of checkpatch types to drop and
    ignored_files a list of fnmatch patterns of paths to drop. When
    left as None they default to CHECKPATCH_IGNORED_KINDS and
    CHECKPATCH_IGNORED_FILES respectively.
    """
    if ignored_kinds is None:
        ignored_kinds = CHECKPATCH_IGNORED_KINDS
    if ignored_files is None:
        ignored_files = CHECKPATCH_IGNORED_FILES

    def add_comment(path, line, level, kind, message):
        """Record one message for path:line unless it is ignored."""
        logging.debug("add_comment %s %d %s %s '%s'",
                      path, line, level, kind, message)
        if kind in ignored_kinds:
            return

        if any(fnmatch.fnmatch(path, pattern) for pattern in ignored_files):
            return

        path_comments = path_line_comments.setdefault(path, {})
        line_comments = path_comments.setdefault(line, [])
        line_comments.append('(style) %s\n' % message)
        warning_count[0] += 1

    # State carried from a message header line to the '#NNN: FILE:'
    # line that follows it; reset on every blank line.
    level = None # 'ERROR', 'WARNING'
    kind = None # 'CODE_INDENT', 'LEADING_SPACE', ...
    message = None # 'code indent should use tabs where possible'

    for line in out.splitlines():
        # ERROR:CODE_INDENT: code indent should use tabs where possible
        # #404: FILE: lustre/liblustre/dir.c:103:
        # +        op_data.op_hash_offset = hash_x_index(page->index, 0);$
        line = line.strip()
        if not line:
            level, kind, message = None, None, None
        elif line[0] == '#':
            # '#404: FILE: lustre/liblustre/dir.c:103:'
            tokens = line.split(':', 5)
            if len(tokens) != 5 or tokens[1] != ' FILE':
                continue

            path = tokens[2].strip()
            line_number_str = tokens[3].strip()
            if not line_number_str.isdigit():
                continue

            line_number = int(line_number_str)

            if path and level and kind and message:
                add_comment(path, line_number, level, kind, message)
        elif line[0] == '+':
            # Quoted patch content, nothing to record.
            continue
        else:
            # ERROR:CODE_INDENT: code indent should use tabs where possible
            parts = line.split(':', 2)
            if len(parts) == 3 and parts[0] in ('ERROR', 'WARNING'):
                # Strip so the comment does not start with the blank
                # that follows the second colon.
                level, kind, message = [part.strip() for part in parts]
            else:
                level, kind, message = None, None, None
149
150
def review_input_and_score(path_line_comments, warning_count,
                           use_code_review_score=None, style_link=None):
    """
    Convert { PATH: { LINE: [COMMENT, ...] }, ... }, [11] to a gerrit
    ReviewInput() and score

    The score is -1 when warning_count[0] > 0 and +1 otherwise. The
    'Code-Review' label carries that score only when
    use_code_review_score is true, otherwise it is 0.

    use_code_review_score and style_link default to the module level
    USE_CODE_REVIEW_SCORE and STYLE_LINK when left as None.

    Returns a (review_input_dict, score) tuple.
    """
    if use_code_review_score is None:
        use_code_review_score = USE_CODE_REVIEW_SCORE
    if style_link is None:
        style_link = STYLE_LINK

    # items() rather than iteritems() so this also runs on Python 3.
    review_comments = {}
    for path, line_comments in path_line_comments.items():
        review_comments[path] = [
            {'line': line, 'message': '\n'.join(comment_list)}
            for line, comment_list in line_comments.items()]

    if warning_count[0] > 0:
        score = -1
    else:
        score = +1

    if use_code_review_score:
        code_review_score = score
    else:
        code_review_score = 0

    if score < 0:
        return {
            'message': ('%d style warning(s).\nFor more details please see %s' %
                        (warning_count[0], style_link)),
            'labels': {
                'Code-Review': code_review_score
                },
            'comments': review_comments,
            'notify': 'OWNER',
            }, score
    else:
        # Clean patches get no inline comments and nobody is notified.
        return {
            'message': 'Looks good to me.',
            'labels': {
                'Code-Review': code_review_score
                },
            'notify': 'NONE',
            }, score
193
194
def _now():
    """
    Return the current time as whole seconds since the epoch.

    int() is used instead of long() so the function also works on
    Python 3, where long() does not exist.
    """
    return int(time.time())
198
199
class Reviewer(object):
    """
    * Poll gerrit instance for updates to changes matching project and branch.
    * Pipe new patches through checkpatch.
    * Convert checkpatch output to gerrit ReviewInput().
    * Post ReviewInput() to gerrit instance.
    * Track reviewed revisions in history_path.
    """
    def __init__(self, host, project, branch, username, password, history_path):
        """
        host, project and branch select the gerrit instance and the
        changes to watch; username and password are used for HTTP
        digest authentication; history_path names the review history
        file.
        """
        self.host = host
        self.project = project
        self.branch = branch
        self.auth = requests.auth.HTTPDigestAuth(username, password)
        self.logger = logging.getLogger(__name__)
        self.history_path = history_path
        # 'r' enables load_history(), 'w' enables appending in
        # write_history().
        self.history_mode = 'rw'
        # { 'CHANGE_ID REVISION': SCORE, ... }
        self.history = {}
        # Epoch of the last completed update(), 0 if unknown.
        self.timestamp = 0
        self.post_enabled = True
        # Seconds to sleep after each review in update().
        self.post_interval = 10
        # Seconds to sleep between update() calls in run().
        self.update_interval = 300
        # Timeout in seconds for each HTTP request.
        self.request_timeout = 60

    def _debug(self, msg, *args):
        """Log msg % args at debug level."""
        self.logger.debug(msg, *args)

    def _error(self, msg, *args):
        """Log msg % args at error level."""
        self.logger.error(msg, *args)

    @staticmethod
    def _quote(value):
        """
        Percent-encode value, including '/', on Python 2 or Python 3.
        """
        try:
            from urllib import quote # Python 2
        except ImportError:
            from urllib.parse import quote # Python 3
        return quote(value, safe='')

    def _url(self, path):
        """
        Return the full URL for REST path on this host.
        """
        # NOTE(review): credentials travel over plain http here, and
        # '/a' is presumably gerrit's authenticated endpoint prefix --
        # confirm against the gerrit REST documentation.
        return 'http://' + self.host + '/a' + path

    def _get(self, path):
        """
        GET path return Response.

        Returns None (after logging the error) if the request raises
        or the status is not OK.
        """
        url = self._url(path)
        try:
            res = requests.get(url, auth=self.auth,
                               timeout=self.request_timeout)
        except Exception as exc:
            # Keep the daemon alive on any transport error.
            self._error("cannot GET '%s': exception = %s", url, str(exc))
            return None

        if res.status_code != requests.codes.ok:
            self._error("cannot GET '%s': reason = %s, status_code = %d",
                       url, res.reason, res.status_code)
            return None

        return res

    def _post(self, path, obj):
        """
        POST json(obj) to path, return True on success.

        Returns False without sending anything when post_enabled is
        false, and False (after logging) on any error.
        """
        url = self._url(path)
        data = json.dumps(obj)
        if not self.post_enabled:
            self._debug("_post: disabled: url = '%s', data = '%s'", url, data)
            return False

        try:
            res = requests.post(url, data=data,
                                headers={'Content-Type': 'application/json'},
                                auth=self.auth, timeout=self.request_timeout)
        except Exception as exc:
            # Keep the daemon alive on any transport error.
            self._error("cannot POST '%s': exception = %s", url, str(exc))
            return False

        if res.status_code != requests.codes.ok:
            self._error("cannot POST '%s': reason = %s, status_code = %d",
                       url, res.reason, res.status_code)
            return False

        return True

    def load_history(self):
        """
        Load review history from history_path containing lines of the form:
        EPOCH      FULL_CHANGE_ID                         REVISION    SCORE
        1394536722 fs%2Flustre-release~master~I5cc6c23... 00e2cc75... 1
        1394536721 -                                      -           0
        1394537033 fs%2Flustre-release~master~I10be8e9... 44f7b504... 1
        1394537032 -                                      -           0
        1394537344 -                                      -           0
        ...

        Lines whose change id is '-' only update the timestamp. A
        missing history file is treated as empty history, and blank or
        malformed lines are skipped, so a first run or a truncated
        file does not stop the reviewer.
        """
        if 'r' in self.history_mode:
            if os.path.exists(self.history_path):
                with open(self.history_path) as history_file:
                    for line in history_file:
                        try:
                            epoch, change_id, revision, score = line.split()
                            if change_id == '-':
                                self.timestamp = int(float(epoch))
                            else:
                                self.history[change_id + ' ' + revision] = score
                        except ValueError:
                            # Wrong number of fields or unparsable epoch.
                            self._debug("load_history: skipping line '%s'",
                                        line.rstrip())
            else:
                self._debug("load_history: no history file '%s'",
                            self.history_path)

        self._debug("load_history: history size = %d, timestamp = %d",
                    len(self.history), self.timestamp)

    def write_history(self, change_id, revision, score, epoch=-1):
        """
        Add review record to history dict and file.

        A change_id of '-' writes a timestamp-only record. When epoch
        is not positive the current self.timestamp is recorded.
        """
        if change_id != '-':
            self.history[change_id + ' ' + revision] = score

        if epoch <= 0:
            epoch = self.timestamp

        if 'w' in self.history_mode:
            with open(self.history_path, 'a') as history_file:
                # Same space separated record the Python 2 print
                # statement produced, written portably.
                history_file.write('%s %s %s %s\n' %
                                   (epoch, change_id, revision, score))

    def in_history(self, change_id, revision):
        """
        Return True if change_id/revision was already reviewed.
        """
        return change_id + ' ' + revision in self.history

    def get_change_by_id(self, change_id):
        """
        GET one change by id.

        Returns the decoded JSON object, or None if the GET failed.
        """
        path = ('/changes/' + self._quote(self.project) + '~' +
                self._quote(self.branch) + '~' + change_id +
                '?o=CURRENT_REVISION')
        res = self._get(path)
        if not res:
            return None

        # Gerrit uses " )]}'" to guard against XSSI.
        return json.loads(res.content[5:])

    def get_changes(self, query):
        """
        GET a list of ChangeInfo()s for all changes matching query.

        {'status':'open', '-age':'60m'} =>
          GET /changes/?q=project:...+status:open+-age:60m&o=CURRENT_REVISION =>
            [ChangeInfo()...]

        'project' and 'branch' default to this reviewer's project and
        branch. Returns [] if the GET failed.
        """
        # Copy so the caller's dict is not modified.
        query = dict(query)
        query['project'] = self._quote(query.get('project', self.project))
        query['branch'] = self._quote(query.get('branch', self.branch))
        path = ('/changes/?q=' +
                '+'.join(k + ':' + v for k, v in query.items()) +
                '&o=CURRENT_REVISION')
        res = self._get(path)
        if not res:
            return []

        # Gerrit uses " )]}'" to guard against XSSI.
        return json.loads(res.content[5:])

    def decode_patch(self, content):
        """
        Decode gerrit's idea of base64.

        The base64 encoded patch returned by gerrit isn't always
        padded correctly according to b64decode. Don't know why. Work
        around this by appending more '=' characters or truncating the
        content until it decodes. But do try the unmodified content
        first.

        Returns the decoded data, or '' if no variant decodes.
        """
        # Pad with the same string type as content so concatenation
        # works for both bytes and text.
        pad = b'=' if isinstance(content, bytes) else '='

        for i in (0, 1, 2, 3, -1, -2, -3):
            if i >= 0:
                padded_content = content + (i * pad)
            else:
                padded_content = content[:i]

            try:
                return base64.b64decode(padded_content)
            except (TypeError, ValueError) as exc:
                # Python 2 raises TypeError on bad padding; Python 3
                # raises binascii.Error, a ValueError subclass.
                self._debug("decode_patch: len = %d, exception = %s",
                           len(padded_content), str(exc))

        return ''

    def get_patch(self, change, revision='current'):
        """
        GET and decode the (current) patch for change.

        Returns '' if the GET failed or the content cannot be decoded.
        """
        path = '/changes/' + change['id'] + '/revisions/' + revision + '/patch'
        self._debug("get_patch: path = '%s'", path)
        res = self._get(path)
        if not res:
            return ''

        self._debug("get_patch: len(content) = %d, content = '%s...'",
                   len(res.content), res.content[:20])

        return self.decode_patch(res.content)

    def post_review(self, change, revision, review_input):
        """
        POST review_input for the given revision of change.

        Returns True on success, as reported by _post().
        """
        path = '/changes/' + change['id'] + '/revisions/' + revision + '/review'
        self._debug("post_review: path = '%s'", path)
        return self._post(path, review_input)

    def check_patch(self, patch):
        """
        Run each script in CHECKPATCH_PATHS on patch, return a
        ReviewInput() and score.

        The patch is fed to each script on stdin and the warnings of
        all scripts are accumulated into one review.
        """
        path_line_comments = {}
        warning_count = [0]

        for path in CHECKPATCH_PATHS:
            pipe = subprocess.Popen([path] + CHECKPATCH_ARGS,
                                    stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE)
            out, err = pipe.communicate(patch)
            if not isinstance(out, str):
                # Python 3 pipes return bytes; the parser compares
                # against text, so decode first.
                out = out.decode('utf-8', 'replace')
            self._debug("check_patch: path = %s, out = '%s...', err = '%s...'",
                        path, out[:80], err[:80])
            parse_checkpatch_output(out, path_line_comments, warning_count)

        return review_input_and_score(path_line_comments, warning_count)

    def change_needs_review(self, change):
        """
        * Bail if the change isn't open (status is not 'NEW').
        * Bail if we've already reviewed the current revision.
        """
        status = change.get('status')
        if status != 'NEW':
            self._debug("change_needs_review: status = %s", status)
            return False

        current_revision = change.get('current_revision')
        self._debug("change_needs_review: current_revision = '%s'",
                    current_revision)
        if not current_revision:
            return False

        # Have we already checked this revision?
        if self.in_history(change['id'], current_revision):
            self._debug("change_needs_review: already reviewed")
            return False

        return True

    def review_change(self, change):
        """
        Review the current revision of change.
        * Pipe the patch through checkpatch(es).
        * Save results to review history.
        * POST review to gerrit.
        """
        self._debug("review_change: change = %s, subject = '%s'",
                    change['id'], change.get('subject', ''))

        current_revision = change.get('current_revision')
        self._debug("review_change: current_revision = '%s'",
                    current_revision)
        if not current_revision:
            return

        patch = self.get_patch(change, current_revision)
        if not patch:
            self._debug("review_change: no patch")
            return

        review_input, score = self.check_patch(patch)
        self._debug("review_change: score = %d", score)
        # The revision is recorded before posting, so a failed POST is
        # not retried on the next update.
        self.write_history(change['id'], current_revision, score)
        self.post_review(change, current_revision, review_input)

    def update(self):
        """
        GET recently updated changes and review as needed.

        Afterwards the timestamp is advanced and a timestamp-only
        record is appended to the history.
        """
        new_timestamp = _now()
        age = new_timestamp - self.timestamp + 60 * 60 # 1h padding
        self._debug("update: age = %d", age)

        open_changes = self.get_changes({'status':'open',
                                         '-age':str(age) + 's'})
        self._debug("update: got %d open_changes", len(open_changes))

        for change in open_changes:
            if self.change_needs_review(change):
                self.review_change(change)
                # Don't POST more than every post_interval seconds.
                time.sleep(self.post_interval)

        self.timestamp = new_timestamp
        self.write_history('-', '-', 0)

    def update_single_change(self, change):
        """
        Load the review history, then review the open change(s)
        matching the query term 'change:<change>' if they still need
        a review. change must be a string.
        """
        self.load_history()

        open_changes = self.get_changes({'status':'open',
                                         'change':change})
        self._debug("update: got %d open_changes", len(open_changes))

        # A distinct loop name keeps the change parameter intact.
        for open_change in open_changes:
            if self.change_needs_review(open_change):
                self.review_change(open_change)

    def run(self):
        """
        * Load review history.
        * Call update() every update_interval seconds.
        """

        if self.timestamp <= 0:
            self.load_history()

        while True:
            self.update()
            time.sleep(self.update_interval)
520
521
def main():
    """
    Script entry point.

    Configure debug logging, read the credentials for GERRIT_HOST from
    the JSON file at GERRIT_AUTH_PATH and build a Reviewer. Review the
    single change GERRIT_CHANGE_NUMBER when it is set, otherwise poll
    forever via Reviewer.run().
    """
    logging.basicConfig(format='%(asctime)s %(message)s', level=logging.DEBUG)

    with open(GERRIT_AUTH_PATH) as auth_file:
        credentials = json.load(auth_file)[GERRIT_HOST]['gerrit/http']

    username = credentials['username']
    password = credentials['password']

    reviewer = Reviewer(GERRIT_HOST, GERRIT_PROJECT, GERRIT_BRANCH,
                        username, password, REVIEW_HISTORY_PATH)

    if not GERRIT_CHANGE_NUMBER:
        reviewer.run()
        return

    reviewer.update_single_change(GERRIT_CHANGE_NUMBER)
538
539
# Start the reviewer only when this file is executed as a script, not
# when it is imported as a module.
if __name__ == "__main__":
    main()