Whamcloud - gitweb
Revert "LU-6245 libcfs: remove cfs_fs_time handling"
[fs/lustre-release.git] / contrib / scripts / gerrit_checkpatch.py
1 #!/usr/bin/env python
2 #
3 # GPL HEADER START
4 #
5 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 #
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License version 2 only,
9 # as published by the Free Software Foundation.
10 #
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 # General Public License version 2 for more details (a copy is included
15 # in the LICENSE file that accompanied this code).
16 #
17 # You should have received a copy of the GNU General Public License
18 # version 2 along with this program; If not, see
19 # http://www.gnu.org/licenses/gpl-2.0.html
20 #
21 # GPL HEADER END
22 #
23 # Copyright (c) 2014, Intel Corporation.
24 #
25 # Author: John L. Hammond <john.hammond@intel.com>
26 #
27 """
28 Gerrit Checkpatch Reviewer Daemon
29 ~~~~~~ ~~~~~~~~~~ ~~~~~~~~ ~~~~~~
30
31 * Watch for new change revisions in a gerrit instance.
32 * Pass new revisions through checkpatch script.
33 * POST reviews back to gerrit based on checkpatch output.
34 """
35
import base64
import fnmatch
import json
import logging
import os
import subprocess
import time
import urllib

try:
    from urllib import quote as _url_quote  # Python 2
except ImportError:
    from urllib.parse import quote as _url_quote  # Python 3

import requests
45
def _getenv_list(key, default=None, sep=':'):
    """
    Read environment variable key and split it into a list on sep.

    'PATH' => ['/bin', '/usr/bin', ...]

    Return default unchanged when the variable is not set at all.  A
    variable that is set but empty yields [''].
    """
    raw = os.getenv(key)
    return default if raw is None else raw.split(sep)
55
# Gerrit connection settings.  Each one can be overridden through the
# environment variable of the same name.
# Host name of the gerrit instance to poll and post reviews to.
GERRIT_HOST = os.environ.get('GERRIT_HOST', 'review.whamcloud.com')
# Project and branch used to select the changes to review.
GERRIT_PROJECT = os.environ.get('GERRIT_PROJECT', 'fs/lustre-release')
GERRIT_BRANCH = os.environ.get('GERRIT_BRANCH', 'master')
# Path of the JSON file holding the HTTP credentials (format below).
GERRIT_AUTH_PATH = os.environ.get('GERRIT_AUTH_PATH', 'GERRIT_AUTH')
60
61 # GERRIT_AUTH should contain a single JSON dictionary of the form:
62 # {
63 #     "review.example.com": {
64 #         "gerrit/http": {
65 #             "username": "example-checkpatch",
66 #             "password": "1234"
67 #         }
68 #     }
69 #     ...
70 # }
71
# Checkpatch settings.  The list-valued ones are read from ':'-separated
# environment variables of the same name.
# Scripts to run on every patch; each is invoked as "SCRIPT --show-types -".
CHECKPATCH_PATHS = _getenv_list('CHECKPATCH_PATHS', ['checkpatch.pl'])
# fnmatch() patterns of file paths whose warnings are dropped.
CHECKPATCH_IGNORED_FILES = _getenv_list(
    'CHECKPATCH_IGNORED_FILES',
    [
        'lustre/contrib/wireshark/packet-lustre.c',
        'lustre/ptlrpc/wiretest.c',
        'lustre/utils/wiretest.c',
        '*.patch',
    ])
# Checkpatch message types (as printed by --show-types) that are dropped.
CHECKPATCH_IGNORED_KINDS = _getenv_list(
    'CHECKPATCH_IGNORED_KINDS',
    [
        'LASSERT',
        'LCONSOLE',
        'LEADING_SPACE',
    ])
# File the reviewer appends its review records to.
REVIEW_HISTORY_PATH = os.environ.get('REVIEW_HISTORY_PATH', 'REVIEW_HISTORY')
# Link included in the review message when style warnings are found.
STYLE_LINK = os.environ.get(
    'STYLE_LINK',
    'https://wiki.hpdd.intel.com/display/PUB/Coding+Guidelines')

# When False the posted Code-Review label is always 0, whatever the score.
USE_CODE_REVIEW_SCORE = False
87
def parse_checkpatch_output(out, path_line_comments, warning_count):
    """
    Parse string output out of CHECKPATCH into path_line_comments.
    Increment warning_count[0] for each warning.

    out is the text printed by checkpatch with --show-types; raw bytes
    (as returned by a subprocess pipe on Python 3) are decoded first.

    path_line_comments is { PATH: { LINE: [COMMENT, ...] }, ... } and is
    updated in place.  warning_count is a one element list used as a
    mutable counter.

    Messages whose kind is in CHECKPATCH_IGNORED_KINDS, or whose path
    matches a pattern in CHECKPATCH_IGNORED_FILES, are dropped and not
    counted.
    """
    def add_comment(path, line, level, kind, message):
        """Record one message against path:line unless it is ignored."""
        logging.debug("add_comment %s %d %s %s '%s'",
                      path, line, level, kind, message)
        if kind in CHECKPATCH_IGNORED_KINDS:
            return

        if any(fnmatch.fnmatch(path, pattern)
               for pattern in CHECKPATCH_IGNORED_FILES):
            return

        path_comments = path_line_comments.setdefault(path, {})
        line_comments = path_comments.setdefault(line, [])
        line_comments.append('(style) ' + message)
        warning_count[0] += 1

    # On Python 3 a subprocess pipe yields bytes; the parsing below works
    # on text.  (On Python 2 bytes is str, so this is a no-op.)
    if isinstance(out, bytes) and not isinstance(out, str):
        out = out.decode('utf-8', 'replace')

    level = None # 'ERROR', 'WARNING'
    kind = None # 'CODE_INDENT', 'LEADING_SPACE', ...
    message = None # 'code indent should use tabs where possible'

    for line in out.splitlines():
        # ERROR:CODE_INDENT: code indent should use tabs where possible
        # #404: FILE: lustre/liblustre/dir.c:103:
        # +        op_data.op_hash_offset = hash_x_index(page->index, 0);$
        line = line.strip()
        if not line:
            # A blank line ends the current message.
            level, kind, message = None, None, None
        elif line[0] == '#':
            # '#404: FILE: lustre/liblustre/dir.c:103:'
            tokens = line.split(':', 5)
            if len(tokens) != 5 or tokens[1] != ' FILE':
                continue

            path = tokens[2].strip()
            line_number_str = tokens[3].strip()
            if not line_number_str.isdigit():
                continue

            line_number = int(line_number_str)

            # Only emit a comment when a message header preceded this
            # location line.
            if path and level and kind and message:
                add_comment(path, line_number, level, kind, message)
        elif line[0] == '+':
            # Quoted patch content, not part of the message.
            continue
        else:
            # ERROR:CODE_INDENT: code indent should use tabs where possible
            try:
                level, kind, message = line.split(':', 2)
            except ValueError:
                level, kind, message = None, None, None

            if level not in ('ERROR', 'WARNING'):
                level, kind, message = None, None, None
            else:
                # Drop the blank that follows 'KIND:' so the comment does
                # not read '(style)  message' with a doubled space.
                message = message.strip()
148
149
def review_input_and_score(path_line_comments, warning_count):
    """
    Convert { PATH: { LINE: [COMMENT, ...] }, ... }, [11] to a gerrit
    ReviewInput() and score.

    Return a (review_input, score) tuple.  score is -1 when
    warning_count[0] is positive and +1 otherwise.  The 'Code-Review'
    label carries the score only when USE_CODE_REVIEW_SCORE is set; it is
    0 otherwise.  Inline comments are included only in a negative review.
    """
    # All comments for one line are merged into a single inline comment.
    # items() rather than iteritems() so this runs on Python 2 and 3.
    review_comments = {}
    for path, line_comments in path_line_comments.items():
        review_comments[path] = [
            {'line': line, 'message': '\n'.join(comment_list)}
            for line, comment_list in line_comments.items()]

    score = -1 if warning_count[0] > 0 else +1
    code_review_score = score if USE_CODE_REVIEW_SCORE else 0

    if score < 0:
        return {
            'message': ('%d style warning(s).\nFor more details please see %s' %
                        (warning_count[0], STYLE_LINK)),
            'labels': {
                'Code-Review': code_review_score
                },
            'comments': review_comments,
            'notify': 'OWNER',
            }, score

    return {
        'message': 'Looks good to me.',
        'labels': {
            'Code-Review': code_review_score
            },
        'notify': 'NONE',
        }, score
192
193
def _now():
    """
    Return the current time as whole seconds since the epoch.

    int() is used instead of long() so this also runs on Python 3; on
    Python 2 int() promotes to long by itself when needed.
    """
    return int(time.time())
197
198
class Reviewer(object):
    """
    * Poll gerrit instance for updates to changes matching project and branch.
    * Pipe new patches through checkpatch.
    * Convert checkpatch output to gerrit ReviewInput().
    * Post ReviewInput() to gerrit instance.
    * Track reviewed revisions in history_path.
    """
    def __init__(self, host, project, branch, username, password, history_path):
        """
        host, project and branch select the gerrit instance and the
        changes to watch; username and password are used for HTTP digest
        authentication; history_path is the review history file.
        """
        self.host = host
        self.project = project
        self.branch = branch
        self.auth = requests.auth.HTTPDigestAuth(username, password)
        self.logger = logging.getLogger(__name__)
        self.history_path = history_path
        # 'r' enables load_history(), 'w' enables appending in
        # write_history().
        self.history_mode = 'rw'
        # { 'FULL_CHANGE_ID REVISION': SCORE }
        self.history = {}
        # Epoch seconds of the last completed update(); 0 means none yet.
        self.timestamp = 0
        self.post_enabled = True
        # Seconds to sleep after each reviewed change.
        self.post_interval = 10
        # Seconds to sleep between calls to update().
        self.update_interval = 300
        # Timeout in seconds for every HTTP request.
        self.request_timeout = 60

    def _debug(self, msg, *args):
        """Log msg % args at DEBUG level."""
        self.logger.debug(msg, *args)

    def _error(self, msg, *args):
        """Log msg % args at ERROR level."""
        self.logger.error(msg, *args)

    def _url(self, path):
        """Return the authenticated ('/a' prefixed) URL for REST path."""
        return 'http://' + self.host + '/a' + path

    def _get(self, path):
        """
        GET path return Response.

        Return None, after logging the reason, when the request raises
        or the status is not 200.
        """
        url = self._url(path)
        try:
            res = requests.get(url, auth=self.auth,
                               timeout=self.request_timeout)
        except Exception as exc:
            # Best effort: a failed poll is logged, the daemon keeps going.
            self._error("cannot GET '%s': exception = %s", url, str(exc))
            return None

        if res.status_code != requests.codes.ok:
            self._error("cannot GET '%s': reason = %s, status_code = %d",
                        url, res.reason, res.status_code)
            return None

        return res

    def _post(self, path, obj):
        """
        POST json(obj) to path, return True on success.

        Return False without sending anything when post_enabled is unset,
        and False, after logging the reason, when the request fails.
        """
        url = self._url(path)
        data = json.dumps(obj)
        if not self.post_enabled:
            self._debug("_post: disabled: url = '%s', data = '%s'", url, data)
            return False

        try:
            res = requests.post(url, data=data,
                                headers={'Content-Type': 'application/json'},
                                auth=self.auth, timeout=self.request_timeout)
        except Exception as exc:
            # Best effort: a failed post is logged, the daemon keeps going.
            self._error("cannot POST '%s': exception = %s", url, str(exc))
            return False

        if res.status_code != requests.codes.ok:
            self._error("cannot POST '%s': reason = %s, status_code = %d",
                        url, res.reason, res.status_code)
            return False

        return True

    def load_history(self):
        """
        Load review history from history_path containing lines of the form:
        EPOCH      FULL_CHANGE_ID                         REVISION    SCORE
        1394536722 fs%2Flustre-release~master~I5cc6c23... 00e2cc75... 1
        1394536721 -                                      -           0
        1394537033 fs%2Flustre-release~master~I10be8e9... 44f7b504... 1
        1394537032 -                                      -           0
        1394537344 -                                      -           0
        ...

        Lines with '-' as change id only carry the time of an update();
        the last one read becomes self.timestamp.  A missing history file
        is treated as an empty history, and blank or malformed lines are
        skipped, so that a first run or a damaged file does not stop the
        daemon.
        """
        if 'r' in self.history_mode:
            if os.path.exists(self.history_path):
                with open(self.history_path) as history_file:
                    for line in history_file:
                        self._load_history_line(line)
            else:
                self._debug("load_history: no history file '%s'",
                            self.history_path)

        self._debug("load_history: history size = %d, timestamp = %d",
                    len(self.history), self.timestamp)

    def _load_history_line(self, line):
        """Apply one line of the history file to history/timestamp."""
        fields = line.split()
        if len(fields) != 4:
            if fields:
                self._error("load_history: skipping malformed line '%s'",
                            line.rstrip())
            return

        epoch, change_id, revision, score = fields
        if change_id != '-':
            self.history[change_id + ' ' + revision] = score
            return

        try:
            self.timestamp = int(float(epoch))
        except ValueError:
            self._error("load_history: skipping bad epoch '%s'", epoch)

    def write_history(self, change_id, revision, score, epoch=-1):
        """
        Add review record to history dict and file.

        A change_id of '-' records only a timestamp.  When epoch is not
        positive the current self.timestamp is written instead.
        """
        if change_id != '-':
            self.history[change_id + ' ' + revision] = score

        if epoch <= 0:
            epoch = self.timestamp

        if 'w' in self.history_mode:
            with open(self.history_path, 'a') as history_file:
                history_file.write('%s %s %s %s\n' %
                                   (epoch, change_id, revision, score))

    def in_history(self, change_id, revision):
        """
        Return True if change_id/revision was already reviewed.
        """
        return change_id + ' ' + revision in self.history

    def get_change_by_id(self, change_id):
        """
        GET one change by id.

        Return the decoded JSON reply, or None when the request failed.
        """
        path = ('/changes/' + _url_quote(self.project, safe='') + '~' +
                _url_quote(self.branch, safe='') + '~' + change_id +
                '?o=CURRENT_REVISION')
        res = self._get(path)
        if not res:
            return None

        # Gerrit uses " )]}'" to guard against XSSI.
        return json.loads(res.content[5:])

    def get_changes(self, query):
        """
        GET a list of ChangeInfo()s for all changes matching query.

        {'status':'open', '-age':'60m'} =>
          GET /changes/?q=project:...+status:open+-age:60m&o=CURRENT_REVISION =>
            [ChangeInfo()...]

        'project' and 'branch' default to those of the reviewer.  Return
        [] when the request failed.
        """
        # Work on a copy so the caller's dict is left alone.
        query = dict(query)
        project = query.get('project', self.project)
        query['project'] = _url_quote(project, safe='')
        branch = query.get('branch', self.branch)
        query['branch'] = _url_quote(branch, safe='')
        path = ('/changes/?q=' +
                '+'.join(k + ':' + v for k, v in query.items()) +
                '&o=CURRENT_REVISION')
        res = self._get(path)
        if not res:
            return []

        # Gerrit uses " )]}'" to guard against XSSI.
        return json.loads(res.content[5:])

    def decode_patch(self, content):
        """
        Decode gerrit's idea of base64.

        The base64 encoded patch returned by gerrit isn't always
        padded correctly according to b64decode. Don't know why. Work
        around this by appending more '=' characters or truncating the
        content until it decodes. But do try the unmodified content
        first.

        Return the decoded bytes, or an empty byte string when no
        variant decodes.
        """
        # Work on bytes so that padding can be appended on Python 3 too.
        if not isinstance(content, bytes):
            content = content.encode('ascii', 'ignore')

        for i in (0, 1, 2, 3, -1, -2, -3):
            if i >= 0:
                padded_content = content + (i * b'=')
            else:
                padded_content = content[:i]

            try:
                return base64.b64decode(padded_content)
            except (TypeError, ValueError) as exc:
                # Python 2 raises TypeError for bad padding, Python 3
                # raises binascii.Error, which is a ValueError.
                self._debug("decode_patch: len = %d, exception = %s",
                            len(padded_content), str(exc))

        return b''

    def get_patch(self, change, revision='current'):
        """
        GET and decode the (current) patch for change.

        Return '' when the request failed.
        """
        path = '/changes/' + change['id'] + '/revisions/' + revision + '/patch'
        self._debug("get_patch: path = '%s'", path)
        res = self._get(path)
        if not res:
            return ''

        self._debug("get_patch: len(content) = %d, content = '%s...'",
                    len(res.content), res.content[:20])

        return self.decode_patch(res.content)

    def post_review(self, change, revision, review_input):
        """
        POST review_input for the given revision of change.
        """
        path = '/changes/' + change['id'] + '/revisions/' + revision + '/review'
        self._debug("post_review: path = '%s'", path)
        return self._post(path, review_input)

    def check_patch(self, patch):
        """
        Run each script in CHECKPATCH_PATHS on patch, return a
        ReviewInput() and score.

        The patch is fed to each script on stdin and the comments of all
        scripts are accumulated into one review.
        """
        path_line_comments = {}
        warning_count = [0]

        for path in CHECKPATCH_PATHS:
            pipe = subprocess.Popen([path, '--show-types', '-'],
                                    stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE)
            out, err = pipe.communicate(patch)
            self._debug("check_patch: path = %s, out = '%s...', err = '%s...'",
                        path, out[:80], err[:80])
            parse_checkpatch_output(out, path_line_comments, warning_count)

        return review_input_and_score(path_line_comments, warning_count)

    def change_needs_review(self, change):
        """
        * Bail if the change isn't open (status is not 'NEW').
        * Bail if we've already reviewed the current revision.
        """
        status = change.get('status')
        if status != 'NEW':
            self._debug("change_needs_review: status = %s", status)
            return False

        current_revision = change.get('current_revision')
        self._debug("change_needs_review: current_revision = '%s'",
                    current_revision)
        if not current_revision:
            return False

        # Have we already checked this revision?
        if self.in_history(change['id'], current_revision):
            self._debug("change_needs_review: already reviewed")
            return False

        return True

    def review_change(self, change):
        """
        Review the current revision of change.
        * Pipe the patch through checkpatch(es).
        * Save results to review history.
        * POST review to gerrit.
        """
        self._debug("review_change: change = %s, subject = '%s'",
                    change['id'], change.get('subject', ''))

        current_revision = change.get('current_revision')
        self._debug("review_change: current_revision = '%s'",
                    current_revision)
        if not current_revision:
            return

        patch = self.get_patch(change, current_revision)
        if not patch:
            self._debug("review_change: no patch")
            return

        review_input, score = self.check_patch(patch)
        self._debug("review_change: score = %d", score)
        # The revision is recorded before posting, so it is not reviewed
        # again even if the POST fails.
        self.write_history(change['id'], current_revision, score)
        self.post_review(change, current_revision, review_input)

    def update(self):
        """
        GET recently updated changes and review as needed.

        Queries open changes updated since the previous update (plus one
        hour of padding), then records the new timestamp in the history.
        """
        new_timestamp = _now()
        age = new_timestamp - self.timestamp + 60 * 60 # 1h padding
        self._debug("update: age = %d", age)

        open_changes = self.get_changes({'status':'open',
                                         '-age':str(age) + 's'})
        self._debug("update: got %d open_changes", len(open_changes))

        for change in open_changes:
            if self.change_needs_review(change):
                self.review_change(change)
                # Don't POST more than every post_interval seconds.
                time.sleep(self.post_interval)

        self.timestamp = new_timestamp
        self.write_history('-', '-', 0)

    def run(self):
        """
        * Load review history.
        * Call update() every update_interval seconds.

        Does not return.
        """

        if self.timestamp <= 0:
            self.load_history()

        while True:
            self.update()
            time.sleep(self.update_interval)
507
508
def main():
    """
    Configure logging, read the gerrit credentials for GERRIT_HOST from
    GERRIT_AUTH_PATH and run a Reviewer until interrupted.
    """
    logging.basicConfig(format='%(asctime)s %(message)s', level=logging.DEBUG)

    with open(GERRIT_AUTH_PATH) as auth_file:
        credentials = json.load(auth_file)[GERRIT_HOST]['gerrit/http']

    Reviewer(GERRIT_HOST, GERRIT_PROJECT, GERRIT_BRANCH,
             credentials['username'], credentials['password'],
             REVIEW_HISTORY_PATH).run()


if __name__ == "__main__":
    main()