/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
 * vim:expandtab:shiftwidth=8:tabstop=8:
 *
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 *
 * lustre/lvfs/upcall_cache.c
 *
 * Supplementary groups cache.
 */
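/*
 * Overview: each upcall_cache maps a numeric key (a uid, in practice) to a
 * cached set of supplementary groups.  On a cache miss a userspace helper
 * is forked via refresh_entry(); its answer is fed back through
 * upcall_cache_downcall(), which validates the entry and wakes any waiters.
 * Entries expire uc_entry_expire jiffies after validation.
 */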

#define DEBUG_SUBSYSTEM S_SEC

#ifndef AUTOCONF_INCLUDED
#include <linux/config.h>
#endif
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/kmod.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/errno.h>
#include <linux/version.h>
#include <linux/unistd.h>
#include <linux/fs.h>
#include <linux/slab.h>

#include <asm/system.h>
#include <asm/uaccess.h>

#include <obd_support.h>
#include <lustre_lib.h>

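/*
 * Kernels before 2.6.4 do not provide groups_alloc()/groups_free(), so
 * supply minimal single-block versions here (at most NGROUPS_SMALL gids).
 */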
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4)
struct group_info *groups_alloc(int ngroups)
{
        struct group_info *ginfo;

        LASSERT(ngroups <= NGROUPS_SMALL);

        OBD_ALLOC(ginfo, sizeof(*ginfo) + 1 * sizeof(gid_t *));
        if (!ginfo)
                return NULL;
        ginfo->ngroups = ngroups;
        ginfo->nblocks = 1;
        ginfo->blocks[0] = ginfo->small_block;
        atomic_set(&ginfo->usage, 1);

        return ginfo;
}

void groups_free(struct group_info *ginfo)
{
        LASSERT(ginfo->ngroups <= NGROUPS_SMALL);
        LASSERT(ginfo->nblocks == 1);
        LASSERT(ginfo->blocks[0] == ginfo->small_block);

        OBD_FREE(ginfo, sizeof(*ginfo) + 1 * sizeof(gid_t *));
}
#endif

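/*
 * Entry life cycle: entries are allocated NEW, switch to ACQUIRING while
 * the upcall is in flight, and end up VALID (successful downcall) or
 * INVALID (error).  An entry is freed only once it is unhashed and its
 * refcount drops to zero.
 */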
static struct upcall_cache_entry *alloc_entry(__u64 key)
{
        struct upcall_cache_entry *entry;

        OBD_ALLOC(entry, sizeof(*entry));
        if (!entry)
                return NULL;

        UC_CACHE_SET_NEW(entry);
        INIT_LIST_HEAD(&entry->ue_hash);
        entry->ue_key = key;
        atomic_set(&entry->ue_refcount, 0);
        init_waitqueue_head(&entry->ue_waitq);
        return entry;
}

/* protected by hash lock */
static void free_entry(struct upcall_cache_entry *entry)
{
        if (entry->ue_group_info)
                put_group_info(entry->ue_group_info);
        list_del(&entry->ue_hash);
        CDEBUG(D_OTHER, "destroy cache entry %p for key "LPU64"\n",
               entry, entry->ue_key);
        OBD_FREE(entry, sizeof(*entry));
}

static void get_entry(struct upcall_cache_entry *entry)
{
        atomic_inc(&entry->ue_refcount);
}

static void put_entry(struct upcall_cache_entry *entry)
{
        if (atomic_dec_and_test(&entry->ue_refcount) &&
            (UC_CACHE_IS_INVALID(entry) || UC_CACHE_IS_EXPIRED(entry))) {
                free_entry(entry);
        }
}

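/*
 * Unhash @entry if it is invalid, expired, or stuck in ACQUIRING past
 * ue_acquire_expire; returns 1 if the entry was unlinked (and possibly
 * freed), 0 if it is still usable.  Called with uc_lock held.
 */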
static int check_unlink_entry(struct upcall_cache_entry *entry)
{
        if (UC_CACHE_IS_VALID(entry) &&
            time_before(jiffies, entry->ue_expire))
                return 0;

        if (UC_CACHE_IS_ACQUIRING(entry)) {
                if (time_before(jiffies, entry->ue_acquire_expire))
                        return 0;

                UC_CACHE_SET_EXPIRED(entry);
                wake_up_all(&entry->ue_waitq);
        } else if (!UC_CACHE_IS_INVALID(entry)) {
                UC_CACHE_SET_EXPIRED(entry);
        }

        list_del_init(&entry->ue_hash);
        if (!atomic_read(&entry->ue_refcount))
                free_entry(entry);
        return 1;
}

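/*
 * Fork the configured userspace helper to resolve @entry->ue_key.  The
 * helper is invoked as "<uc_upcall> <uc_name> <key>"; for example, with a
 * helper installed as /usr/sbin/l_getgroups and a cache named "mds1", key
 * 500 would run "/usr/sbin/l_getgroups mds1 500" (names illustrative).
 * The helper answers asynchronously through upcall_cache_downcall().
 */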
static int refresh_entry(struct upcall_cache *hash,
                         struct upcall_cache_entry *entry)
{
        char *argv[4];
        char *envp[3];
        char keystr[16];
        int rc;
        ENTRY;

        snprintf(keystr, sizeof(keystr), LPU64, entry->ue_key);

        CDEBUG(D_INFO, "The groups upcall is: %s\n", hash->uc_upcall);
        argv[0] = hash->uc_upcall;
        argv[1] = hash->uc_name;
        argv[2] = keystr;
        argv[3] = NULL;

        envp[0] = "HOME=/";
        envp[1] = "PATH=/sbin:/usr/sbin";
        envp[2] = NULL;

        rc = USERMODEHELPER(argv[0], argv, envp);
        if (rc < 0) {
                CERROR("%s: error invoking getgroups upcall %s %s %s: rc %d; "
                       "check /proc/fs/lustre/mds/%s/group_upcall\n",
                       hash->uc_name, argv[0], argv[1], argv[2], rc, argv[1]);
        } else {
                CDEBUG(D_HA, "%s: invoked upcall %s %s %s\n", hash->uc_name,
                       argv[0], argv[1], argv[2]);
                rc = 0;
        }
        RETURN(rc);
}

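/*
 * Attach the primary gid and supplementary group list to @entry, clamping
 * oversized lists to NGROUPS_MAX and copying them block by block into a
 * freshly allocated group_info.
 */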
static int entry_set_group_info(struct upcall_cache_entry *entry, __u32 primary,
                                __u32 ngroups, __u32 *groups)
{
        struct group_info *ginfo;
        int i, j;
        ENTRY;

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4)
        if (ngroups > NGROUPS)
                ngroups = NGROUPS;
#endif

        if (ngroups > NGROUPS_MAX) {
                CERROR("using first %d supplementary groups for uid "LPU64"\n",
                       NGROUPS_MAX, entry->ue_key);
                ngroups = NGROUPS_MAX;
        }

        ginfo = groups_alloc(ngroups);
        if (!ginfo) {
                CERROR("uid "LPU64" update can't alloc ginfo for %d groups\n",
                       entry->ue_key, ngroups);
                RETURN(-ENOMEM);
        }
        entry->ue_group_info = ginfo;
        entry->ue_primary = primary;

        for (i = 0; i < ginfo->nblocks; i++) {
                int cp_count = min_t(int, NGROUPS_PER_BLOCK, ngroups);
                int off = i * NGROUPS_PER_BLOCK;

                for (j = 0; j < cp_count; j++)
                        ginfo->blocks[i][j] = groups[off + j];

                ngroups -= cp_count;
        }
        RETURN(0);
}

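/*
 * Look up (or create) the cache entry for @key.  For a new entry the
 * upcall is fired and the caller sleeps until it completes or times out.
 * When the upcall is set to "NONE", an uncacheable one-shot entry is built
 * directly from @primary/@groups (the suppgids supplied by the client)
 * instead.  Returns a referenced entry or an ERR_PTR(); the caller drops
 * the reference with upcall_cache_put_entry().
 */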
struct upcall_cache_entry *upcall_cache_get_entry(struct upcall_cache *hash,
                                                  __u64 key, __u32 primary,
                                                  __u32 ngroups, __u32 *groups)
{
        struct upcall_cache_entry *entry = NULL, *new = NULL, *next;
        struct list_head *head;
        wait_queue_t wait;
        int rc, found;
        ENTRY;

        LASSERT(hash);

        if (strcmp(hash->uc_upcall, "NONE") == 0) {
                new = alloc_entry(key);
                if (!new) {
                        CERROR("failed to allocate entry\n");
                        RETURN(NULL);
                }
                get_entry(new);

                /* We have to sort the groups for 2.6 kernels */
                LASSERT(ngroups <= 2);
                if (ngroups == 2 && groups[1] == -1)
                        ngroups--;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4)
                /* 2.6 needs the groups array sorted */
                if (ngroups == 2 && groups[0] > groups[1]) {
                        __u32 tmp = groups[1];
                        groups[1] = groups[0];
                        groups[0] = tmp;
                }
#endif
                if (ngroups > 0 && groups[0] == -1) {
                        groups[0] = groups[1];
                        ngroups--;
                }

                rc = entry_set_group_info(new, primary, ngroups, groups);

                /* We can't cache this entry as it only has a subset of
                 * the user's groups, as sent in suppgid1, suppgid2. */
                UC_CACHE_SET_EXPIRED(new);
                RETURN(new);
        }
        head = &hash->uc_hashtable[UC_CACHE_HASH_INDEX(key)];
find_again:
        found = 0;
        spin_lock(&hash->uc_lock);
        list_for_each_entry_safe(entry, next, head, ue_hash) {
                /* check invalid & expired items */
                if (check_unlink_entry(entry))
                        continue;
                if (entry->ue_key == key) {
                        found = 1;
                        break;
                }
        }

        if (!found) { /* didn't find it */
                if (!new) {
                        spin_unlock(&hash->uc_lock);
                        new = alloc_entry(key);
                        if (!new) {
                                CERROR("failed to allocate entry\n");
                                RETURN(ERR_PTR(-ENOMEM));
                        }
                        goto find_again;
                } else {
                        list_add(&new->ue_hash, head);
                        entry = new;
                }
        } else {
                if (new) {
                        free_entry(new);
                        new = NULL;
                }
                list_move(&entry->ue_hash, head);
        }
        get_entry(entry);

        /* start the upcall for a new entry */
        if (UC_CACHE_IS_NEW(entry)) {
                UC_CACHE_SET_ACQUIRING(entry);
                UC_CACHE_CLEAR_NEW(entry);
                entry->ue_acquire_expire = jiffies + hash->uc_acquire_expire;
                spin_unlock(&hash->uc_lock);
                rc = refresh_entry(hash, entry);
                spin_lock(&hash->uc_lock);
                if (rc < 0) {
                        UC_CACHE_CLEAR_ACQUIRING(entry);
                        UC_CACHE_SET_INVALID(entry);
                }
                /* fall through */
        }
        /* someone (and only one) is doing the upcall for
         * this item; just wait for it to complete
         */
        if (UC_CACHE_IS_ACQUIRING(entry)) {
                init_waitqueue_entry(&wait, current);
                add_wait_queue(&entry->ue_waitq, &wait);
                set_current_state(TASK_INTERRUPTIBLE);
                spin_unlock(&hash->uc_lock);

                schedule_timeout(hash->uc_acquire_expire);

                spin_lock(&hash->uc_lock);
                remove_wait_queue(&entry->ue_waitq, &wait);
                if (UC_CACHE_IS_ACQUIRING(entry)) {
                        static unsigned long next;
                        /* we were interrupted or the upcall failed midway */
                        if (time_after(jiffies, next)) {
                                CERROR("acquire timeout exceeded for key "LPU64
                                       "\n", entry->ue_key);
                                next = jiffies + 1800;
                        }
                        put_entry(entry);
                        GOTO(out, entry = ERR_PTR(-EIDRM));
                }
                /* fall through */
        }

        /* invalid means error, don't need to try again */
        if (UC_CACHE_IS_INVALID(entry)) {
                put_entry(entry);
                GOTO(out, entry = ERR_PTR(-EIDRM));
        }

        /* check expiry
         * We can't refresh the existing entry in place because its
         * group_info might be shared by multiple processes.
         */
        if (check_unlink_entry(entry)) {
                /* If expired, try again.  But if the entry was created
                 * by us and expired too quickly without any error, give
                 * it at least one chance to be used.
                 */
                if (entry != new) {
                        put_entry(entry);
                        spin_unlock(&hash->uc_lock);
                        new = NULL;
                        goto find_again;
                }
        }

        /* Now we know it's good */
out:
        spin_unlock(&hash->uc_lock);
        RETURN(entry);
}
EXPORT_SYMBOL(upcall_cache_get_entry);

void upcall_cache_put_entry(struct upcall_cache *hash,
                            struct upcall_cache_entry *entry)
{
        ENTRY;

        if (!entry) {
                EXIT;
                return;
        }

        LASSERT(atomic_read(&entry->ue_refcount) > 0);
        spin_lock(&hash->uc_lock);
        put_entry(entry);
        spin_unlock(&hash->uc_lock);
        EXIT;
}
EXPORT_SYMBOL(upcall_cache_put_entry);

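/*
 * Downcall: the userspace helper delivers the resolved group list for
 * @key back into the cache (this arrives via the MDS ioctl path), which
 * validates the matching ACQUIRING entry and wakes all waiters.
 */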
int upcall_cache_downcall(struct upcall_cache *hash, __u32 err, __u64 key,
                          __u32 primary, __u32 ngroups, __u32 *groups)
{
        struct upcall_cache_entry *entry = NULL;
        struct list_head *head;
        int found = 0, rc = 0;
        ENTRY;

        LASSERT(hash);

        head = &hash->uc_hashtable[UC_CACHE_HASH_INDEX(key)];

        spin_lock(&hash->uc_lock);
        list_for_each_entry(entry, head, ue_hash) {
                if (entry->ue_key == key) {
                        found = 1;
                        get_entry(entry);
                        break;
                }
        }

        if (!found) {
                /* the entry may already have expired and been unhashed;
                 * the loop cursor is not a valid entry here, so log @key */
                CDEBUG(D_OTHER, "%s: upcall for key "LPU64" not expected\n",
                       hash->uc_name, key);
                spin_unlock(&hash->uc_lock);
                RETURN(-EINVAL);
        }

        if (err) {
                CDEBUG(D_OTHER, "%s: upcall for key "LPU64" returned %d\n",
                       hash->uc_name, entry->ue_key, err);
                GOTO(out, rc = -EINVAL);
        }

        if (!UC_CACHE_IS_ACQUIRING(entry)) {
                CDEBUG(D_RPCTRACE, "%s: found uptodate entry %p (key "LPU64")\n",
                       hash->uc_name, entry, entry->ue_key);
                GOTO(out, rc = 0);
        }

        if (UC_CACHE_IS_INVALID(entry) || UC_CACHE_IS_EXPIRED(entry)) {
                CERROR("%s: found a stale entry %p (key "LPU64") in ioctl\n",
                       hash->uc_name, entry, entry->ue_key);
                GOTO(out, rc = -EINVAL);
        }

        spin_unlock(&hash->uc_lock);
        rc = entry_set_group_info(entry, primary, ngroups, groups);
        spin_lock(&hash->uc_lock);
        if (rc)
                GOTO(out, rc);

        entry->ue_expire = jiffies + hash->uc_entry_expire;
        UC_CACHE_SET_VALID(entry);
        CDEBUG(D_OTHER, "%s: created upcall cache entry %p for key "LPU64"\n",
               hash->uc_name, entry, entry->ue_key);
out:
        if (rc) {
                UC_CACHE_SET_INVALID(entry);
                list_del_init(&entry->ue_hash);
        }
        UC_CACHE_CLEAR_ACQUIRING(entry);
        spin_unlock(&hash->uc_lock);
        wake_up_all(&entry->ue_waitq);
        put_entry(entry);

        RETURN(rc);
}
EXPORT_SYMBOL(upcall_cache_downcall);

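/*
 * Drop all entries.  Without @force, busy entries are only marked EXPIRED
 * and are freed on their final put; with @force, every entry is expected
 * to be idle (asserted) and is freed immediately.
 */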
static void cache_flush(struct upcall_cache *hash, int force)
{
        struct upcall_cache_entry *entry, *next;
        int i;
        ENTRY;

        spin_lock(&hash->uc_lock);
        for (i = 0; i < UC_CACHE_HASH_SIZE; i++) {
                list_for_each_entry_safe(entry, next,
                                         &hash->uc_hashtable[i], ue_hash) {
                        if (!force && atomic_read(&entry->ue_refcount)) {
                                UC_CACHE_SET_EXPIRED(entry);
                                continue;
                        }
                        LASSERT(!atomic_read(&entry->ue_refcount));
                        free_entry(entry);
                }
        }
        spin_unlock(&hash->uc_lock);
        EXIT;
}

void upcall_cache_flush_idle(struct upcall_cache *cache)
{
        cache_flush(cache, 0);
}
EXPORT_SYMBOL(upcall_cache_flush_idle);

void upcall_cache_flush_all(struct upcall_cache *cache)
{
        cache_flush(cache, 1);
}
EXPORT_SYMBOL(upcall_cache_flush_all);

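/*
 * Allocate and initialize a cache.  The upcall path and expiry times set
 * here are defaults and are meant to be tuned through /proc.  A minimal
 * usage sketch (hypothetical caller, error handling abbreviated):
 *
 *      struct upcall_cache *cache = upcall_cache_init("mds1");
 *      struct upcall_cache_entry *e;
 *
 *      e = upcall_cache_get_entry(cache, uid, gid, 0, NULL);
 *      if (e != NULL && !IS_ERR(e)) {
 *              ... use e->ue_primary and e->ue_group_info ...
 *              upcall_cache_put_entry(cache, e);
 *      }
 *      upcall_cache_cleanup(cache);
 */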
struct upcall_cache *upcall_cache_init(const char *name)
{
        struct upcall_cache *hash;
        int i;
        ENTRY;

        OBD_ALLOC(hash, sizeof(*hash));
        if (!hash)
                RETURN(ERR_PTR(-ENOMEM));

        spin_lock_init(&hash->uc_lock);
        for (i = 0; i < UC_CACHE_HASH_SIZE; i++)
                INIT_LIST_HEAD(&hash->uc_hashtable[i]);
        strncpy(hash->uc_name, name, sizeof(hash->uc_name) - 1);
        /* set default values; tunable via /proc */
        strcpy(hash->uc_upcall, "NONE");
        hash->uc_entry_expire = 10 * 60 * HZ;
        hash->uc_acquire_expire = 15 * HZ;

        RETURN(hash);
}
EXPORT_SYMBOL(upcall_cache_init);

void upcall_cache_cleanup(struct upcall_cache *hash)
{
        if (!hash)
                return;
        upcall_cache_flush_all(hash);
        OBD_FREE(hash, sizeof(*hash));
}
EXPORT_SYMBOL(upcall_cache_cleanup);