1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright 2008 Sun Microsystems, Inc. All rights reserved
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lustre/lvfs/upcall_cache.c
38 * Supplementary groups cache.
41 #define DEBUG_SUBSYSTEM S_SEC
43 #ifndef AUTOCONF_INCLUDED
44 #include <linux/config.h>
46 #include <linux/module.h>
47 #include <linux/kernel.h>
49 #include <linux/kmod.h>
50 #include <linux/string.h>
51 #include <linux/stat.h>
52 #include <linux/errno.h>
53 #include <linux/version.h>
54 #include <linux/unistd.h>
56 #include <asm/system.h>
57 #include <asm/uaccess.h>
60 #include <linux/stat.h>
61 #include <asm/uaccess.h>
62 #include <linux/slab.h>
64 #include <obd_support.h>
65 #include <lustre_lib.h>
67 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4)
/*
 * groups_alloc() - build a single-block group_info for ngroups entries.
 *
 * Follows the "#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4)" guard, so it
 * is a compatibility implementation for older kernels.
 *
 * ngroups is asserted to be no larger than NGROUPS_SMALL. The allocation is
 * sized for the structure plus one gid_t pointer slot, blocks[0] is pointed
 * at the embedded small_block, and the usage counter starts at 1.
 *
 * NOTE(review): the allocation-failure check, any nblocks assignment and the
 * return statement are not visible in this view - confirm against the full
 * source before relying on them.
 */
68 struct group_info *groups_alloc(int ngroups)
70 struct group_info *ginfo;
72 LASSERT(ngroups <= NGROUPS_SMALL);
74 OBD_ALLOC(ginfo, sizeof(*ginfo) + 1 * sizeof(gid_t *));
77 ginfo->ngroups = ngroups;
79 ginfo->blocks[0] = ginfo->small_block;
80 atomic_set(&ginfo->usage, 1);
/*
 * groups_free() - release a group_info that uses the single-block layout.
 *
 * The assertions require ngroups <= NGROUPS_SMALL, exactly one block, and
 * blocks[0] pointing at the embedded small_block. The size handed to
 * OBD_FREE is the structure plus one gid_t pointer slot, which is the same
 * size expression groups_alloc() in this file passes to OBD_ALLOC.
 */
85 void groups_free(struct group_info *ginfo)
87 LASSERT(ginfo->ngroups <= NGROUPS_SMALL);
88 LASSERT(ginfo->nblocks == 1);
89 LASSERT(ginfo->blocks[0] == ginfo->small_block);
91 OBD_FREE(ginfo, sizeof(*ginfo) + 1 * sizeof(gid_t *));
/*
 * alloc_entry() - allocate and initialise a cache entry.
 *
 * The new entry is flagged NEW, its hash list link is initialised as an
 * empty list head, its reference count is set to 0 and its wait queue is
 * initialised.
 *
 * NOTE(review): the key parameter is not used on any line visible here, and
 * the allocation-failure check and the return statement are likewise not
 * visible - presumably the key is stored in ue_key and the entry (or NULL)
 * is returned; confirm against the full source.
 */
95 static struct upcall_cache_entry *alloc_entry(__u64 key)
97 struct upcall_cache_entry *entry;
99 OBD_ALLOC(entry, sizeof(*entry));
103 UC_CACHE_SET_NEW(entry);
104 INIT_LIST_HEAD(&entry->ue_hash);
106 atomic_set(&entry->ue_refcount, 0);
107 init_waitqueue_head(&entry->ue_waitq);
/*
 * free_entry() - destroy a cache entry.
 *
 * Drops the reference on the attached group_info when one is set, removes
 * the entry from its hash list, emits a D_OTHER debug message carrying the
 * entry pointer and key, and finally frees the entry memory.
 */
111 /* protected by hash lock */
112 static void free_entry(struct upcall_cache_entry *entry)
114 if (entry->ue_group_info)
115 put_group_info(entry->ue_group_info);
116 list_del(&entry->ue_hash);
117 CDEBUG(D_OTHER, "destroy cache entry %p for key "LPU64"\n",
118 entry, entry->ue_key);
119 OBD_FREE(entry, sizeof(*entry));
/* get_entry() - take one reference on a cache entry (atomic increment). */
122 static void get_entry(struct upcall_cache_entry *entry)
124 atomic_inc(&entry->ue_refcount);
/*
 * put_entry() - drop one reference on a cache entry.
 *
 * The conditional branch is taken only when this call released the last
 * reference (the decrement reached zero) and the entry is flagged INVALID
 * or EXPIRED.
 *
 * NOTE(review): the body of that branch is not visible in this view;
 * presumably it destroys the entry - confirm against the full source.
 */
127 static void put_entry(struct upcall_cache_entry *entry)
129 if (atomic_dec_and_test(&entry->ue_refcount) &&
130 (UC_CACHE_IS_INVALID(entry) || UC_CACHE_IS_EXPIRED(entry))) {
/*
 * check_unlink_entry() - decide whether an entry is still usable and unlink
 * it from the hash list when it is not.
 *
 * Visible decision steps, in order:
 *  - a VALID entry whose ue_expire is still in the future is tested first;
 *  - an ACQUIRING entry is tested against ue_acquire_expire; once that
 *    deadline has passed it is flagged EXPIRED and every waiter on
 *    ue_waitq is woken;
 *  - any other entry that is not INVALID is flagged EXPIRED;
 *  - the entry is then removed from the hash list with list_del_init, and
 *    its reference count is tested for zero.
 *
 * NOTE(review): the return statements and the action taken when the
 * reference count is zero are not visible in this view. Callers in this
 * file test the result for non-zero, so presumably 0 means the entry was
 * kept and non-zero means it was unlinked - confirm.
 */
135 static int check_unlink_entry(struct upcall_cache_entry *entry)
137 if (UC_CACHE_IS_VALID(entry) &&
138 time_before(jiffies, entry->ue_expire))
141 if (UC_CACHE_IS_ACQUIRING(entry)) {
142 if (time_before(jiffies, entry->ue_acquire_expire))
145 UC_CACHE_SET_EXPIRED(entry);
146 wake_up_all(&entry->ue_waitq);
147 } else if (!UC_CACHE_IS_INVALID(entry)) {
148 UC_CACHE_SET_EXPIRED(entry);
151 list_del_init(&entry->ue_hash);
152 if (!atomic_read(&entry->ue_refcount))
/*
 * refresh_entry() - launch the user-space upcall for one cache entry.
 *
 * The entry key is formatted into keystr, the upcall path (uc_upcall) is
 * logged and used as argv[0], and the cache name (uc_name) is passed as
 * argv[1]. The environment includes PATH=/sbin:/usr/sbin. The helper is
 * started through USERMODEHELPER and its result is kept in rc; a CERROR
 * naming the group_upcall proc file and a D_HA debug message are the two
 * visible outcomes.
 *
 * NOTE(review): the declarations of keystr, argv, envp and rc, the
 * assignment of argv[2] (presumably keystr), the remaining envp slots, the
 * test on rc and the return statement are not visible in this view.
 *
 * NOTE(review): the snprintf limit is hard-coded to 16 bytes. A 64-bit key
 * printed in decimal can need up to 21 bytes including the terminator, so
 * large keys would be truncated - confirm the size of keystr and the
 * expansion of LPU64.
 */
157 static int refresh_entry(struct upcall_cache *hash,
158 struct upcall_cache_entry *entry)
166 snprintf(keystr, 16, LPU64, entry->ue_key);
168 CDEBUG(D_INFO, "The groups upcall is: %s \n", hash->uc_upcall);
169 argv[0] = hash->uc_upcall;
170 argv[1] = hash->uc_name;
175 envp[1] = "PATH=/sbin:/usr/sbin";
178 rc = USERMODEHELPER(argv[0], argv, envp);
180 CERROR("%s: error invoking getgroups upcall %s %s %s: rc %d; "
181 "check /proc/fs/lustre/mds/%s/group_upcall\n",
182 hash->uc_name, argv[0], argv[1], argv[2], rc, argv[1]);
184 CDEBUG(D_HA, "%s: invoked upcall %s %s %s\n", hash->uc_name,
185 argv[0], argv[1], argv[2]);
/*
 * entry_set_group_info() - attach a primary gid and a supplementary group
 * list to a cache entry.
 *
 * @entry:   cache entry to update; ue_group_info and ue_primary are written
 * @primary: value stored in ue_primary
 * @ngroups: number of ids supplied in @groups
 * @groups:  array of group ids to copy
 *
 * The group count is bounded first: one visible test compares it with
 * NGROUPS under the pre-2.6.4 guard, and another clamps it to NGROUPS_MAX
 * after logging a CERROR. A group_info is then obtained from groups_alloc
 * and filled block by block, copying up to NGROUPS_PER_BLOCK ids per block
 * starting at offset i * NGROUPS_PER_BLOCK in @groups.
 *
 * NOTE(review): the #else and #endif lines, the test on the groups_alloc
 * result, the declarations of i and j, and the return statements are not
 * visible in this view.
 *
 * NOTE(review): no visible line reduces ngroups after a block is copied, so
 * from this view cp_count would be the same for every block - confirm that
 * the full source subtracts cp_count from ngroups inside the outer loop.
 */
191 static int entry_set_group_info(struct upcall_cache_entry *entry, __u32 primary,
192 __u32 ngroups, __u32 *groups)
194 struct group_info *ginfo;
198 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4)
199 if (ngroups > NGROUPS)
203 if (ngroups > NGROUPS_MAX) {
204 CERROR("using first %d supplementary groups for uid "LPU64"\n",
205 NGROUPS_MAX, entry->ue_key);
206 ngroups = NGROUPS_MAX;
209 ginfo = groups_alloc(ngroups);
211 CERROR("uid "LPU64" update can't alloc ginfo for %d groups\n",
212 entry->ue_key, ngroups);
215 entry->ue_group_info = ginfo;
216 entry->ue_primary = primary;
218 for (i = 0; i < ginfo->nblocks; i++) {
219 int cp_count = min_t(int, NGROUPS_PER_BLOCK, ngroups);
220 int off = i * NGROUPS_PER_BLOCK;
222 for (j = 0; j < cp_count; j++)
223 ginfo->blocks[i][j] = groups[off + j];
/*
 * upcall_cache_get_entry() - look up, or create and populate, the cache
 * entry for a key.
 *
 * @hash:    cache to search; uc_lock protects the hash table
 * @key:     lookup key (log messages elsewhere in this file call it a uid)
 * @primary: primary gid, used only on the NONE path
 * @ngroups: number of ids in @groups, used only on the NONE path
 * @groups:  supplementary group ids; may be reordered in place on the NONE
 *           path
 *
 * Visible flow:
 *  1. When uc_upcall is the string NONE, a fresh entry is allocated and
 *     filled directly from the arguments. ngroups is asserted to be at most
 *     2, ids equal to -1 are treated specially, and on kernels >= 2.6.4 the
 *     two ids are swapped into ascending order. The entry is flagged
 *     EXPIRED so that it is not cached.
 *  2. Otherwise the bucket for the key is scanned under uc_lock. Each
 *     element is first passed through check_unlink_entry, then compared
 *     with the key. On a miss the lock is dropped, a new entry is allocated
 *     (ERR_PTR(-ENOMEM) on failure) and added at the bucket head; on a hit
 *     the entry is moved to the bucket head.
 *  3. A NEW entry is switched to ACQUIRING, given an acquire deadline of
 *     jiffies + uc_acquire_expire, and refresh_entry is called with the
 *     lock released. The visible failure handling clears ACQUIRING and
 *     sets INVALID.
 *  4. While the entry is ACQUIRING the caller sleeps interruptibly on
 *     ue_waitq for at most uc_acquire_expire. If the entry is still
 *     ACQUIRING afterwards, a CERROR is emitted at most once per 1800
 *     jiffies (tracked in a function-static timestamp) and the result is
 *     ERR_PTR(-EIDRM).
 *  5. An INVALID entry also yields ERR_PTR(-EIDRM).
 *  6. check_unlink_entry is applied once more; an expired entry leads to
 *     the lock being dropped (the existing comment says the lookup is
 *     retried).
 *
 * NOTE(review): many lines are not visible in this view, among them the
 * declarations of wait, found and rc, the reference counting calls, the
 * find_again and out labels, the retry jump and the final return. The
 * description above covers only what the visible lines establish.
 *
 * NOTE(review): the function-static variable named next in the timeout
 * branch shadows the local pointer next declared at the top of the
 * function - worth renaming.
 */
230 struct upcall_cache_entry *upcall_cache_get_entry(struct upcall_cache *hash,
231 __u64 key, __u32 primary,
232 __u32 ngroups, __u32 *groups)
234 struct upcall_cache_entry *entry = NULL, *new = NULL, *next;
235 struct list_head *head;
/* Upcall disabled: build an uncached entry straight from the arguments. */
242 if (strcmp(hash->uc_upcall, "NONE") == 0) {
243 new = alloc_entry(key);
245 CERROR("fail to alloc entry\n");
250 /* We have to sort the groups for 2.6 kernels */
251 LASSERT(ngroups <= 2);
252 if (ngroups == 2 && groups[1] == -1)
254 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4)
255 /* 2.6 needs groups array sorted */
256 if (ngroups == 2 && groups[0] > groups[1]) {
257 __u32 tmp = groups[1];
258 groups[1] = groups[0];
262 if (ngroups > 0 && groups[0] == -1) {
263 groups[0] = groups[1];
267 rc = entry_set_group_info(new, primary, ngroups, groups);
269 /* We can't cache this entry as it only has a subset of
270 * the user's groups, as sent in suppgid1, suppgid2. */
271 UC_CACHE_SET_EXPIRED(new);
/* Upcall enabled: locate the hash bucket for this key. */
274 head = &hash->uc_hashtable[UC_CACHE_HASH_INDEX(key)];
277 spin_lock(&hash->uc_lock);
278 list_for_each_entry_safe(entry, next, head, ue_hash) {
279 /* check invalid & expired items */
280 if (check_unlink_entry(entry))
282 if (entry->ue_key == key) {
288 if (!found) { /* didn't find it */
290 spin_unlock(&hash->uc_lock);
291 new = alloc_entry(key);
293 CERROR("fail to alloc entry\n");
294 RETURN(ERR_PTR(-ENOMEM));
298 list_add(&new->ue_hash, head);
306 list_move(&entry->ue_hash, head);
310 /* acquire for new one */
311 if (UC_CACHE_IS_NEW(entry)) {
312 UC_CACHE_SET_ACQUIRING(entry);
313 UC_CACHE_CLEAR_NEW(entry);
314 entry->ue_acquire_expire = jiffies + hash->uc_acquire_expire;
315 spin_unlock(&hash->uc_lock);
316 rc = refresh_entry(hash, entry);
317 spin_lock(&hash->uc_lock);
319 UC_CACHE_CLEAR_ACQUIRING(entry);
320 UC_CACHE_SET_INVALID(entry);
324 /* someone (and only one) is doing upcall upon
325 * this item, just wait it complete
327 if (UC_CACHE_IS_ACQUIRING(entry)) {
328 init_waitqueue_entry(&wait, current);
329 add_wait_queue(&entry->ue_waitq, &wait);
330 set_current_state(TASK_INTERRUPTIBLE);
331 spin_unlock(&hash->uc_lock);
333 schedule_timeout(hash->uc_acquire_expire);
335 spin_lock(&hash->uc_lock);
336 remove_wait_queue(&entry->ue_waitq, &wait);
337 if (UC_CACHE_IS_ACQUIRING(entry)) {
338 static unsigned long next;
339 /* we're interrupted or upcall failed in the middle */
340 if (time_after(jiffies, next)) {
341 CERROR("acquire timeout exceeded for key "LPU64
342 "\n", entry->ue_key);
343 next = jiffies + 1800;
346 GOTO(out, entry = ERR_PTR(-EIDRM));
351 /* invalid means error, don't need to try again */
352 if (UC_CACHE_IS_INVALID(entry)) {
354 GOTO(out, entry = ERR_PTR(-EIDRM));
358 * We can't refresh the existing one because some
359 * memory might be shared by multiple processes.
361 if (check_unlink_entry(entry)) {
362 /* if expired, try again. but if this entry is
363 * created by me but too quickly turn to expired
364 * without any error, should at least give a
365 * chance to use it once.
369 spin_unlock(&hash->uc_lock);
375 /* Now we know it's good */
377 spin_unlock(&hash->uc_lock);
380 EXPORT_SYMBOL(upcall_cache_get_entry);
/*
 * upcall_cache_put_entry() - release a reference obtained on a cache entry.
 *
 * Asserts that the entry still holds at least one reference, then takes
 * and releases uc_lock around the release step.
 *
 * NOTE(review): the statement executed while the lock is held, and any
 * early-exit checks on the entry pointer, are not visible in this view;
 * presumably the locked statement is a call to put_entry - confirm.
 */
382 void upcall_cache_put_entry(struct upcall_cache *hash,
383 struct upcall_cache_entry *entry)
392 LASSERT(atomic_read(&entry->ue_refcount) > 0);
393 spin_lock(&hash->uc_lock);
395 spin_unlock(&hash->uc_lock);
398 EXPORT_SYMBOL(upcall_cache_put_entry);
/*
 * upcall_cache_downcall() - deliver the result of a user-space upcall to
 * the cache entry that is waiting for it.
 *
 * @hash:    cache holding the entry
 * @err:     status reported by the upcall; it is logged together with an
 *           -EINVAL result on one visible path
 * @key:     key of the entry being answered
 * @primary: primary gid to store
 * @ngroups: number of ids in @groups
 * @groups:  supplementary group ids to store
 *
 * Visible flow:
 *  - the bucket for the key is scanned under uc_lock for a matching entry;
 *  - a debug message reports an upcall that was not expected and the lock
 *    is released;
 *  - a reported upcall status is logged and mapped to -EINVAL;
 *  - an entry that is not ACQUIRING is reported as already up to date;
 *  - an INVALID or EXPIRED entry is reported as stale with -EINVAL;
 *  - otherwise the lock is dropped while entry_set_group_info runs, then
 *    retaken; the entry gets ue_expire = jiffies + uc_entry_expire and is
 *    flagged VALID;
 *  - on the way out the entry may be flagged INVALID and unlinked,
 *    ACQUIRING is cleared, the lock is released and all waiters on
 *    ue_waitq are woken.
 *
 * NOTE(review): the conditions guarding most of these steps, the out label
 * and the return statements are not visible in this view.
 *
 * NOTE(review): the message for an upcall that was not expected prints
 * entry->ue_key. If that message is on the not-found path, as the comment
 * that follows it suggests, the loop cursor does not refer to a real
 * element at that point and the key parameter should be printed instead -
 * confirm against the full source.
 */
400 int upcall_cache_downcall(struct upcall_cache *hash, __u32 err, __u64 key,
401 __u32 primary, __u32 ngroups, __u32 *groups)
403 struct upcall_cache_entry *entry = NULL;
404 struct list_head *head;
405 int found = 0, rc = 0;
410 head = &hash->uc_hashtable[UC_CACHE_HASH_INDEX(key)];
412 spin_lock(&hash->uc_lock);
413 list_for_each_entry(entry, head, ue_hash) {
414 if (entry->ue_key == key) {
422 CDEBUG(D_OTHER, "%s: upcall for key "LPU64" not expected\n",
423 hash->uc_name, entry->ue_key);
424 /* haven't found, it's possible */
425 spin_unlock(&hash->uc_lock);
430 CDEBUG(D_OTHER, "%s: upcall for key "LPU64" returned %d\n",
431 hash->uc_name, entry->ue_key, err);
432 GOTO(out, rc = -EINVAL);
435 if (!UC_CACHE_IS_ACQUIRING(entry)) {
436 CDEBUG(D_RPCTRACE,"%s: found uptodate entry %p (key "LPU64")\n",
437 hash->uc_name, entry, entry->ue_key);
441 if (UC_CACHE_IS_INVALID(entry) || UC_CACHE_IS_EXPIRED(entry)) {
442 CERROR("%s: found a stale entry %p (key "LPU64") in ioctl\n",
443 hash->uc_name, entry, entry->ue_key);
444 GOTO(out, rc = -EINVAL);
/* The lock is released around the call that fills in the group info. */
447 spin_unlock(&hash->uc_lock);
448 rc = entry_set_group_info(entry, primary, ngroups, groups);
449 spin_lock(&hash->uc_lock);
453 entry->ue_expire = jiffies + hash->uc_entry_expire;
454 UC_CACHE_SET_VALID(entry);
455 CDEBUG(D_OTHER, "%s: created upcall cache entry %p for key "LPU64"\n",
456 hash->uc_name, entry, entry->ue_key);
459 UC_CACHE_SET_INVALID(entry);
460 list_del_init(&entry->ue_hash);
462 UC_CACHE_CLEAR_ACQUIRING(entry);
463 spin_unlock(&hash->uc_lock);
464 wake_up_all(&entry->ue_waitq);
469 EXPORT_SYMBOL(upcall_cache_downcall);
/*
 * cache_flush() - walk every hash bucket and retire cache entries.
 *
 * @hash:  cache to flush
 * @force: when zero, entries that still hold references are only flagged
 *         EXPIRED; when non-zero, every entry is asserted to hold no
 *         references
 *
 * The whole walk runs under uc_lock and uses the removal-safe list
 * iterator.
 *
 * NOTE(review): the declaration of i, the statement that skips a busy
 * entry after flagging it, and the statement that destroys an idle entry
 * are not visible in this view; presumably they are a continue and a call
 * to free_entry - confirm.
 */
471 static void cache_flush(struct upcall_cache *hash, int force)
473 struct upcall_cache_entry *entry, *next;
477 spin_lock(&hash->uc_lock);
478 for (i = 0; i < UC_CACHE_HASH_SIZE; i++) {
479 list_for_each_entry_safe(entry, next,
480 &hash->uc_hashtable[i], ue_hash) {
481 if (!force && atomic_read(&entry->ue_refcount)) {
482 UC_CACHE_SET_EXPIRED(entry);
485 LASSERT(!atomic_read(&entry->ue_refcount));
489 spin_unlock(&hash->uc_lock);
/*
 * upcall_cache_flush_idle() - flush the cache in non-forced mode.
 *
 * Calls cache_flush with force set to 0, the mode in which entries that
 * still hold references are only flagged EXPIRED.
 */
493 void upcall_cache_flush_idle(struct upcall_cache *cache)
495 cache_flush(cache, 0);
497 EXPORT_SYMBOL(upcall_cache_flush_idle);
/*
 * upcall_cache_flush_all() - flush the cache in forced mode.
 *
 * Calls cache_flush with force set to 1, the mode in which every entry is
 * asserted to hold no references.
 */
499 void upcall_cache_flush_all(struct upcall_cache *cache)
501 cache_flush(cache, 1);
503 EXPORT_SYMBOL(upcall_cache_flush_all);
/*
 * upcall_cache_init() - allocate and initialise an upcall cache.
 *
 * @name: cache name, copied into uc_name
 *
 * Initialises the lock and every hash bucket, then applies the defaults:
 * the upcall path is the string NONE, entries expire after 10 * 60 * HZ
 * jiffies and an acquire attempt times out after 15 * HZ jiffies.
 * ERR_PTR(-ENOMEM) is the visible failure result.
 *
 * NOTE(review): the declaration of i, the test on the allocation result
 * and the final return statement are not visible in this view.
 *
 * NOTE(review): strncpy writes at most sizeof(uc_name) - 1 bytes, so the
 * last byte of uc_name is never written here. Termination of a long name
 * therefore relies on that byte already being zero - presumably OBD_ALLOC
 * zero-fills the allocation; confirm.
 */
505 struct upcall_cache *upcall_cache_init(const char *name)
507 struct upcall_cache *hash;
511 OBD_ALLOC(hash, sizeof(*hash));
513 RETURN(ERR_PTR(-ENOMEM));
515 spin_lock_init(&hash->uc_lock);
516 for (i = 0; i < UC_CACHE_HASH_SIZE; i++)
517 INIT_LIST_HEAD(&hash->uc_hashtable[i]);
518 strncpy(hash->uc_name, name, sizeof(hash->uc_name) - 1);
519 /* set default value, proc tunable */
520 strcpy(hash->uc_upcall, "NONE");
521 hash->uc_entry_expire = 10 * 60 * HZ;
522 hash->uc_acquire_expire = 15 * HZ;
526 EXPORT_SYMBOL(upcall_cache_init);
/*
 * upcall_cache_cleanup() - tear down an upcall cache.
 *
 * Flushes every entry in forced mode through upcall_cache_flush_all and
 * then frees the cache structure itself.
 *
 * NOTE(review): lines between the signature and the flush call are not
 * visible in this view; any guard on the hash pointer would sit there -
 * confirm against the full source.
 */
528 void upcall_cache_cleanup(struct upcall_cache *hash)
532 upcall_cache_flush_all(hash);
533 OBD_FREE(hash, sizeof(*hash));
535 EXPORT_SYMBOL(upcall_cache_cleanup);