/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
 * vim:expandtab:shiftwidth=8:tabstop=8:
 *
 * Supplementary groups cache.
 *
 * Copyright (c) 2004 Cluster File Systems, Inc.
 *
 * This file is part of Lustre, http://www.lustre.org.
 *
 * Lustre is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * Lustre is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Lustre; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#define DEBUG_SUBSYSTEM S_SEC

#ifdef HAVE_KERNEL_CONFIG_H
#include <linux/config.h>
#endif
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/kmod.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/errno.h>
#include <linux/version.h>
#include <linux/unistd.h>

#include <asm/system.h>
#include <asm/uaccess.h>

#include <linux/fs.h>
#include <linux/stat.h>
#include <asm/uaccess.h>
#include <linux/slab.h>
#include <asm/segment.h>

#include <obd_support.h>
#include <lustre_lib.h>
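
/*
 * The upcall cache maps a numeric key (a uid on the MDS) to the user's
 * primary gid and supplementary group list.  A cache miss fires the
 * user-mode helper (uc_upcall) via USERMODEHELPER(); the helper later feeds
 * the group list back through upcall_cache_downcall(), which marks the entry
 * valid and wakes any waiters.  Entries expire after uc_entry_expire, and a
 * lookup racing with an in-flight upcall sleeps for at most
 * uc_acquire_expire.
 *
 * A minimal caller sketch (the cache name and key below are illustrative
 * only, not taken from this file):
 *
 *      struct upcall_cache *uc = upcall_cache_init("mds-service");
 *      struct upcall_cache_entry *e;
 *
 *      e = upcall_cache_get_entry(uc, uid, gid, 0, NULL);
 *      if (e != NULL && !IS_ERR(e)) {
 *              ... use e->ue_primary and e->ue_group_info ...
 *              upcall_cache_put_entry(uc, e);
 *      }
 *      upcall_cache_cleanup(uc);
 */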

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4)
struct group_info *groups_alloc(int ngroups)
{
        struct group_info *ginfo;

        LASSERT(ngroups <= NGROUPS_SMALL);

        OBD_ALLOC(ginfo, sizeof(*ginfo) + 1 * sizeof(gid_t *));
        if (!ginfo)
                return NULL;

        ginfo->ngroups = ngroups;
        ginfo->nblocks = 1;
        ginfo->blocks[0] = ginfo->small_block;
        atomic_set(&ginfo->usage, 1);

        return ginfo;
}

void groups_free(struct group_info *ginfo)
{
        LASSERT(ginfo->ngroups <= NGROUPS_SMALL);
        LASSERT(ginfo->nblocks == 1);
        LASSERT(ginfo->blocks[0] == ginfo->small_block);

        OBD_FREE(ginfo, sizeof(*ginfo) + 1 * sizeof(gid_t *));
}
#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) */
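
/*
 * Cache entries start in the NEW state with a zero refcount.  They are only
 * ever freed from free_entry() with the hash lock held: either when the last
 * reference to an invalid/expired entry is dropped in put_entry(), or when
 * check_unlink_entry() unhashes an unreferenced stale entry.
 */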

static struct upcall_cache_entry *alloc_entry(__u64 key)
{
        struct upcall_cache_entry *entry;

        OBD_ALLOC(entry, sizeof(*entry));
        if (!entry)
                return NULL;

        UC_CACHE_SET_NEW(entry);
        INIT_LIST_HEAD(&entry->ue_hash);
        entry->ue_key = key;
        atomic_set(&entry->ue_refcount, 0);
        init_waitqueue_head(&entry->ue_waitq);
        return entry;
}

/* protected by hash lock */
static void free_entry(struct upcall_cache_entry *entry)
{
        if (entry->ue_group_info)
                groups_free(entry->ue_group_info);
        list_del(&entry->ue_hash);
        CDEBUG(D_OTHER, "destroy cache entry %p for key "LPU64"\n",
               entry, entry->ue_key);
        OBD_FREE(entry, sizeof(*entry));
}

static void get_entry(struct upcall_cache_entry *entry)
{
        atomic_inc(&entry->ue_refcount);
}

static void put_entry(struct upcall_cache_entry *entry)
{
        if (atomic_dec_and_test(&entry->ue_refcount) &&
            (UC_CACHE_IS_INVALID(entry) || UC_CACHE_IS_EXPIRED(entry))) {
                free_entry(entry);
        }
}
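
/*
 * Unhash an entry that can no longer be used: a valid entry whose ue_expire
 * has passed, an acquiring entry whose upcall has exceeded
 * ue_acquire_expire (its waiters are woken so they can bail out), or an
 * entry already marked invalid.  Returns 1 if the entry was unhashed (and
 * freed if unreferenced), 0 if it is still usable.  Caller must hold the
 * hash lock.
 */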
static int check_unlink_entry(struct upcall_cache_entry *entry)
{
        if (UC_CACHE_IS_VALID(entry) &&
            time_before(jiffies, entry->ue_expire))
                return 0;

        if (UC_CACHE_IS_ACQUIRING(entry)) {
                if (time_before(jiffies, entry->ue_acquire_expire))
                        return 0;

                UC_CACHE_SET_EXPIRED(entry);
                wake_up_all(&entry->ue_waitq);
        } else if (!UC_CACHE_IS_INVALID(entry)) {
                UC_CACHE_SET_EXPIRED(entry);
        }

        list_del_init(&entry->ue_hash);
        if (!atomic_read(&entry->ue_refcount))
                free_entry(entry);
        return 1;
}
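
/*
 * Fire the configured user-space upcall with the cache name and the key
 * rendered as a decimal string.  The helper is expected to resolve the key
 * and write the result back, which eventually arrives through
 * upcall_cache_downcall().
 */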
static int refresh_entry(struct upcall_cache *hash,
                         struct upcall_cache_entry *entry)
{
        char keystr[16], *argv[4], *envp[3];
        int rc;
        ENTRY;

        snprintf(keystr, sizeof(keystr), LPU64, entry->ue_key);
        CDEBUG(D_INFO, "The groups upcall is: %s\n", hash->uc_upcall);

        argv[0] = hash->uc_upcall;
        argv[1] = hash->uc_name;
        argv[2] = keystr;
        argv[3] = NULL;

        envp[0] = "HOME=/";
        envp[1] = "PATH=/sbin:/usr/sbin";
        envp[2] = NULL;

        rc = USERMODEHELPER(argv[0], argv, envp);
        if (rc < 0) {
                CERROR("%s: error invoking getgroups upcall %s %s %s: rc %d; "
                       "check /proc/fs/lustre/mds/%s/group_upcall\n",
                       hash->uc_name, argv[0], argv[1], argv[2], rc, argv[1]);
        } else {
                CDEBUG(D_HA, "%s: invoked upcall %s %s %s\n", hash->uc_name,
                       argv[0], argv[1], argv[2]);
                rc = 0;
        }
        RETURN(rc);
}
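
/*
 * Fill the entry with the primary gid and the supplementary group list,
 * clamping the group count to what the kernel's group_info can hold and
 * copying the gids block by block.
 */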
static int entry_set_group_info(struct upcall_cache_entry *entry, __u32 primary,
                                __u32 ngroups, __u32 *groups)
{
        struct group_info *ginfo;
        int i, j;
        ENTRY;

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4)
        if (ngroups > NGROUPS)
                ngroups = NGROUPS;
#endif

        if (ngroups > NGROUPS_MAX) {
                CERROR("using first %d supplementary groups for uid "LPU64"\n",
                       NGROUPS_MAX, entry->ue_key);
                ngroups = NGROUPS_MAX;
        }

        ginfo = groups_alloc(ngroups);
        if (!ginfo) {
                CERROR("uid "LPU64" update can't alloc ginfo for %d groups\n",
                       entry->ue_key, ngroups);
                RETURN(-ENOMEM);
        }
        entry->ue_group_info = ginfo;
        entry->ue_primary = primary;

        for (i = 0; i < ginfo->nblocks; i++) {
                int cp_count = min(NGROUPS_PER_BLOCK, (int)ngroups);
                int off = i * NGROUPS_PER_BLOCK;

                for (j = 0; j < cp_count; j++)
                        ginfo->blocks[i][j] = groups[off + j];

                ngroups -= cp_count;
        }
        RETURN(0);
}
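
/*
 * Look up (or create) the entry for @key.  With the upcall set to "NONE" a
 * throw-away entry is built directly from the supplied groups; otherwise a
 * new entry triggers refresh_entry() and the caller sleeps until the
 * downcall arrives, the acquire timeout expires, or the entry turns out to
 * be invalid.  The returned entry holds a reference that must be dropped
 * with upcall_cache_put_entry().
 */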
struct upcall_cache_entry *upcall_cache_get_entry(struct upcall_cache *hash,
                                                  __u64 key, __u32 primary,
                                                  __u32 ngroups, __u32 *groups)
{
        struct upcall_cache_entry *entry = NULL, *new = NULL, *next;
        struct list_head *head;
        wait_queue_t wait;
        int rc, found;
        ENTRY;

        if (strcmp(hash->uc_upcall, "NONE") == 0) {
                new = alloc_entry(key);
                if (!new) {
                        CERROR("fail to alloc entry\n");
                        RETURN(ERR_PTR(-ENOMEM));
                }
                get_entry(new);

                /* We have to sort the groups for 2.6 kernels */
                LASSERT(ngroups <= 2);
                if (ngroups == 2 && groups[1] == -1)
                        ngroups--;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4)
                /* 2.6 needs groups array sorted */
                if (ngroups == 2 && groups[0] > groups[1]) {
                        __u32 tmp = groups[1];
                        groups[1] = groups[0];
                        groups[0] = tmp;
                }
#endif
                if (ngroups > 0 && groups[0] == -1) {
                        groups[0] = groups[1];
                        ngroups--;
                }

                rc = entry_set_group_info(new, primary, ngroups, groups);
                if (rc) {
                        free_entry(new);
                        RETURN(ERR_PTR(rc));
                }

                /* We can't cache this entry as it only has a subset of
                 * the user's groups, as sent in suppgid1, suppgid2. */
                UC_CACHE_SET_EXPIRED(new);
                RETURN(new);
        }

        head = &hash->uc_hashtable[UC_CACHE_HASH_INDEX(key)];

find_again:
        found = 0;
        spin_lock(&hash->uc_lock);
        list_for_each_entry_safe(entry, next, head, ue_hash) {
                /* check invalid & expired items */
                if (check_unlink_entry(entry))
                        continue;
                if (entry->ue_key == key) {
                        found = 1;
                        break;
                }
        }

        if (!found) { /* didn't find it */
                if (!new) {
                        spin_unlock(&hash->uc_lock);
                        new = alloc_entry(key);
                        if (!new) {
                                CERROR("fail to alloc entry\n");
                                RETURN(ERR_PTR(-ENOMEM));
                        }
                        goto find_again;
                } else {
                        list_add(&new->ue_hash, head);
                        entry = new;
                }
        } else {
                if (new) {
                        free_entry(new);
                        new = NULL;
                }
                list_move(&entry->ue_hash, head);
        }
        get_entry(entry);

        /* acquire for new one */
        if (UC_CACHE_IS_NEW(entry)) {
                UC_CACHE_SET_ACQUIRING(entry);
                UC_CACHE_CLEAR_NEW(entry);
                entry->ue_acquire_expire = jiffies + hash->uc_acquire_expire;
                spin_unlock(&hash->uc_lock);
                rc = refresh_entry(hash, entry);
                spin_lock(&hash->uc_lock);
                if (rc < 0) {
                        UC_CACHE_CLEAR_ACQUIRING(entry);
                        UC_CACHE_SET_INVALID(entry);
                }
                /* fall through */
        }

        /* someone (and only one) is doing upcall upon
         * this item, just wait it complete */
        if (UC_CACHE_IS_ACQUIRING(entry)) {
                init_waitqueue_entry(&wait, current);
                add_wait_queue(&entry->ue_waitq, &wait);
                set_current_state(TASK_INTERRUPTIBLE);
                spin_unlock(&hash->uc_lock);

                schedule_timeout(hash->uc_acquire_expire);

                spin_lock(&hash->uc_lock);
                remove_wait_queue(&entry->ue_waitq, &wait);
                if (UC_CACHE_IS_ACQUIRING(entry)) {
                        static unsigned long next;
                        /* we're interrupted or upcall failed in the middle */
                        if (time_after(jiffies, next)) {
                                CERROR("acquire timeout exceeded for key "LPU64
                                       "\n", entry->ue_key);
                                next = jiffies + 1800;
                        }
                        put_entry(entry);
                        GOTO(out, entry = ERR_PTR(-EIDRM));
                }
                /* fall through */
        }

        /* invalid means error, don't need to try again */
        if (UC_CACHE_IS_INVALID(entry)) {
                put_entry(entry);
                GOTO(out, entry = ERR_PTR(-EIDRM));
        }

        /* check expired:
         * We can't refresh the existing one because some
         * memory might be shared by multiple processes. */
        if (check_unlink_entry(entry)) {
                /* if expired, try again. but if this entry is
                 * created by me but too quickly turn to expired
                 * without any error, should at least give a
                 * chance to use it once. */
                if (entry != new) {
                        put_entry(entry);
                        spin_unlock(&hash->uc_lock);
                        new = NULL;
                        goto find_again;
                }
        }

        /* Now we know it's good */
out:
        spin_unlock(&hash->uc_lock);
        RETURN(entry);
}
EXPORT_SYMBOL(upcall_cache_get_entry);

void upcall_cache_put_entry(struct upcall_cache *hash,
                            struct upcall_cache_entry *entry)
{
        if (!entry)
                return;

        LASSERT(atomic_read(&entry->ue_refcount) > 0);
        spin_lock(&hash->uc_lock);
        put_entry(entry);
        spin_unlock(&hash->uc_lock);
}
EXPORT_SYMBOL(upcall_cache_put_entry);
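
/*
 * Downcall from the user-mode helper: locate the entry being acquired for
 * @key, install the group list (or mark the entry invalid on error), then
 * wake up everyone sleeping in upcall_cache_get_entry().
 */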
int upcall_cache_downcall(struct upcall_cache *hash, __u32 err, __u64 key,
                          __u32 primary, __u32 ngroups, __u32 *groups)
{
        struct upcall_cache_entry *entry = NULL;
        struct list_head *head;
        int found = 0, rc = 0;
        ENTRY;

        head = &hash->uc_hashtable[UC_CACHE_HASH_INDEX(key)];

        spin_lock(&hash->uc_lock);
        list_for_each_entry(entry, head, ue_hash) {
                if (entry->ue_key == key) {
                        found = 1;
                        get_entry(entry);
                        break;
                }
        }

        if (!found) {
                CDEBUG(D_OTHER, "%s: upcall for key "LPU64" not expected\n",
                       hash->uc_name, key);
                /* haven't found, it's possible */
                spin_unlock(&hash->uc_lock);
                RETURN(-EINVAL);
        }

        if (err) {
                CDEBUG(D_OTHER, "%s: upcall for key "LPU64" returned %d\n",
                       hash->uc_name, entry->ue_key, err);
                GOTO(out, rc = -EINVAL);
        }

        if (!UC_CACHE_IS_ACQUIRING(entry)) {
                CDEBUG(D_HA, "%s: found uptodate entry %p (key "LPU64")\n",
                       hash->uc_name, entry, entry->ue_key);
                GOTO(out, rc = 0);
        }

        if (UC_CACHE_IS_INVALID(entry) || UC_CACHE_IS_EXPIRED(entry)) {
                CERROR("%s: found a stale entry %p (key "LPU64") in ioctl\n",
                       hash->uc_name, entry, entry->ue_key);
                GOTO(out, rc = -EINVAL);
        }

        spin_unlock(&hash->uc_lock);
        rc = entry_set_group_info(entry, primary, ngroups, groups);
        spin_lock(&hash->uc_lock);
        if (rc)
                GOTO(out, rc);

        entry->ue_expire = jiffies + hash->uc_entry_expire;
        UC_CACHE_SET_VALID(entry);
        CDEBUG(D_OTHER, "%s: created upcall cache entry %p for key "LPU64"\n",
               hash->uc_name, entry, entry->ue_key);
out:
        if (rc) {
                UC_CACHE_SET_INVALID(entry);
                list_del_init(&entry->ue_hash);
        }
        UC_CACHE_CLEAR_ACQUIRING(entry);
        spin_unlock(&hash->uc_lock);
        wake_up_all(&entry->ue_waitq);
        put_entry(entry);

        RETURN(rc);
}
EXPORT_SYMBOL(upcall_cache_downcall);
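
/*
 * Walk every hash bucket and drop entries.  With @force the caller asserts
 * no references remain; without it, busy entries are only marked expired
 * and reaped once their last user drops them via put_entry().
 */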
static void cache_flush(struct upcall_cache *hash, int force)
{
        struct upcall_cache_entry *entry, *next;
        int i;
        ENTRY;

        spin_lock(&hash->uc_lock);
        for (i = 0; i < UC_CACHE_HASH_SIZE; i++) {
                list_for_each_entry_safe(entry, next,
                                         &hash->uc_hashtable[i], ue_hash) {
                        if (!force && atomic_read(&entry->ue_refcount)) {
                                UC_CACHE_SET_EXPIRED(entry);
                                continue;
                        }
                        LASSERT(!atomic_read(&entry->ue_refcount));
                        free_entry(entry);
                }
        }
        spin_unlock(&hash->uc_lock);
        EXIT;
}

void upcall_cache_flush_idle(struct upcall_cache *cache)
{
        cache_flush(cache, 0);
}
EXPORT_SYMBOL(upcall_cache_flush_idle);

void upcall_cache_flush_all(struct upcall_cache *cache)
{
        cache_flush(cache, 1);
}
EXPORT_SYMBOL(upcall_cache_flush_all);
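
/*
 * Allocate and initialize a cache.  The upcall defaults to "NONE" and the
 * expiry times (10 minutes per entry, 15 seconds per acquire) are defaults
 * meant to be tuned later through /proc.
 */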
struct upcall_cache *upcall_cache_init(const char *name)
{
        struct upcall_cache *hash;
        int i;
        ENTRY;

        OBD_ALLOC(hash, sizeof(*hash));
        if (!hash)
                RETURN(ERR_PTR(-ENOMEM));

        spin_lock_init(&hash->uc_lock);
        for (i = 0; i < UC_CACHE_HASH_SIZE; i++)
                INIT_LIST_HEAD(&hash->uc_hashtable[i]);
        strncpy(hash->uc_name, name, sizeof(hash->uc_name) - 1);
        /* set default value, proc tunable */
        strcpy(hash->uc_upcall, "NONE");
        hash->uc_entry_expire = 10 * 60 * HZ;
        hash->uc_acquire_expire = 15 * HZ;

        RETURN(hash);
}
EXPORT_SYMBOL(upcall_cache_init);

void upcall_cache_cleanup(struct upcall_cache *hash)
{
        if (!hash)
                return;
        upcall_cache_flush_all(hash);
        OBD_FREE(hash, sizeof(*hash));
}
EXPORT_SYMBOL(upcall_cache_cleanup);