/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * Supplementary groups cache.
+ * GPL HEADER START
*
- * Copyright (c) 2004 Cluster File Systems, Inc.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
- * This file is part of Lustre, http://www.lustre.org.
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
*
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
*
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/lvfs/upcall_cache.c
+ *
+ * Supplementary groups cache.
*/
#define DEBUG_SUBSYSTEM S_SEC
-#ifdef HAVE_KERNEL_CONFIG_H
+#ifndef AUTOCONF_INCLUDED
#include <linux/config.h>
#endif
#include <linux/module.h>
#include <linux/stat.h>
#include <asm/uaccess.h>
#include <linux/slab.h>
-#include <asm/segment.h>
#include <obd_support.h>
#include <lustre_lib.h>
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4)
-struct group_info *groups_alloc(int ngroups)
-{
- struct group_info *ginfo;
-
- LASSERT(ngroups <= NGROUPS_SMALL);
-
- OBD_ALLOC(ginfo, sizeof(*ginfo) + 1 * sizeof(gid_t *));
- if (!ginfo)
- return NULL;
- ginfo->ngroups = ngroups;
- ginfo->nblocks = 1;
- ginfo->blocks[0] = ginfo->small_block;
- atomic_set(&ginfo->usage, 1);
-
- return ginfo;
-}
-
-void groups_free(struct group_info *ginfo)
-{
- LASSERT(ginfo->ngroups <= NGROUPS_SMALL);
- LASSERT(ginfo->nblocks == 1);
- LASSERT(ginfo->blocks[0] == ginfo->small_block);
-
- OBD_FREE(ginfo, sizeof(*ginfo) + 1 * sizeof(gid_t *));
-}
-#endif
-
-static struct upcall_cache_entry *alloc_entry(__u64 key)
+/*
+ * Allocate a cache entry for \a key.
+ *
+ * The entry starts with a zero reference count and an initialised wait
+ * queue; callers take their own reference with get_entry().  If the cache
+ * provides an init_entry hook it is called with \a args so the cache type
+ * can set up its private part of the entry.
+ *
+ * Returns the new entry, or NULL when the allocation fails.
+ *
+ * NOTE(review): list-head and state-flag initialisation are not visible in
+ * this hunk -- confirm they happen before free_entry() can list_del() it.
+ */
+static struct upcall_cache_entry *alloc_entry(struct upcall_cache *cache,
+ __u64 key, void *args)
{
struct upcall_cache_entry *entry;
- OBD_ALLOC(entry, sizeof(*entry));
+ OBD_ALLOC_PTR(entry);
if (!entry)
return NULL;
entry->ue_key = key;
atomic_set(&entry->ue_refcount, 0);
init_waitqueue_head(&entry->ue_waitq);
+ if (cache->uc_ops->init_entry)
+ cache->uc_ops->init_entry(entry, args);
return entry;
}
-/* protected by hash lock */
-static void free_entry(struct upcall_cache_entry *entry)
+/*
+ * Destroy \a entry: protected by cache lock.
+ *
+ * Gives the cache type a chance to release its private data through the
+ * optional free_entry hook, unlinks the entry from its hash chain, logs
+ * the destruction and frees the memory.  The entry must not be touched
+ * after this returns.
+ */
+static void free_entry(struct upcall_cache *cache,
+ struct upcall_cache_entry *entry)
{
- if (entry->ue_group_info)
- groups_free(entry->ue_group_info);
+ if (cache->uc_ops->free_entry)
+ cache->uc_ops->free_entry(cache, entry);
+
list_del(&entry->ue_hash);
CDEBUG(D_OTHER, "destroy cache entry %p for key "LPU64"\n",
entry, entry->ue_key);
- OBD_FREE(entry, sizeof(*entry));
+ OBD_FREE_PTR(entry);
}
-static void get_entry(struct upcall_cache_entry *entry)
+/*
+ * Decide whether \a entry is the one an upcall lookup for \a key wants.
+ *
+ * Returns -1 when the keys differ.  With equal keys the verdict is left to
+ * the cache type's optional upcall_compare hook; without a hook the entry
+ * matches and 0 is returned.
+ */
+static inline int upcall_compare(struct upcall_cache *cache,
+                                 struct upcall_cache_entry *entry,
+                                 __u64 key, void *args)
+{
+        int rc = 0;
+
+        if (entry->ue_key != key)
+                rc = -1;
+        else if (cache->uc_ops->upcall_compare)
+                rc = cache->uc_ops->upcall_compare(cache, entry, key, args);
+
+        return rc;
+}
+
+/*
+ * Decide whether \a entry is the one a downcall for \a key refers to.
+ *
+ * Returns -1 when the keys differ.  With equal keys the verdict is left to
+ * the cache type's optional downcall_compare hook; without a hook the
+ * entry matches and 0 is returned.
+ */
+static inline int downcall_compare(struct upcall_cache *cache,
+                                   struct upcall_cache_entry *entry,
+                                   __u64 key, void *args)
+{
+        int rc = 0;
+
+        if (entry->ue_key != key)
+                rc = -1;
+        else if (cache->uc_ops->downcall_compare)
+                rc = cache->uc_ops->downcall_compare(cache, entry, key, args);
+
+        return rc;
+}
+
+/* Take one reference on \a entry (atomic increment of ue_refcount). */
+static inline void get_entry(struct upcall_cache_entry *entry)
{
atomic_inc(&entry->ue_refcount);
}
-static void put_entry(struct upcall_cache_entry *entry)
+/*
+ * Drop one reference on \a entry.
+ *
+ * When the count reaches zero and the entry is flagged INVALID or EXPIRED
+ * it is destroyed with free_entry(); a still-usable entry stays cached.
+ * Because free_entry() unlinks the entry from its hash chain, callers are
+ * expected to hold the cache lock.
+ */
+static inline void put_entry(struct upcall_cache *cache,
+ struct upcall_cache_entry *entry)
{
if (atomic_dec_and_test(&entry->ue_refcount) &&
(UC_CACHE_IS_INVALID(entry) || UC_CACHE_IS_EXPIRED(entry))) {
- free_entry(entry);
+ free_entry(cache, entry);
}
}
-static int check_unlink_entry(struct upcall_cache_entry *entry)
+/*
+ * Check whether \a entry is still usable and unlink it if not.
+ *
+ * On the unlink path the entry is removed from its hash chain, freed at
+ * once when nobody references it, and 1 is returned so the caller skips it.
+ *
+ * NOTE(review): the statements between the VALID/not-yet-expired test and
+ * list_del_init() are not visible in this hunk; callers treat the result
+ * as a boolean, so a "still usable" return presumably sits there -- confirm
+ * against the complete file.
+ */
+static int check_unlink_entry(struct upcall_cache *cache,
+ struct upcall_cache_entry *entry)
{
if (UC_CACHE_IS_VALID(entry) &&
time_before(jiffies, entry->ue_expire))
list_del_init(&entry->ue_hash);
if (!atomic_read(&entry->ue_refcount))
- free_entry(entry);
+ free_entry(cache, entry);
return 1;
}
-static int refresh_entry(struct upcall_cache *hash,
+/*
+ * Start the upcall that will fill in \a entry.
+ *
+ * The work is delegated to the cache type's do_upcall hook, which is
+ * mandatory (asserted).  Returns whatever the hook returns; the caller in
+ * this file treats a negative value as failure.
+ */
+static inline int refresh_entry(struct upcall_cache *cache,
struct upcall_cache_entry *entry)
{
- char *argv[4];
- char *envp[3];
- char keystr[16];
- int rc;
- ENTRY;
-
- snprintf(keystr, 16, LPU64, entry->ue_key);
-
- CDEBUG(D_INFO, "The groups upcall is: %s \n", hash->uc_upcall);
- argv[0] = hash->uc_upcall;
- argv[1] = hash->uc_name;
- argv[2] = keystr;
- argv[3] = NULL;
-
- envp[0] = "HOME=/";
- envp[1] = "PATH=/sbin:/usr/sbin";
- envp[2] = NULL;
-
- rc = USERMODEHELPER(argv[0], argv, envp);
- if (rc < 0) {
- CERROR("%s: error invoking getgroups upcall %s %s %s: rc %d; "
- "check /proc/fs/lustre/mds/%s/group_upcall\n",
- hash->uc_name, argv[0], argv[1], argv[2], rc, argv[1]);
- } else {
- CDEBUG(D_HA, "%s: invoked upcall %s %s %s\n", hash->uc_name,
- argv[0], argv[1], argv[2]);
- rc = 0;
- }
- RETURN(rc);
+ LASSERT(cache->uc_ops->do_upcall);
+ return cache->uc_ops->do_upcall(cache, entry);
}
-static int entry_set_group_info(struct upcall_cache_entry *entry, __u32 primary,
- __u32 ngroups, __u32 *groups)
-{
- struct group_info *ginfo;
- int i, j;
- ENTRY;
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4)
- if (ngroups > NGROUPS)
- ngroups = NGROUPS;
-#endif
-
- if (ngroups > NGROUPS_MAX) {
- CERROR("using first %d supplementary groups for uid "LPU64"\n",
- NGROUPS_MAX, entry->ue_key);
- ngroups = NGROUPS_MAX;
- }
-
- ginfo = groups_alloc(ngroups);
- if (!ginfo) {
- CERROR("uid "LPU64" update can't alloc ginfo for %d groups\n",
- entry->ue_key, ngroups);
- RETURN(-ENOMEM);
- }
- entry->ue_group_info = ginfo;
- entry->ue_primary = primary;
-
- for (i = 0; i < ginfo->nblocks; i++) {
- int cp_count = min(NGROUPS_PER_BLOCK, (int)ngroups);
- int off = i * NGROUPS_PER_BLOCK;
-
- for (j = 0; j < cp_count; j++)
- ginfo->blocks[i][j] = groups[off + j];
-
- ngroups -= cp_count;
- }
- RETURN(0);
-}
-
-struct upcall_cache_entry *upcall_cache_get_entry(struct upcall_cache *hash,
- __u64 key, __u32 primary,
- __u32 ngroups, __u32 *groups)
+/*
+ * Look up, or create and populate, the cache entry for \a key.
+ *
+ * The hash bucket for \a key is scanned under uc_lock; stale entries are
+ * dropped on the way and upcall_compare() picks the match.  When nothing
+ * matches, a new entry is allocated with the lock released and the search
+ * is repeated.  A NEW entry is switched to ACQUIRING and the upcall is
+ * started through refresh_entry() with the lock released.  While an entry
+ * is ACQUIRING the caller sleeps interruptibly on its wait queue for at
+ * most uc_acquire_expire jiffies.
+ *
+ * Returns the entry on success, or an ERR_PTR():
+ *   -ENOMEM     a new entry could not be allocated
+ *   -EREMCHG    the upcall itself reported -EREMCHG
+ *   -EINTR      the wait ended early while the entry was still ACQUIRING
+ *   -ETIMEDOUT  the wait ran its full length without a downcall
+ *   -EIDRM      the entry turned out INVALID
+ *
+ * NOTE(review): parts of this function are not visible in this view
+ * (for instance where the caller's reference is taken) -- confirm against
+ * the complete file before relying on the summary above.
+ */
+struct upcall_cache_entry *upcall_cache_get_entry(struct upcall_cache *cache,
+ __u64 key, void *args)
{
struct upcall_cache_entry *entry = NULL, *new = NULL, *next;
struct list_head *head;
int rc, found;
ENTRY;
- LASSERT(hash);
+ LASSERT(cache);
- if (strcmp(hash->uc_upcall, "NONE") == 0) {
- new = alloc_entry(key);
- if (!new) {
- CERROR("fail to alloc entry\n");
- RETURN(NULL);
- }
- get_entry(new);
-
- /* We have to sort the groups for 2.6 kernels */
- LASSERT(ngroups <= 2);
- if (ngroups == 2 && groups[1] == -1)
- ngroups--;
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4)
- /* 2.6 needs groups array sorted */
- if (ngroups == 2 && groups[0] > groups[1]) {
- __u32 tmp = groups[1];
- groups[1] = groups[0];
- groups[0] = tmp;
- }
-#endif
- if (ngroups > 0 && groups[0] == -1) {
- groups[0] = groups[1];
- ngroups--;
- }
-
- rc = entry_set_group_info(new, primary, ngroups, groups);
-
- /* We can't cache this entry as it only has a subset of
- * the user's groups, as sent in suppgid1, suppgid2. */
- UC_CACHE_SET_EXPIRED(new);
- RETURN(new);
- }
- head = &hash->uc_hashtable[UC_CACHE_HASH_INDEX(key)];
+ head = &cache->uc_hashtable[UC_CACHE_HASH_INDEX(key)];
find_again:
found = 0;
- spin_lock(&hash->uc_lock);
+ spin_lock(&cache->uc_lock);
list_for_each_entry_safe(entry, next, head, ue_hash) {
/* check invalid & expired items */
- if (check_unlink_entry(entry))
+ if (check_unlink_entry(cache, entry))
continue;
- if (entry->ue_key == key) {
+ if (upcall_compare(cache, entry, key, args) == 0) {
found = 1;
break;
}
if (!found) { /* didn't find it */
if (!new) {
- spin_unlock(&hash->uc_lock);
- new = alloc_entry(key);
+ spin_unlock(&cache->uc_lock);
+ new = alloc_entry(cache, key, args);
if (!new) {
CERROR("fail to alloc entry\n");
RETURN(ERR_PTR(-ENOMEM));
}
} else {
if (new) {
- free_entry(new);
+ free_entry(cache, new);
new = NULL;
}
list_move(&entry->ue_hash, head);
if (UC_CACHE_IS_NEW(entry)) {
UC_CACHE_SET_ACQUIRING(entry);
UC_CACHE_CLEAR_NEW(entry);
- entry->ue_acquire_expire = jiffies + hash->uc_acquire_expire;
- spin_unlock(&hash->uc_lock);
- rc = refresh_entry(hash, entry);
- spin_lock(&hash->uc_lock);
+ entry->ue_acquire_expire = jiffies + cache->uc_acquire_expire;
+ spin_unlock(&cache->uc_lock);
+ rc = refresh_entry(cache, entry);
+ spin_lock(&cache->uc_lock);
if (rc < 0) {
UC_CACHE_CLEAR_ACQUIRING(entry);
UC_CACHE_SET_INVALID(entry);
+ if (unlikely(rc == -EREMCHG)) {
+ put_entry(cache, entry);
+ GOTO(out, entry = ERR_PTR(rc));
+ }
}
/* fall through */
}
* this item, just wait it complete
*/
if (UC_CACHE_IS_ACQUIRING(entry)) {
+ unsigned long expiry = jiffies + cache->uc_acquire_expire;
+
init_waitqueue_entry(&wait, current);
add_wait_queue(&entry->ue_waitq, &wait);
set_current_state(TASK_INTERRUPTIBLE);
- spin_unlock(&hash->uc_lock);
+ spin_unlock(&cache->uc_lock);
- schedule_timeout(hash->uc_acquire_expire);
+ schedule_timeout(cache->uc_acquire_expire);
- spin_lock(&hash->uc_lock);
+ spin_lock(&cache->uc_lock);
remove_wait_queue(&entry->ue_waitq, &wait);
if (UC_CACHE_IS_ACQUIRING(entry)) {
- static unsigned long next;
/* we're interrupted or upcall failed in the middle */
- if (time_after(jiffies, next)) {
- CERROR("acquire timeout exceeded for key "LPU64
- "\n", entry->ue_key);
- next = jiffies + 1800;
- }
- put_entry(entry);
- GOTO(out, entry = ERR_PTR(-EIDRM));
+ rc = time_before(jiffies, expiry) ? -EINTR : -ETIMEDOUT;
+ /* log before put_entry(): the put may free the entry */
+ CERROR("acquire for key "LPU64": error %d\n",
+ entry->ue_key, rc);
+ put_entry(cache, entry);
+ GOTO(out, entry = ERR_PTR(rc));
}
/* fall through */
}
/* invalid means error, don't need to try again */
if (UC_CACHE_IS_INVALID(entry)) {
- put_entry(entry);
+ put_entry(cache, entry);
GOTO(out, entry = ERR_PTR(-EIDRM));
}
* We can't refresh the existing one because some
* memory might be shared by multiple processes.
*/
- if (check_unlink_entry(entry)) {
+ if (check_unlink_entry(cache, entry)) {
/* if expired, try again. but if this entry is
* created by me but too quickly turn to expired
* without any error, should at least give a
* chance to use it once.
*/
if (entry != new) {
- put_entry(entry);
- spin_unlock(&hash->uc_lock);
+ put_entry(cache, entry);
+ spin_unlock(&cache->uc_lock);
new = NULL;
goto find_again;
}
/* Now we know it's good */
out:
- spin_unlock(&hash->uc_lock);
+ spin_unlock(&cache->uc_lock);
RETURN(entry);
}
EXPORT_SYMBOL(upcall_cache_get_entry);
-void upcall_cache_put_entry(struct upcall_cache *hash,
+/*
+ * Release a reference obtained from upcall_cache_get_entry().
+ *
+ * Asserts that \a entry is still referenced, then drops the reference with
+ * put_entry() under uc_lock, which may destroy the entry.
+ *
+ * NOTE(review): the lone closing brace after ENTRY ends a statement that is
+ * not visible in this hunk (presumably an early exit for an unusable
+ * \a entry) -- confirm against the complete file.
+ */
+void upcall_cache_put_entry(struct upcall_cache *cache,
struct upcall_cache_entry *entry)
{
ENTRY;
}
LASSERT(atomic_read(&entry->ue_refcount) > 0);
- spin_lock(&hash->uc_lock);
- put_entry(entry);
- spin_unlock(&hash->uc_lock);
+ spin_lock(&cache->uc_lock);
+ put_entry(cache, entry);
+ spin_unlock(&cache->uc_lock);
EXIT;
}
EXPORT_SYMBOL(upcall_cache_put_entry);
-int upcall_cache_downcall(struct upcall_cache *hash, __u32 err, __u64 key,
- __u32 primary, __u32 ngroups, __u32 *groups)
+/*
+ * Deliver the result of an upcall for \a key to the waiting cache entry.
+ *
+ * The entry is located with downcall_compare() under uc_lock and pinned
+ * with a reference.  A non-zero \a err, or an entry that is INVALID or
+ * EXPIRED, fails the downcall; an entry that is no longer ACQUIRING is
+ * treated as already up to date.  Otherwise the cache type's optional
+ * parse_downcall hook consumes \a args with the lock released, and on
+ * success the entry becomes VALID for uc_entry_expire jiffies.
+ *
+ * On any failure after the entry was found it is marked INVALID and
+ * unlinked.  In all those cases ACQUIRING is cleared, waiters are woken
+ * and the reference is dropped -- all under uc_lock, because dropping the
+ * last reference may free and unlink the entry.
+ *
+ * Returns 0 on success, -EINVAL when no entry matches, \a err is set or
+ * the entry is stale, or the error returned by parse_downcall.
+ */
+int upcall_cache_downcall(struct upcall_cache *cache, __u32 err, __u64 key,
+ void *args)
{
struct upcall_cache_entry *entry = NULL;
struct list_head *head;
int found = 0, rc = 0;
ENTRY;
- LASSERT(hash);
+ LASSERT(cache);
- head = &hash->uc_hashtable[UC_CACHE_HASH_INDEX(key)];
+ head = &cache->uc_hashtable[UC_CACHE_HASH_INDEX(key)];
- spin_lock(&hash->uc_lock);
+ spin_lock(&cache->uc_lock);
list_for_each_entry(entry, head, ue_hash) {
- if (entry->ue_key == key) {
+ if (downcall_compare(cache, entry, key, args) == 0) {
found = 1;
get_entry(entry);
break;
if (!found) {
CDEBUG(D_OTHER, "%s: upcall for key "LPU64" not expected\n",
- hash->uc_name, entry->ue_key);
+ cache->uc_name, key);
/* haven't found, it's possible */
- spin_unlock(&hash->uc_lock);
+ spin_unlock(&cache->uc_lock);
RETURN(-EINVAL);
}
if (err) {
CDEBUG(D_OTHER, "%s: upcall for key "LPU64" returned %d\n",
- hash->uc_name, entry->ue_key, err);
+ cache->uc_name, entry->ue_key, err);
GOTO(out, rc = -EINVAL);
}
if (!UC_CACHE_IS_ACQUIRING(entry)) {
- CDEBUG(D_HA, "%s: found uptodate entry %p (key "LPU64")\n",
- hash->uc_name, entry, entry->ue_key);
+ CDEBUG(D_RPCTRACE,"%s: found uptodate entry %p (key "LPU64")\n",
+ cache->uc_name, entry, entry->ue_key);
GOTO(out, rc = 0);
}
if (UC_CACHE_IS_INVALID(entry) || UC_CACHE_IS_EXPIRED(entry)) {
CERROR("%s: found a stale entry %p (key "LPU64") in ioctl\n",
- hash->uc_name, entry, entry->ue_key);
+ cache->uc_name, entry, entry->ue_key);
GOTO(out, rc = -EINVAL);
}
- spin_unlock(&hash->uc_lock);
- rc = entry_set_group_info(entry, primary, ngroups, groups);
- spin_lock(&hash->uc_lock);
+ spin_unlock(&cache->uc_lock);
+ if (cache->uc_ops->parse_downcall)
+ rc = cache->uc_ops->parse_downcall(cache, entry, args);
+ spin_lock(&cache->uc_lock);
if (rc)
GOTO(out, rc);
- entry->ue_expire = jiffies + hash->uc_entry_expire;
+ entry->ue_expire = jiffies + cache->uc_entry_expire;
UC_CACHE_SET_VALID(entry);
CDEBUG(D_OTHER, "%s: created upcall cache entry %p for key "LPU64"\n",
- hash->uc_name, entry, entry->ue_key);
+ cache->uc_name, entry, entry->ue_key);
out:
if (rc) {
UC_CACHE_SET_INVALID(entry);
list_del_init(&entry->ue_hash);
}
UC_CACHE_CLEAR_ACQUIRING(entry);
- spin_unlock(&hash->uc_lock);
wake_up_all(&entry->ue_waitq);
- put_entry(entry);
+ /* put_entry() may free and unlink the entry: keep it under uc_lock */
+ put_entry(cache, entry);
+ spin_unlock(&cache->uc_lock);
RETURN(rc);
}
EXPORT_SYMBOL(upcall_cache_downcall);
-static void cache_flush(struct upcall_cache *hash, int force)
+/*
+ * Walk every hash bucket under uc_lock and get rid of the entries.
+ *
+ * Without \a force, an entry that is still referenced is only flagged
+ * EXPIRED and skipped, so the final put_entry() will destroy it.  With
+ * \a force set, every entry is expected to be unreferenced (asserted) and
+ * is freed on the spot.
+ */
+static void cache_flush(struct upcall_cache *cache, int force)
{
struct upcall_cache_entry *entry, *next;
int i;
ENTRY;
- spin_lock(&hash->uc_lock);
+ spin_lock(&cache->uc_lock);
for (i = 0; i < UC_CACHE_HASH_SIZE; i++) {
list_for_each_entry_safe(entry, next,
- &hash->uc_hashtable[i], ue_hash) {
+ &cache->uc_hashtable[i], ue_hash) {
if (!force && atomic_read(&entry->ue_refcount)) {
UC_CACHE_SET_EXPIRED(entry);
continue;
}
LASSERT(!atomic_read(&entry->ue_refcount));
- free_entry(entry);
+ free_entry(cache, entry);
}
}
- spin_unlock(&hash->uc_lock);
+ spin_unlock(&cache->uc_lock);
EXIT;
}
}
EXPORT_SYMBOL(upcall_cache_flush_all);
-struct upcall_cache *upcall_cache_init(const char *name)
+/*
+ * Expire the cache entry matching \a key (and \a args), if there is one.
+ *
+ * The bucket selected by UC_CACHE_HASH_INDEX(key) is searched under
+ * uc_lock with upcall_compare().  A matching entry is logged, flagged
+ * EXPIRED and, when nobody holds a reference, freed immediately; otherwise
+ * the EXPIRED flag lets the final put_entry() destroy it.  Nothing happens
+ * when no entry matches.
+ */
+void upcall_cache_flush_one(struct upcall_cache *cache, __u64 key, void *args)
+{
+        struct list_head *head;
+        struct upcall_cache_entry *entry;
+        int found = 0;
+        ENTRY;
+
+        head = &cache->uc_hashtable[UC_CACHE_HASH_INDEX(key)];
+
+        spin_lock(&cache->uc_lock);
+        list_for_each_entry(entry, head, ue_hash) {
+                if (upcall_compare(cache, entry, key, args) == 0) {
+                        found = 1;
+                        break;
+                }
+        }
+
+        if (found) {
+                /* the expiry stamps are jiffies-based, so "cur" must be too */
+                CWARN("%s: flush entry %p: key "LPU64", ref %d, fl %x, "
+                      "cur %lu, ex %ld/%ld\n",
+                      cache->uc_name, entry, entry->ue_key,
+                      atomic_read(&entry->ue_refcount), entry->ue_flags,
+                      jiffies, entry->ue_acquire_expire,
+                      entry->ue_expire);
+                UC_CACHE_SET_EXPIRED(entry);
+                if (!atomic_read(&entry->ue_refcount))
+                        free_entry(cache, entry);
+        }
+        spin_unlock(&cache->uc_lock);
+        /* balance the ENTRY trace marker above */
+        EXIT;
+}
+EXPORT_SYMBOL(upcall_cache_flush_one);
+
+/*
+ * Create an upcall cache.
+ *
+ * \a name and \a upcall are copied into the cache, truncated to fit and
+ * always NUL-terminated.  \a ops supplies the cache-type hooks; the pointer
+ * is stored, not copied, and the rest of this file dereferences it without
+ * a NULL check.  Entries live for 10 minutes and an upcall is given 15
+ * seconds to answer.
+ *
+ * Returns the new cache, or ERR_PTR(-ENOMEM) when allocation fails.
+ */
+struct upcall_cache *upcall_cache_init(const char *name, const char *upcall,
+ struct upcall_cache_ops *ops)
{
- struct upcall_cache *hash;
+ struct upcall_cache *cache;
int i;
ENTRY;
- OBD_ALLOC(hash, sizeof(*hash));
- if (!hash)
+ OBD_ALLOC_PTR(cache);
+ if (!cache)
RETURN(ERR_PTR(-ENOMEM));
- spin_lock_init(&hash->uc_lock);
+ spin_lock_init(&cache->uc_lock);
+ rwlock_init(&cache->uc_upcall_rwlock);
for (i = 0; i < UC_CACHE_HASH_SIZE; i++)
- INIT_LIST_HEAD(&hash->uc_hashtable[i]);
- strncpy(hash->uc_name, name, sizeof(hash->uc_name) - 1);
- /* set default value, proc tunable */
- strcpy(hash->uc_upcall, "NONE");
- hash->uc_entry_expire = 10 * 60 * HZ;
- hash->uc_acquire_expire = 15 * HZ;
-
- RETURN(hash);
+ INIT_LIST_HEAD(&cache->uc_hashtable[i]);
+ /* strncpy() leaves no terminator on truncation: add it explicitly */
+ strncpy(cache->uc_name, name, sizeof(cache->uc_name) - 1);
+ cache->uc_name[sizeof(cache->uc_name) - 1] = '\0';
+ /* upcall pathname proc tunable */
+ strncpy(cache->uc_upcall, upcall, sizeof(cache->uc_upcall) - 1);
+ cache->uc_upcall[sizeof(cache->uc_upcall) - 1] = '\0';
+ cache->uc_entry_expire = 10 * 60 * HZ;
+ cache->uc_acquire_expire = 15 * HZ;
+ cache->uc_ops = ops;
+
+ RETURN(cache);
}
EXPORT_SYMBOL(upcall_cache_init);
-void upcall_cache_cleanup(struct upcall_cache *hash)
+/*
+ * Tear down a cache created by upcall_cache_init().
+ *
+ * A NULL \a cache is ignored.  Otherwise upcall_cache_flush_all() is run
+ * on it and the cache structure itself is freed, so \a cache must not be
+ * used afterwards.
+ */
+void upcall_cache_cleanup(struct upcall_cache *cache)
{
- if (!hash)
+ if (!cache)
return;
- upcall_cache_flush_all(hash);
- OBD_FREE(hash, sizeof(*hash));
+ upcall_cache_flush_all(cache);
+ OBD_FREE(cache, sizeof(*cache));
}
EXPORT_SYMBOL(upcall_cache_cleanup);