diff --git a/lustre/obdclass/lustre_handles.c b/lustre/obdclass/lustre_handles.c
index 01dd75b..dfdfec4 100644
--- a/lustre/obdclass/lustre_handles.c
+++ b/lustre/obdclass/lustre_handles.c
  * Copyright (C) 2002 Cluster File Systems, Inc.
  *   Author: Phil Schwan <phil@clusterfs.com>
  *
- *   This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *   This file is part of the Lustre file system, http://www.lustre.org
+ *   Lustre is a trademark of Cluster File Systems, Inc.
  *
- *   Portals is free software; you can redistribute it and/or
- *   modify it under the terms of version 2.1 of the GNU Lesser General
- *   Public License as published by the Free Software Foundation.
+ *   You may have signed or agreed to another license before downloading
+ *   this software.  If so, you are bound by the terms and conditions
+ *   of that agreement, and the following does not apply to you.  See the
+ *   LICENSE file included with this distribution for more information.
  *
- *   Portals is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU Lesser General Public License for more details.
+ *   If you did not agree to a different license, then this copy of Lustre
+ *   is open source software; you can redistribute it and/or modify it
+ *   under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
  *
- *   You should have received a copy of the GNU Lesser General Public
- *   License along with Portals; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *   In either case, Lustre is distributed in the hope that it will be
+ *   useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+ *   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   license text for more details.
  */
 
-#define DEBUG_SUBSYSTEM S_PORTALS
-#ifdef __KERNEL__
-#include <linux/types.h>
-#include <linux/random.h>
-#else 
-#include <liblustre.h>
-#endif 
+#define DEBUG_SUBSYSTEM S_CLASS
+#ifndef __KERNEL__
+# include <liblustre.h>
+#endif
 
+#include <obd_support.h>
+#include <lustre_handles.h>
+#include <lustre_lib.h>
 
-#include <linux/kp30.h>
-#include <linux/lustre_handles.h>
+#if !defined(HAVE_RCU) || !defined(__KERNEL__)
+# define list_add_rcu            list_add
+# define list_del_rcu            list_del
+# define list_for_each_rcu       list_for_each
+# define list_for_each_safe_rcu  list_for_each_safe
+# define rcu_read_lock()         spin_lock(&bucket->lock)
+# define rcu_read_unlock()       spin_unlock(&bucket->lock)
+#endif /* !HAVE_RCU || !__KERNEL__ */
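
On kernels without RCU (and in userspace liblustre builds), the compat block above maps the RCU list primitives back onto the per-bucket spinlock, so the "lock-free" reader quietly becomes a locked one. A minimal sketch of what the lookup path reduces to under this fallback; note that rcu_read_lock() expands to spin_lock(&bucket->lock), so it only compiles inside a function that has a local variable named bucket:

    /* What class_handle2object()'s read side becomes with !HAVE_RCU: */
    struct handle_bucket *bucket = &handle_hash[cookie & HANDLE_HASH_MASK];
    struct list_head *tmp;

    spin_lock(&bucket->lock);             /* was rcu_read_lock()     */
    list_for_each(tmp, &bucket->head) {   /* was list_for_each_rcu() */
            /* ... compare h_cookie ... */
    }
    spin_unlock(&bucket->lock);           /* was rcu_read_unlock()   */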
 
-static spinlock_t handle_lock = SPIN_LOCK_UNLOCKED;
-static spinlock_t random_lock = SPIN_LOCK_UNLOCKED;
-static struct list_head *handle_hash = NULL;
-static int handle_count = 0;
+static __u64 handle_base;
+#define HANDLE_INCR 7
+static spinlock_t handle_base_lock;
+
+static struct handle_bucket {
+        spinlock_t lock;
+        struct list_head head;
+} *handle_hash;
+
+static atomic_t handle_count = ATOMIC_INIT(0);
 
 #define HANDLE_HASH_SIZE (1 << 14)
 #define HANDLE_HASH_MASK (HANDLE_HASH_SIZE - 1)
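
The table is a power of two, so bucket selection is a mask of the cookie's low 14 bits instead of a modulo. For example:

    /* 1 << 14 = 16384 buckets; the low 14 bits select the bucket. */
    __u64 cookie = 0x123456789abcdef0ULL;
    unsigned int idx = cookie & HANDLE_HASH_MASK;  /* 0xdef0 & 0x3fff = 0x1ef0 */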
 
+/*
+ * Generate a unique 64-bit cookie (hash) for a handle and insert it into
+ * the global (per-node) hash table.
+ */
 void class_handle_hash(struct portals_handle *h, portals_handle_addref_cb cb)
 {
-        struct list_head *bucket;
+        struct handle_bucket *bucket;
         ENTRY;
 
         LASSERT(h != NULL);
         LASSERT(list_empty(&h->h_link));
 
-        /* My hypothesis is that get_random_bytes, if called from two threads at
-         * the same time, will return the same bytes. -phil */
-        spin_lock(&random_lock);
-        get_random_bytes(&h->h_cookie, sizeof(h->h_cookie));
-        spin_unlock(&random_lock);
-
+        /*
+         * This is a fast but simplistic cookie-generation algorithm; it
+         * will need a redo at some point in the future for security.
+         */
+        spin_lock(&handle_base_lock);
+        handle_base += HANDLE_INCR;
+
+        h->h_cookie = handle_base;
+        if (unlikely(handle_base == 0)) {
+                /*
+                 * Cookie of zero is "dangerous", because in many places it's
+                 * assumed that 0 means "unassigned" handle, not bound to any
+                 * object.
+                 */
+                CWARN("The universe has been exhausted: cookie wrap-around.\n");
+                handle_base += HANDLE_INCR;
+        }
+        spin_unlock(&handle_base_lock);
+        atomic_inc(&handle_count);
         h->h_addref = cb;
+        spin_lock_init(&h->h_lock);
 
-        bucket = handle_hash + (h->h_cookie & HANDLE_HASH_MASK);
+        bucket = &handle_hash[h->h_cookie & HANDLE_HASH_MASK];
+        spin_lock(&bucket->lock);
+        list_add_rcu(&h->h_link, &bucket->head);
+        h->h_in = 1;
+        spin_unlock(&bucket->lock);
 
-        CDEBUG(D_INFO, "adding object %p with handle "LPX64" to hash\n",
+        CDEBUG(D_INFO, "added object %p with handle "LPX64" to hash\n",
                h, h->h_cookie);
-
-        spin_lock(&handle_lock);
-        list_add(&h->h_link, bucket);
-        handle_count++;
-        spin_unlock(&handle_lock);
         EXIT;
 }
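
HANDLE_INCR is odd, so it is coprime with both 2^64 and the power-of-two bucket count: the counter visits every bucket before any index repeats, and a cookie value cannot recur until the counter has stepped through the full 64-bit space (hence the wrap-around CWARN above). A small illustration, with a hypothetical starting value:

    /* Hypothetical: if handle_base were 0x1000, successive cookies land
     * in buckets a fixed stride of 7 apart:
     *   0x1007 & HANDLE_HASH_MASK = 0x1007
     *   0x100e & HANDLE_HASH_MASK = 0x100e
     *   0x1015 & HANDLE_HASH_MASK = 0x1015
     * gcd(7, 16384) == 1, so no bucket is starved over a full cycle. */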
 
 static void class_handle_unhash_nolock(struct portals_handle *h)
 {
-        LASSERT(!list_empty(&h->h_link));
+        if (list_empty(&h->h_link)) {
+                CERROR("removing an already-removed handle ("LPX64")\n",
+                       h->h_cookie);
+                return;
+        }
 
         CDEBUG(D_INFO, "removing object %p with handle "LPX64" from hash\n",
                h, h->h_cookie);
 
-        handle_count--;
-        list_del_init(&h->h_link);
+        spin_lock(&h->h_lock);
+        if (h->h_in == 0) {
+                spin_unlock(&h->h_lock);
+                return;
+        }
+        h->h_in = 0;
+        spin_unlock(&h->h_lock);
+        list_del_rcu(&h->h_link);
 }
 
 void class_handle_unhash(struct portals_handle *h)
 {
-        spin_lock(&handle_lock);
+        struct handle_bucket *bucket;
+        bucket = handle_hash + (h->h_cookie & HANDLE_HASH_MASK);
+
+        spin_lock(&bucket->lock);
         class_handle_unhash_nolock(h);
-        spin_unlock(&handle_lock);
+        spin_unlock(&bucket->lock);
+
+        atomic_dec(&handle_count);
+}
+
+void class_handle_hash_back(struct portals_handle *h)
+{
+        struct handle_bucket *bucket;
+        ENTRY;
+
+        bucket = handle_hash + (h->h_cookie & HANDLE_HASH_MASK);
+
+        atomic_inc(&handle_count);
+        spin_lock(&bucket->lock);
+        list_add_rcu(&h->h_link, &bucket->head);
+        h->h_in = 1;
+        spin_unlock(&bucket->lock);
+
+        EXIT;
 }
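
class_handle_hash_back() is the inverse of class_handle_unhash(): it re-inserts a handle under the cookie it already owns rather than minting a new one. An illustrative pairing (the real call sites are outside this diff):

    class_handle_unhash(h);     /* lookups for h->h_cookie now miss      */
    /* ... object is temporarily invisible to class_handle2object() ... */
    class_handle_hash_back(h);  /* same cookie becomes findable again    */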
 
 void *class_handle2object(__u64 cookie)
 {
-        struct list_head *bucket, *tmp;
+        struct handle_bucket *bucket;
+        struct list_head *tmp;
         void *retval = NULL;
         ENTRY;
 
         LASSERT(handle_hash != NULL);
 
-        spin_lock(&handle_lock);
+        /* Be careful when you want to change this code. See the
+         * rcu_read_lock() definition at the top of this file. - jxiong */
         bucket = handle_hash + (cookie & HANDLE_HASH_MASK);
 
-        list_for_each(tmp, bucket) {
+        rcu_read_lock();
+        list_for_each_rcu(tmp, &bucket->head) {
                 struct portals_handle *h;
                 h = list_entry(tmp, struct portals_handle, h_link);
+                if (h->h_cookie != cookie)
+                        continue;
 
-                if (h->h_cookie == cookie) {
+                spin_lock(&h->h_lock);
+                if (likely(h->h_cookie != 0)) {
                         h->h_addref(h);
                         retval = h;
-                        break;
                 }
+                spin_unlock(&h->h_lock);
+                break;
         }
-        spin_unlock(&handle_lock);
+        rcu_read_unlock();
 
         RETURN(retval);
 }
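
The h_lock/h_cookie check skips a handle whose cookie has already been cleared for teardown, and the h_addref() callback pins the object before the read lock is dropped, so the caller always receives either NULL or a referenced object. A hypothetical caller:

    /* Illustrative only; the matching "put" is whatever pairs with the
     * addref callback that was registered in class_handle_hash(). */
    void *obj = class_handle2object(cookie);
    if (obj == NULL)
            RETURN(-ENOENT);    /* stale or never-issued cookie */
    /* ... use obj, then drop the reference taken by h_addref ... */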
 
+void class_handle_free_cb(struct rcu_head *rcu)
+{
+        struct portals_handle *h = RCU2HANDLE(rcu);
+        if (h->h_free_cb) {
+                h->h_free_cb(h->h_ptr, h->h_size);
+        } else {
+                void *ptr = h->h_ptr;
+                unsigned int size = h->h_size;
+                OBD_FREE(ptr, size);
+        }
+}
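
class_handle_free_cb() is the grace-period half of removal: after list_del_rcu() an old reader may still be walking the entry, so the memory can only be reclaimed once all rcu_read_lock() sections have drained. A hedged sketch of the expected pairing, assuming portals_handle embeds a struct rcu_head (here called h_rcu, which is what the RCU2HANDLE() conversion above implies):

    class_handle_unhash(h);                     /* list_del_rcu() the entry */
    call_rcu(&h->h_rcu, class_handle_free_cb);  /* free after grace period  */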
+
 int class_handle_init(void)
 {
-        struct list_head *bucket;
+        struct handle_bucket *bucket;
 
         LASSERT(handle_hash == NULL);
 
-        PORTAL_ALLOC(handle_hash, sizeof(*handle_hash) * HANDLE_HASH_SIZE);
+        OBD_VMALLOC(handle_hash, sizeof(*bucket) * HANDLE_HASH_SIZE);
         if (handle_hash == NULL)
                 return -ENOMEM;
 
+        spin_lock_init(&handle_base_lock);
         for (bucket = handle_hash + HANDLE_HASH_SIZE - 1; bucket >= handle_hash;
-             bucket--)
-                INIT_LIST_HEAD(bucket);
+             bucket--) {
+                CFS_INIT_LIST_HEAD(&bucket->head);
+                spin_lock_init(&bucket->lock);
+        }
+        ll_get_random_bytes(&handle_base, sizeof(handle_base));
+        LASSERT(handle_base != 0ULL);
 
         return 0;
 }
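
Seeding handle_base from ll_get_random_bytes() randomizes only the starting point of the sequence; as the comment in class_handle_hash() concedes, successor cookies remain predictable. A hedged sketch of the init/cleanup pairing from module setup (the real call sites are outside this diff):

    if (class_handle_init() != 0)
            return -ENOMEM;     /* hash table could not be allocated */
    /* ... hand out handles via class_handle_hash() ... */
    class_handle_cleanup();     /* reports and reaps leaked handles  */
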
@@ -133,34 +221,36 @@ static void cleanup_all_handles(void)
 {
         int i;
 
-        spin_lock(&handle_lock);
         for (i = 0; i < HANDLE_HASH_SIZE; i++) {
                 struct list_head *tmp, *pos;
-                list_for_each_safe(tmp, pos, &(handle_hash[i])) {
+                spin_lock(&handle_hash[i].lock);
+                list_for_each_safe_rcu(tmp, pos, &(handle_hash[i].head)) {
                         struct portals_handle *h;
                         h = list_entry(tmp, struct portals_handle, h_link);
 
-                        CERROR("forcing cleanup for handle "LPX64"\n",
-                               h->h_cookie);
+                        CERROR("force clean handle "LPX64" addr %p addref %p\n",
+                               h->h_cookie, h, h->h_addref);
 
                         class_handle_unhash_nolock(h);
                 }
+                spin_unlock(&handle_hash[i].lock);
         }
-        spin_lock(&handle_lock);
 }
 
 void class_handle_cleanup(void)
 {
+        int count;
         LASSERT(handle_hash != NULL);
 
-        if (handle_count != 0) {
-                CERROR("handle_count at cleanup: %d\n", handle_count);
+        count = atomic_read(&handle_count);
+        if (count != 0) {
+                CERROR("handle_count at cleanup: %d\n", count);
                 cleanup_all_handles();
         }
 
-        PORTAL_FREE(handle_hash, sizeof(*handle_hash) * HANDLE_HASH_SIZE);
+        OBD_VFREE(handle_hash, sizeof(*handle_hash) * HANDLE_HASH_SIZE);
         handle_hash = NULL;
 
-        if (handle_count)
-                CERROR("leaked %d handles\n", handle_count);
+        if (atomic_read(&handle_count))
+                CERROR("leaked %d handles\n", atomic_read(&handle_count));
 }