Whamcloud - gitweb
use special macro for print time_t, cleanup in includes.
[fs/lustre-release.git] / lustre / ldlm / ldlm_resource.c
index f8b363d..aebfe5a 100644 (file)
@@ -5,45 +5,56 @@
  *   Author: Phil Schwan <phil@clusterfs.com>
  *   Author: Peter Braam <braam@clusterfs.com>
  *
- *   This file is part of Lustre, http://www.lustre.org.
+ *   This file is part of the Lustre file system, http://www.lustre.org
+ *   Lustre is a trademark of Cluster File Systems, Inc.
  *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
+ *   You may have signed or agreed to another license before downloading
+ *   this software.  If so, you are bound by the terms and conditions
+ *   of that agreement, and the following does not apply to you.  See the
+ *   LICENSE file included with this distribution for more information.
  *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
+ *   If you did not agree to a different license, then this copy of Lustre
+ *   is open source software; you can redistribute it and/or modify it
+ *   under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
  *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *   In either case, Lustre is distributed in the hope that it will be
+ *   useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+ *   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   license text for more details.
  */
 
 #define DEBUG_SUBSYSTEM S_LDLM
 #ifdef __KERNEL__
-# include <linux/lustre_dlm.h>
+# include <lustre_dlm.h>
 #else
 # include <liblustre.h>
 #endif
 
-#include <linux/obd_class.h>
+#include <obd_class.h>
 #include "ldlm_internal.h"
 
-kmem_cache_t *ldlm_resource_slab, *ldlm_lock_slab;
+cfs_mem_cache_t *ldlm_resource_slab, *ldlm_lock_slab;
 
-DECLARE_MUTEX(ldlm_namespace_lock);
-struct list_head ldlm_namespace_list = LIST_HEAD_INIT(ldlm_namespace_list);
-struct proc_dir_entry *ldlm_type_proc_dir = NULL;
-struct proc_dir_entry *ldlm_ns_proc_dir = NULL;
-struct proc_dir_entry *ldlm_svc_proc_dir = NULL;
+atomic_t ldlm_srv_namespace_nr = ATOMIC_INIT(0);
+atomic_t ldlm_cli_namespace_nr = ATOMIC_INIT(0);
 
-#ifdef __KERNEL__
+struct semaphore ldlm_srv_namespace_lock;
+CFS_LIST_HEAD(ldlm_srv_namespace_list);
+
+struct semaphore ldlm_cli_namespace_lock;
+CFS_LIST_HEAD(ldlm_cli_namespace_list);
+
+cfs_proc_dir_entry_t *ldlm_type_proc_dir = NULL;
+cfs_proc_dir_entry_t *ldlm_ns_proc_dir = NULL;
+cfs_proc_dir_entry_t *ldlm_svc_proc_dir = NULL;
+
+#ifdef LPROCFS
 static int ldlm_proc_dump_ns(struct file *file, const char *buffer,
                              unsigned long count, void *data)
 {
-        ldlm_dump_all_namespaces(D_DLMTRACE);
+        ldlm_dump_all_namespaces(LDLM_NAMESPACE_SERVER, D_DLMTRACE);
+        ldlm_dump_all_namespaces(LDLM_NAMESPACE_CLIENT, D_DLMTRACE);
         RETURN(count);
 }
 
@@ -88,56 +99,44 @@ int ldlm_proc_setup(void)
         RETURN(0);
 
 err_ns:
-        lprocfs_remove(ldlm_ns_proc_dir);
+        lprocfs_remove(&ldlm_ns_proc_dir);
 err_type:
-        lprocfs_remove(ldlm_type_proc_dir);
+        lprocfs_remove(&ldlm_type_proc_dir);
 err:
-        ldlm_type_proc_dir = NULL;
-        ldlm_ns_proc_dir = NULL;
         ldlm_svc_proc_dir = NULL;
         RETURN(rc);
 }
 
 void ldlm_proc_cleanup(void)
 {
-        if (ldlm_svc_proc_dir) {
-                lprocfs_remove(ldlm_svc_proc_dir);
-                ldlm_svc_proc_dir = NULL;
-        }
+        if (ldlm_svc_proc_dir)
+                lprocfs_remove(&ldlm_svc_proc_dir);
 
-        if (ldlm_ns_proc_dir) {
-                lprocfs_remove(ldlm_ns_proc_dir);
-                ldlm_ns_proc_dir = NULL;
-        }
+        if (ldlm_ns_proc_dir)
+                lprocfs_remove(&ldlm_ns_proc_dir);
 
-        if (ldlm_type_proc_dir) {
-                lprocfs_remove(ldlm_type_proc_dir);
-                ldlm_type_proc_dir = NULL;
-        }
+        if (ldlm_type_proc_dir)
+                lprocfs_remove(&ldlm_type_proc_dir);
 }
 
-static int lprocfs_uint_rd(char *page, char **start, off_t off,
-                           int count, int *eof, void *data)
-{
-        unsigned int *temp = (unsigned int *)data;
-        return snprintf(page, count, "%u\n", *temp);
-}
-
-static int lprocfs_read_lru_size(char *page, char **start, off_t off,
-                                 int count, int *eof, void *data)
+static int lprocfs_rd_lru_size(char *page, char **start, off_t off,
+                               int count, int *eof, void *data)
 {
         struct ldlm_namespace *ns = data;
-        return lprocfs_uint_rd(page, start, off, count, eof,
-                               &ns->ns_max_unused);
+        __u32 *nr = &ns->ns_max_unused;
+
+        if (ns_connect_lru_resize(ns))
+                nr = &ns->ns_nr_unused;
+        return lprocfs_rd_uint(page, start, off, count, eof, nr);
 }
 
-#define MAX_STRING_SIZE 128
-static int lprocfs_write_lru_size(struct file *file, const char *buffer,
-                                  unsigned long count, void *data)
+static int lprocfs_wr_lru_size(struct file *file, const char *buffer,
+                               unsigned long count, void *data)
 {
         struct ldlm_namespace *ns = data;
         char dummy[MAX_STRING_SIZE + 1], *end;
         unsigned long tmp;
+        int lru_resize;
 
         dummy[MAX_STRING_SIZE] = '\0';
         if (copy_from_user(dummy, buffer, MAX_STRING_SIZE))
@@ -147,24 +146,67 @@ static int lprocfs_write_lru_size(struct file *file, const char *buffer,
                 CDEBUG(D_DLMTRACE,
                        "dropping all unused locks from namespace %s\n",
                        ns->ns_name);
-                tmp = ns->ns_max_unused;
-                ns->ns_max_unused = 0;
-                ldlm_cancel_lru(ns, LDLM_SYNC);
-                ns->ns_max_unused = tmp;
-               return count;
+                if (ns_connect_lru_resize(ns)) {
+                        int canceled, unused  = ns->ns_nr_unused;
+                        
+                        /* Try to cancel all @ns_nr_unused locks. */
+                        canceled = ldlm_cancel_lru(ns, unused, LDLM_SYNC, 
+                                                   LDLM_CANCEL_PASSED);
+                        if (canceled < unused) {
+                                CERROR("not all requested locks are canceled, "
+                                       "requested: %d, canceled: %d\n", unused, 
+                                       canceled);
+                                return -EINVAL;
+                        }
+                } else {
+                        tmp = ns->ns_max_unused;
+                        ns->ns_max_unused = 0;
+                        ldlm_cancel_lru(ns, 0, LDLM_SYNC, LDLM_CANCEL_PASSED);
+                        ns->ns_max_unused = tmp;
+                }
+                return count;
         }
 
         tmp = simple_strtoul(dummy, &end, 0);
-        if (tmp == 0 && *end) {
+        if (dummy == end) {
                 CERROR("invalid value written\n");
                 return -EINVAL;
         }
+        lru_resize = (tmp == 0);
+        
+        if (ns_connect_lru_resize(ns)) {
+                if (!lru_resize)
+                        ns->ns_max_unused = (unsigned int)tmp;
+                        
+                if (tmp > ns->ns_nr_unused)
+                        tmp = ns->ns_nr_unused;
+                tmp = ns->ns_nr_unused - tmp;
+                
+                CDEBUG(D_DLMTRACE, "changing namespace %s unused locks from %u to %u\n", 
+                       ns->ns_name, ns->ns_nr_unused, (unsigned int)tmp);
+                ldlm_cancel_lru(ns, (unsigned int)tmp, LDLM_ASYNC, LDLM_CANCEL_PASSED);
+                
+                if (!lru_resize) {
+                        CDEBUG(D_DLMTRACE, "disable lru_resize for namespace %s\n", 
+                               ns->ns_name);
+                        ns->ns_connect_flags &= ~OBD_CONNECT_LRU_RESIZE;
+                }
+        } else {
+                CDEBUG(D_DLMTRACE, "changing namespace %s max_unused from %u to %u\n",
+                       ns->ns_name, ns->ns_max_unused, (unsigned int)tmp);
+                ns->ns_max_unused = (unsigned int)tmp;
+                ldlm_cancel_lru(ns, 0, LDLM_ASYNC, LDLM_CANCEL_PASSED);
+                
+                /* Make sure that originally lru resize was supported before 
+                 * turning it on here. */
+                if (lru_resize && 
+                    (ns->ns_orig_connect_flags & OBD_CONNECT_LRU_RESIZE)) {
+                        CDEBUG(D_DLMTRACE, "enable lru_resize for namespace %s\n", 
+                               ns->ns_name);
+                        ns->ns_connect_flags |= OBD_CONNECT_LRU_RESIZE;
+                }
+        }
 
-        CDEBUG(D_DLMTRACE, "changing namespace %s max_unused from %u to %u\n",
-               ns->ns_name, ns->ns_max_unused, (unsigned int)tmp);
-
-        ns->ns_max_unused = (unsigned int)tmp;
-        ldlm_cancel_lru(ns, LDLM_ASYNC);
         return count;
 }
 
@@ -179,40 +221,86 @@ void ldlm_proc_namespace(struct ldlm_namespace *ns)
         lock_name[MAX_STRING_SIZE] = '\0';
 
         memset(lock_vars, 0, sizeof(lock_vars));
-        lock_vars[0].read_fptr = lprocfs_rd_u64;
         lock_vars[0].name = lock_name;
 
         snprintf(lock_name, MAX_STRING_SIZE, "%s/resource_count", ns->ns_name);
-        lock_vars[0].data = &ns->ns_resources;
+        lock_vars[0].data = &ns->ns_refcount;
+        lock_vars[0].read_fptr = lprocfs_rd_atomic;
         lprocfs_add_vars(ldlm_ns_proc_dir, lock_vars, 0);
 
         snprintf(lock_name, MAX_STRING_SIZE, "%s/lock_count", ns->ns_name);
         lock_vars[0].data = &ns->ns_locks;
+        lock_vars[0].read_fptr = lprocfs_rd_atomic;
         lprocfs_add_vars(ldlm_ns_proc_dir, lock_vars, 0);
 
-        if (ns->ns_client) {
+        if (ns_is_client(ns)) {
                 snprintf(lock_name, MAX_STRING_SIZE, "%s/lock_unused_count",
                          ns->ns_name);
                 lock_vars[0].data = &ns->ns_nr_unused;
-                lock_vars[0].read_fptr = lprocfs_uint_rd;
+                lock_vars[0].read_fptr = lprocfs_rd_uint;
                 lprocfs_add_vars(ldlm_ns_proc_dir, lock_vars, 0);
 
                 snprintf(lock_name, MAX_STRING_SIZE, "%s/lru_size",
                          ns->ns_name);
                 lock_vars[0].data = ns;
-                lock_vars[0].read_fptr = lprocfs_read_lru_size;
-                lock_vars[0].write_fptr = lprocfs_write_lru_size;
+                lock_vars[0].read_fptr = lprocfs_rd_lru_size;
+                lock_vars[0].write_fptr = lprocfs_wr_lru_size;
+                lprocfs_add_vars(ldlm_ns_proc_dir, lock_vars, 0);
+
+                snprintf(lock_name, MAX_STRING_SIZE, "%s/shrink_thumb",
+                         ns->ns_name);
+                lock_vars[0].data = ns;
+                lock_vars[0].read_fptr = lprocfs_rd_uint;
+                lock_vars[0].write_fptr = lprocfs_wr_uint;
+                lprocfs_add_vars(ldlm_ns_proc_dir, lock_vars, 0);
+
+                snprintf(lock_name, MAX_STRING_SIZE, "%s/lru_max_age",
+                         ns->ns_name);
+                lock_vars[0].data = &ns->ns_max_age;
+                lock_vars[0].read_fptr = lprocfs_rd_uint;
+                lock_vars[0].write_fptr = lprocfs_wr_uint;
+                lprocfs_add_vars(ldlm_ns_proc_dir, lock_vars, 0);
+        } else {
+                snprintf(lock_name, MAX_STRING_SIZE, "%s/ctime_age_limit",
+                         ns->ns_name);
+                lock_vars[0].data = &ns->ns_ctime_age_limit;
+                lock_vars[0].read_fptr = lprocfs_rd_uint;
+                lock_vars[0].write_fptr = lprocfs_wr_uint;
+                lprocfs_add_vars(ldlm_ns_proc_dir, lock_vars, 0);
+
+                snprintf(lock_name, MAX_STRING_SIZE, "%s/max_nolock_bytes",
+                         ns->ns_name);
+                lock_vars[0].data = &ns->ns_max_nolock_size;
+                lock_vars[0].read_fptr = lprocfs_rd_uint;
+                lock_vars[0].write_fptr = lprocfs_wr_uint;
+                lprocfs_add_vars(ldlm_ns_proc_dir, lock_vars, 0);
+
+                snprintf(lock_name, MAX_STRING_SIZE, "%s/contention_seconds",
+                         ns->ns_name);
+                lock_vars[0].data = &ns->ns_contention_time;
+                lock_vars[0].read_fptr = lprocfs_rd_uint;
+                lock_vars[0].write_fptr = lprocfs_wr_uint;
+                lprocfs_add_vars(ldlm_ns_proc_dir, lock_vars, 0);
+
+                snprintf(lock_name, MAX_STRING_SIZE, "%s/contended_locks",
+                         ns->ns_name);
+                lock_vars[0].data = &ns->ns_contended_locks;
+                lock_vars[0].read_fptr = lprocfs_rd_uint;
+                lock_vars[0].write_fptr = lprocfs_wr_uint;
                 lprocfs_add_vars(ldlm_ns_proc_dir, lock_vars, 0);
         }
 }
-#endif
 #undef MAX_STRING_SIZE
+#else
+#define ldlm_proc_namespace(ns) do {} while (0)
+#endif /* LPROCFS */
 
-struct ldlm_namespace *ldlm_namespace_new(char *name, __u32 client)
+struct ldlm_namespace *ldlm_namespace_new(char *name, ldlm_side_t client, 
+                                          ldlm_appetite_t apt)
 {
         struct ldlm_namespace *ns = NULL;
         struct list_head *bucket;
-        int rc;
+        int rc, idx, namelen;
         ENTRY;
 
         rc = ldlm_get_ref();
@@ -221,7 +309,7 @@ struct ldlm_namespace *ldlm_namespace_new(char *name, __u32 client)
                 RETURN(NULL);
         }
 
-        OBD_ALLOC(ns, sizeof(*ns));
+        OBD_ALLOC_PTR(ns);
         if (!ns)
                 GOTO(out_ref, NULL);
 
@@ -229,42 +317,60 @@ struct ldlm_namespace *ldlm_namespace_new(char *name, __u32 client)
         if (!ns->ns_hash)
                 GOTO(out_ns, NULL);
 
-        OBD_ALLOC(ns->ns_name, strlen(name) + 1);
+        ns->ns_shrink_thumb = LDLM_LOCK_SHRINK_THUMB;
+        ns->ns_appetite = apt;
+        namelen = strlen(name);
+        OBD_ALLOC(ns->ns_name, namelen + 1);
         if (!ns->ns_name)
                 GOTO(out_hash, NULL);
 
         strcpy(ns->ns_name, name);
 
-        INIT_LIST_HEAD(&ns->ns_root_list);
+        CFS_INIT_LIST_HEAD(&ns->ns_root_list);
         ns->ns_refcount = 0;
         ns->ns_client = client;
         spin_lock_init(&ns->ns_hash_lock);
         atomic_set(&ns->ns_locks, 0);
         ns->ns_resources = 0;
-        init_waitqueue_head(&ns->ns_waitq);
+        cfs_waitq_init(&ns->ns_waitq);
+        ns->ns_max_nolock_size = NS_DEFAULT_MAX_NOLOCK_BYTES;
+        ns->ns_contention_time = NS_DEFAULT_CONTENTION_SECONDS;
+        ns->ns_contended_locks = NS_DEFAULT_CONTENDED_LOCKS;
 
         for (bucket = ns->ns_hash + RES_HASH_SIZE - 1; bucket >= ns->ns_hash;
              bucket--)
-                INIT_LIST_HEAD(bucket);
+                CFS_INIT_LIST_HEAD(bucket);
 
-        INIT_LIST_HEAD(&ns->ns_unused_list);
+        CFS_INIT_LIST_HEAD(&ns->ns_unused_list);
         ns->ns_nr_unused = 0;
         ns->ns_max_unused = LDLM_DEFAULT_LRU_SIZE;
+        ns->ns_max_age = LDLM_DEFAULT_MAX_ALIVE;
+        ns->ns_ctime_age_limit = LDLM_CTIME_AGE_LIMIT;
         spin_lock_init(&ns->ns_unused_lock);
-
-        down(&ldlm_namespace_lock);
-        list_add(&ns->ns_list_chain, &ldlm_namespace_list);
-        up(&ldlm_namespace_lock);
-#ifdef __KERNEL__
+        ns->ns_orig_connect_flags = 0;
+        ns->ns_connect_flags = 0;
         ldlm_proc_namespace(ns);
-#endif
-        RETURN(ns);
 
+        idx = atomic_read(ldlm_namespace_nr(client));
+        rc = ldlm_pool_init(&ns->ns_pool, ns, idx, client);
+        if (rc) {
+                CERROR("Can't initialize lock pool, rc %d\n", rc);
+                GOTO(out_proc, rc);
+        }
+
+        mutex_down(ldlm_namespace_lock(client));
+        list_add(&ns->ns_list_chain, ldlm_namespace_list(client));
+        atomic_inc(ldlm_namespace_nr(client));
+        mutex_up(ldlm_namespace_lock(client));
+
+        RETURN(ns);
+out_proc:
+        ldlm_namespace_cleanup(ns, 0);
+        OBD_FREE(ns->ns_name, namelen + 1);
 out_hash:
-        POISON(ns->ns_hash, 0x5a, sizeof(*ns->ns_hash) * RES_HASH_SIZE);
         OBD_VFREE(ns->ns_hash, sizeof(*ns->ns_hash) * RES_HASH_SIZE);
 out_ns:
-        OBD_FREE(ns, sizeof(*ns));
+        OBD_FREE_PTR(ns);
 out_ref:
         ldlm_put_ref(0);
         RETURN(NULL);
@@ -281,14 +387,14 @@ static void cleanup_resource(struct ldlm_resource *res, struct list_head *q,
                              int flags)
 {
         struct list_head *tmp;
-        int rc = 0, client = res->lr_namespace->ns_client;
+        int rc = 0, client = ns_is_client(res->lr_namespace);
         int local_only = (flags & LDLM_FL_LOCAL_ONLY);
         ENTRY;
 
-        
+
         do {
                 struct ldlm_lock *lock = NULL;
+
                 /* first, we look for non-cleaned-yet lock
                  * all cleaned locks are marked by CLEANED flag */
                 lock_res(res);
@@ -302,7 +408,7 @@ static void cleanup_resource(struct ldlm_resource *res, struct list_head *q,
                         lock->l_flags |= LDLM_FL_CLEANED;
                         break;
                 }
-                
+
                 if (lock == NULL) {
                         unlock_res(res);
                         break;
@@ -314,13 +420,15 @@ static void cleanup_resource(struct ldlm_resource *res, struct list_head *q,
                 lock->l_flags |= LDLM_FL_FAILED;
                 lock->l_flags |= flags;
 
+                /* For local_only cleanup, do not send a CANCEL message. */
+                if (local_only)
+                        lock->l_flags |= LDLM_FL_LOCAL_ONLY;
+
                 if (local_only && (lock->l_readers || lock->l_writers)) {
                         /* This is a little bit gross, but much better than the
                          * alternative: pretend that we got a blocking AST from
                          * the server, so that when the lock is decref'd, it
                          * will go away ... */
-                        /* ... without sending a CANCEL message. */
-                        lock->l_flags |= LDLM_FL_LOCAL_ONLY;
                         unlock_res(res);
                         LDLM_DEBUG(lock, "setting FL_LOCAL_ONLY");
                         if (lock->l_completion_ast)
@@ -334,14 +442,9 @@ static void cleanup_resource(struct ldlm_resource *res, struct list_head *q,
 
                         unlock_res(res);
                         ldlm_lock2handle(lock, &lockh);
-                        if (!local_only) {
-                                rc = ldlm_cli_cancel(&lockh);
-                                if (rc)
-                                        CERROR("ldlm_cli_cancel: %d\n", rc);
-                        }
-                        /* Force local cleanup on errors, too. */
-                        if (local_only || rc != ELDLM_OK)
-                                ldlm_lock_cancel(lock);
+                        rc = ldlm_cli_cancel(&lockh);
+                        if (rc)
+                                CERROR("ldlm_cli_cancel: %d\n", rc);
                 } else {
                         ldlm_resource_unlink_lock(lock);
                         unlock_res(res);
@@ -381,32 +484,16 @@ int ldlm_namespace_cleanup(struct ldlm_namespace *ns, int flags)
                         spin_lock(&ns->ns_hash_lock);
                         tmp  = tmp->next;
 
-#if 0
-                        /* XXX what a mess: don't force cleanup if we're
-                         * local_only (which is only used by recovery).  In that
-                         * case, we probably still have outstanding lock refs
-                         * which reference these resources. -phil */
-                        if (!ldlm_resource_putref_locked(res) &&
-                            !(flags & LDLM_FL_LOCAL_ONLY)) {
-                                CERROR("Resource refcount nonzero (%d) after "
-                                       "lock cleanup; forcing cleanup.\n",
-                                       atomic_read(&res->lr_refcount));
-                                ldlm_resource_dump(D_ERROR, res);
-                                atomic_set(&res->lr_refcount, 1);
-                                ldlm_resource_putref_locked(res);
-                        }
-#endif
                         /* XXX: former stuff caused issues in case of race
                          * between ldlm_namespace_cleanup() and lockd() when
                          * client gets blocking ast when lock gets distracted by
                          * server. This is 1_4 branch solution, let's see how
                          * will it behave. */
-                        if (!ldlm_resource_putref_locked(res)) {
-                                CERROR("Namespace %s resource refcount nonzero "
+                        if (!ldlm_resource_putref_locked(res))
+                                CDEBUG(D_INFO,
+                                       "Namespace %s resource refcount nonzero "
                                        "(%d) after lock cleanup; forcing cleanup.\n",
                                        ns->ns_name, atomic_read(&res->lr_refcount));
-                                ldlm_resource_dump(D_ERROR, res);
-                        }
                 }
                 spin_unlock(&ns->ns_hash_lock);
         }
@@ -414,29 +501,57 @@ int ldlm_namespace_cleanup(struct ldlm_namespace *ns, int flags)
         return ELDLM_OK;
 }
 
-static inline int ldlm_ns_refcount_atomic(struct ldlm_namespace *ns)
-{
-        int refcount;
-        spin_lock(&ns->ns_hash_lock);
-        refcount = ns->ns_refcount;
-        spin_unlock(&ns->ns_hash_lock);
-        return refcount;
-}
-
 /* Cleanup, but also free, the namespace */
-int ldlm_namespace_free(struct ldlm_namespace *ns, int force)
+int ldlm_namespace_free_prior(struct ldlm_namespace *ns)
 {
+        ENTRY;
         if (!ns)
                 RETURN(ELDLM_OK);
 
-        down(&ldlm_namespace_lock);
-        list_del(&ns->ns_list_chain);
-        up(&ldlm_namespace_lock);
+        mutex_down(ldlm_namespace_lock(ns->ns_client));
+        /*
+         * Some asserts, and possibly other parts of the code, still use
+         * list_empty(&ns->ns_list_chain). This is why it is important
+         * to use list_del_init() here.
+         */
+        list_del_init(&ns->ns_list_chain);
+        atomic_dec(ldlm_namespace_nr(ns->ns_client));
+        ldlm_pool_fini(&ns->ns_pool);
+        mutex_up(ldlm_namespace_lock(ns->ns_client));
 
         /* At shutdown time, don't call the cancellation callback */
         ldlm_namespace_cleanup(ns, 0);
 
-#ifdef __KERNEL__
+        if (ns->ns_refcount > 0) {
+                struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL);
+                int rc;
+                CDEBUG(D_DLMTRACE,
+                       "dlm namespace %s free waiting on refcount %d\n",
+                       ns->ns_name, ns->ns_refcount);
+                rc = l_wait_event(ns->ns_waitq,
+                                  ns->ns_refcount == 0, &lwi);
+                if (ns->ns_refcount)
+                        LCONSOLE_ERROR_MSG(0x139, "Lock manager: wait for %s "
+                                           "namespace cleanup aborted with %d "
+                                           "resources in use. (%d)\nI'm going "
+                                           "to try to clean up anyway, but I "
+                                           "might need a reboot of this node.\n",
+                                            ns->ns_name, (int) ns->ns_refcount, 
+                                            rc);
+                CDEBUG(D_DLMTRACE,
+                       "dlm namespace %s free done waiting\n", ns->ns_name);
+        }
+
+        RETURN(ELDLM_OK);
+}
+
+int ldlm_namespace_free_post(struct ldlm_namespace *ns, int force)
+{
+        ENTRY;
+        if (!ns)
+                RETURN(ELDLM_OK);
+
+#ifdef LPROCFS
         {
                 struct proc_dir_entry *dir;
                 dir = lprocfs_srch(ldlm_ns_proc_dir, ns->ns_name);
@@ -444,48 +559,101 @@ int ldlm_namespace_free(struct ldlm_namespace *ns, int force)
                         CERROR("dlm namespace %s has no procfs dir?\n",
                                ns->ns_name);
                 } else {
-                        lprocfs_remove(dir);
+                        lprocfs_remove(&dir);
                 }
         }
 #endif
 
-        if (ldlm_ns_refcount_atomic(ns) > 0) {
-                struct l_wait_info lwi = LWI_INTR(NULL, NULL);
-                int rc;
-                CDEBUG(D_DLMTRACE,
-                       "dlm namespace %s free waiting on refcount %d\n",
-                       ns->ns_name, ns->ns_refcount);
-                rc = l_wait_event(ns->ns_waitq,
-                                  ldlm_ns_refcount_atomic(ns) == 0, &lwi);
-                if (ldlm_ns_refcount_atomic(ns)) {
-                        CDEBUG(D_ERROR, "Lock manager: wait for %s namespace "
-                               "cleanup aborted with %d resources in "
-                               "use. (%d)\nI'm going to try to clean "
-                               "up anyway, but I might need a reboot "
-                               "of this node.\n", ns->ns_name,
-                               ldlm_ns_refcount_atomic(ns), rc);
-                }
-                CDEBUG(D_DLMTRACE,
-                       "dlm namespace %s free done waiting\n", ns->ns_name);
-        }
-
-        POISON(ns->ns_hash, 0x5a, sizeof(*ns->ns_hash) * RES_HASH_SIZE);
         OBD_VFREE(ns->ns_hash, sizeof(*ns->ns_hash) * RES_HASH_SIZE);
         OBD_FREE(ns->ns_name, strlen(ns->ns_name) + 1);
-        OBD_FREE(ns, sizeof(*ns));
-
+        /*
+         * @ns must not be on the list at this point; otherwise this will cause
+         * issues related to the pools thread using the freed @ns.
+         */
+        LASSERT(list_empty(&ns->ns_list_chain));
+        OBD_FREE_PTR(ns);
         ldlm_put_ref(force);
+        RETURN(ELDLM_OK);
+}
+
 
+/* Cleanup the resource, and free namespace.
+ * bug 12864:
+ * Deadlock issue:
+ * proc1: destroy import
+ *        class_disconnect_export(grab cl_sem) ->
+ *              -> ldlm_namespace_free ->
+ *              -> lprocfs_remove(grab _lprocfs_lock).
+ * proc2: read proc info
+ *        lprocfs_fops_read(grab _lprocfs_lock) ->
+ *              -> osc_rd_active, etc(grab cl_sem).
+ *
+ * Therefore ldlm_namespace_free is split into two parts: the first part,
+ * ldlm_namespace_free_prior, cleans up the resources that are still in
+ * use; the second part, ldlm_namespace_free_post, unregisters the lprocfs
+ * entries and then frees the memory. The second part is to be called without
+ * cli->cl_sem held.
+ */
+int ldlm_namespace_free(struct ldlm_namespace *ns, int force)
+{
+        ldlm_namespace_free_prior(ns);
+        ldlm_namespace_free_post(ns, force);
         return ELDLM_OK;
 }
 
-static __u32 ldlm_hash_fn(struct ldlm_resource *parent, struct ldlm_res_id name)
+
+void ldlm_namespace_get_nolock(struct ldlm_namespace *ns)
+{
+        LASSERT(ns->ns_refcount >= 0);
+        ns->ns_refcount++;
+}
+
+void ldlm_namespace_get(struct ldlm_namespace *ns)
+{
+        spin_lock(&ns->ns_hash_lock);
+        ldlm_namespace_get_nolock(ns);
+        spin_unlock(&ns->ns_hash_lock);
+}
+
+void ldlm_namespace_put_nolock(struct ldlm_namespace *ns, int wakeup)
+{
+        LASSERT(ns->ns_refcount > 0);
+        ns->ns_refcount--;
+        if (ns->ns_refcount == 0 && wakeup)
+                wake_up(&ns->ns_waitq);
+}
+
+void ldlm_namespace_put(struct ldlm_namespace *ns, int wakeup)
+{
+        spin_lock(&ns->ns_hash_lock);
+        ldlm_namespace_put_nolock(ns, wakeup);
+        spin_unlock(&ns->ns_hash_lock);
+}
+
+/* Must be called with ldlm_namespace_lock(client) held. */
+void ldlm_namespace_move(struct ldlm_namespace *ns, ldlm_side_t client)
+{
+        LASSERT(!list_empty(&ns->ns_list_chain));
+        LASSERT_SEM_LOCKED(ldlm_namespace_lock(client));
+        list_move_tail(&ns->ns_list_chain, ldlm_namespace_list(client));
+}
+
+/* Must be called with ldlm_namespace_lock(client) held. */
+struct ldlm_namespace *ldlm_namespace_first(ldlm_side_t client)
+{
+        LASSERT_SEM_LOCKED(ldlm_namespace_lock(client));
+        LASSERT(!list_empty(ldlm_namespace_list(client)));
+        return container_of(ldlm_namespace_list(client)->next, 
+                struct ldlm_namespace, ns_list_chain);
+}
+static __u32 ldlm_hash_fn(struct ldlm_resource *parent,
+                          const struct ldlm_res_id *name)
 {
         __u32 hash = 0;
         int i;
 
         for (i = 0; i < RES_NAME_SIZE; i++)
-                hash += name.name[i];
+                hash += name->name[i];
 
         hash += (__u32)((unsigned long)parent >> 4);
 
@@ -495,18 +663,27 @@ static __u32 ldlm_hash_fn(struct ldlm_resource *parent, struct ldlm_res_id name)
 static struct ldlm_resource *ldlm_resource_new(void)
 {
         struct ldlm_resource *res;
+        int idx;
 
-        OBD_SLAB_ALLOC(res, ldlm_resource_slab, SLAB_NOFS, sizeof *res);
+        OBD_SLAB_ALLOC(res, ldlm_resource_slab, CFS_ALLOC_IO, sizeof *res);
         if (res == NULL)
                 return NULL;
 
         memset(res, 0, sizeof(*res));
 
-        INIT_LIST_HEAD(&res->lr_children);
-        INIT_LIST_HEAD(&res->lr_childof);
-        INIT_LIST_HEAD(&res->lr_granted);
-        INIT_LIST_HEAD(&res->lr_converting);
-        INIT_LIST_HEAD(&res->lr_waiting);
+        CFS_INIT_LIST_HEAD(&res->lr_children);
+        CFS_INIT_LIST_HEAD(&res->lr_childof);
+        CFS_INIT_LIST_HEAD(&res->lr_granted);
+        CFS_INIT_LIST_HEAD(&res->lr_converting);
+        CFS_INIT_LIST_HEAD(&res->lr_waiting);
+
+        /* Initialize the interval tree for each lock mode. */
+        for (idx = 0; idx < LCK_MODE_NUM; idx++) {
+                res->lr_itree[idx].lit_size = 0;
+                res->lr_itree[idx].lit_mode = 1 << idx;
+                res->lr_itree[idx].lit_root = NULL;
+        }
+
         atomic_set(&res->lr_refcount, 1);
         spin_lock_init(&res->lr_lock);
 
@@ -519,7 +696,8 @@ static struct ldlm_resource *ldlm_resource_new(void)
 
 /* must be called with hash lock held */
 static struct ldlm_resource *
-ldlm_resource_find(struct ldlm_namespace *ns, struct ldlm_res_id name, __u32 hash)
+ldlm_resource_find(struct ldlm_namespace *ns, const struct ldlm_res_id *name,
+                   __u32 hash)
 {
         struct list_head *bucket, *tmp;
         struct ldlm_resource *res;
@@ -529,7 +707,7 @@ ldlm_resource_find(struct ldlm_namespace *ns, struct ldlm_res_id name, __u32 has
 
         list_for_each(tmp, bucket) {
                 res = list_entry(tmp, struct ldlm_resource, lr_hash);
-                if (memcmp(&res->lr_name, &name, sizeof(res->lr_name)) == 0)
+                if (memcmp(&res->lr_name, name, sizeof(res->lr_name)) == 0)
                         return res;
         }
 
@@ -540,20 +718,20 @@ ldlm_resource_find(struct ldlm_namespace *ns, struct ldlm_res_id name, __u32 has
  * Returns: newly-allocated, referenced, unlocked resource */
 static struct ldlm_resource *
 ldlm_resource_add(struct ldlm_namespace *ns, struct ldlm_resource *parent,
-                  struct ldlm_res_id name, __u32 hash, __u32 type)
+                  const struct ldlm_res_id *name, __u32 hash, ldlm_type_t type)
 {
         struct list_head *bucket;
         struct ldlm_resource *res, *old_res;
         ENTRY;
 
-        LASSERTF(type >= LDLM_MIN_TYPE && type <= LDLM_MAX_TYPE,
-                 "type: %d", type);
+        LASSERTF(type >= LDLM_MIN_TYPE && type < LDLM_MAX_TYPE,
+                 "type: %d\n", type);
 
         res = ldlm_resource_new();
         if (!res)
                 RETURN(NULL);
 
-        memcpy(&res->lr_name, &name, sizeof(res->lr_name));
+        res->lr_name = *name;
         res->lr_namespace = ns;
         res->lr_type = type;
         res->lr_most_restr = LCK_NL;
@@ -577,7 +755,7 @@ ldlm_resource_add(struct ldlm_namespace *ns, struct ldlm_resource *parent,
         bucket = ns->ns_hash + hash;
         list_add(&res->lr_hash, bucket);
         ns->ns_resources++;
-        ns->ns_refcount++;
+        ldlm_namespace_get_nolock(ns);
 
         if (parent == NULL) {
                 list_add(&res->lr_childof, &ns->ns_root_list);
@@ -594,7 +772,7 @@ ldlm_resource_add(struct ldlm_namespace *ns, struct ldlm_resource *parent,
                 rc = ns->ns_lvbo->lvbo_init(res);
                 if (rc)
                         CERROR("lvbo_init failed for resource "
-                              LPU64": rc %d\n", name.name[0], rc);
+                               LPU64": rc %d\n", name->name[0], rc);
                 /* we create resource with locked lr_lvb_sem */
                 up(&res->lr_lvb_sem);
         }
@@ -607,7 +785,7 @@ ldlm_resource_add(struct ldlm_namespace *ns, struct ldlm_resource *parent,
  * Returns: referenced, unlocked ldlm_resource or NULL */
 struct ldlm_resource *
 ldlm_resource_get(struct ldlm_namespace *ns, struct ldlm_resource *parent,
-                  struct ldlm_res_id name, __u32 type, int create)
+                  const struct ldlm_res_id *name, ldlm_type_t type, int create)
 {
         __u32 hash = ldlm_hash_fn(parent, name);
         struct ldlm_resource *res = NULL;
@@ -615,7 +793,7 @@ ldlm_resource_get(struct ldlm_namespace *ns, struct ldlm_resource *parent,
 
         LASSERT(ns != NULL);
         LASSERT(ns->ns_hash != NULL);
-        LASSERT(name.name[0] != 0);
+        LASSERT(name->name[0] != 0);
 
         spin_lock(&ns->ns_hash_lock);
         res = ldlm_resource_find(ns, name, hash);
@@ -674,7 +852,9 @@ void __ldlm_resource_putref_final(struct ldlm_resource *res)
                 LBUG();
         }
 
-        ns->ns_refcount--;
+        /* Pass 0 here to not wake ->ns_waitq up yet; we will do it a few
+         * lines below when all children are freed. */
+        ldlm_namespace_put_nolock(ns, 0);
         list_del_init(&res->lr_hash);
         list_del_init(&res->lr_childof);
 
@@ -692,10 +872,11 @@ int ldlm_resource_putref(struct ldlm_resource *res)
 
         CDEBUG(D_INFO, "putref res: %p count: %d\n", res,
                atomic_read(&res->lr_refcount) - 1);
-        LASSERT(atomic_read(&res->lr_refcount) > 0);
-        LASSERT(atomic_read(&res->lr_refcount) < LI_POISON);
+        LASSERTF(atomic_read(&res->lr_refcount) > 0, "%d",
+                 atomic_read(&res->lr_refcount));
+        LASSERTF(atomic_read(&res->lr_refcount) < LI_POISON, "%d",
+                 atomic_read(&res->lr_refcount));
 
-        LASSERT(atomic_read(&res->lr_refcount) >= 0);
         if (atomic_dec_and_lock(&res->lr_refcount, &ns->ns_hash_lock)) {
                 __ldlm_resource_putref_final(res);
                 spin_unlock(&ns->ns_hash_lock);
@@ -750,41 +931,76 @@ void ldlm_resource_add_lock(struct ldlm_resource *res, struct list_head *head,
         list_add_tail(&lock->l_res_link, head);
 }
 
+/* Link @new into whichever list @original->l_res_link is on, directly
+ * after @original.  check_res_locked() is run on @original's resource
+ * first, so the caller is presumably expected to hold that resource's
+ * lock -- TODO confirm.  A lock already marked destroyed is only logged
+ * and is never linked. */
+void ldlm_resource_insert_lock_after(struct ldlm_lock *original,
+                                     struct ldlm_lock *new)
+{
+        struct ldlm_resource *owner = original->l_resource;
+
+        check_res_locked(owner);
+
+        /* Debug trace: the resource first, then the lock being added. */
+        ldlm_resource_dump(D_OTHER, owner);
+        CDEBUG(D_OTHER, "About to insert this lock after %p:\n", original);
+        ldlm_lock_dump(D_OTHER, new, 0);
+
+        if (new->l_destroyed) {
+                CDEBUG(D_OTHER, "Lock destroyed, not adding to resource\n");
+                return;
+        }
+
+        /* @new->l_res_link is asserted to be unlinked before it is added. */
+        LASSERT(list_empty(&new->l_res_link));
+        list_add(&new->l_res_link, &original->l_res_link);
+}
+
+/* Take @lock off its resource list entry (l_res_link).  Before the list
+ * entry is removed a type-specific unlink helper runs: the skiplist
+ * helper for LDLM_IBITS and LDLM_PLAIN resources, the extent helper for
+ * LDLM_EXTENT resources; other resource types get no extra step. */
 void ldlm_resource_unlink_lock(struct ldlm_lock *lock)
 {
+        int type = lock->l_resource->lr_type;
+
         check_res_locked(lock->l_resource);
+        if (type == LDLM_IBITS || type == LDLM_PLAIN)
+                ldlm_unlink_lock_skiplist(lock);
+        else if (type == LDLM_EXTENT)
+                ldlm_extent_unlink_lock(lock);
         list_del_init(&lock->l_res_link);
 }
 
+/* Fill @desc from @res by copying the resource type and resource name. */
 void ldlm_res2desc(struct ldlm_resource *res, struct ldlm_resource_desc *desc)
 {
         desc->lr_type = res->lr_type;
-        memcpy(&desc->lr_name, &res->lr_name, sizeof(desc->lr_name));
+        desc->lr_name = res->lr_name;
 }
 
+/* Dump every namespace on the list selected by @client, passing @level
+ * through to ldlm_namespace_dump() for each entry. */
-void ldlm_dump_all_namespaces(int level)
+void ldlm_dump_all_namespaces(ldlm_side_t client, int level)
 {
         struct list_head *tmp;
 
-        down(&ldlm_namespace_lock);
+        /* Nothing to do when @level shares no bit with libcfs_debug or
+         * D_ERROR; return before taking the mutex. */
+        if (!((libcfs_debug | D_ERROR) & level))
+                return;
+
+        /* The list walk below runs with the mutex returned by
+         * ldlm_namespace_lock(client) held. */
+        mutex_down(ldlm_namespace_lock(client));
 
-        list_for_each(tmp, &ldlm_namespace_list) {
+        list_for_each(tmp, ldlm_namespace_list(client)) {
                 struct ldlm_namespace *ns;
                 ns = list_entry(tmp, struct ldlm_namespace, ns_list_chain);
                 ldlm_namespace_dump(level, ns);
         }
 
-        up(&ldlm_namespace_lock);
+        mutex_up(ldlm_namespace_lock(client));
 }
 
 void ldlm_namespace_dump(int level, struct ldlm_namespace *ns)
 {
         struct list_head *tmp;
 
-        CDEBUG(level, "--- Namespace: %s (rc: %d, client: %d)\n",
-               ns->ns_name, ns->ns_refcount, ns->ns_client);
+        if (!((libcfs_debug | D_ERROR) & level))
+                return;
+
+        CDEBUG(level, "--- Namespace: %s (rc: %d, side: %s)\n", 
+               ns->ns_name, ns->ns_refcount, 
+               ns_is_client(ns) ? "client" : "server");
 
-        if (time_before(jiffies, ns->ns_next_dump))
+        if (cfs_time_before(cfs_time_current(), ns->ns_next_dump))
                 return;
 
         spin_lock(&ns->ns_hash_lock);
@@ -799,12 +1015,12 @@ void ldlm_namespace_dump(int level, struct ldlm_namespace *ns)
                 lock_res(res);
                 ldlm_resource_dump(level, res);
                 unlock_res(res);
-                
+
                 spin_lock(&ns->ns_hash_lock);
                 tmp = tmp->next;
                 ldlm_resource_putref_locked(res);
         }
-        ns->ns_next_dump = jiffies + 10 * HZ;
+        ns->ns_next_dump = cfs_time_shift(10);
         spin_unlock(&ns->ns_hash_lock);
 }
 
@@ -813,8 +1029,10 @@ void ldlm_resource_dump(int level, struct ldlm_resource *res)
         struct list_head *tmp;
         int pos;
 
-        if (RES_NAME_SIZE != 4)
-                LBUG();
+        CLASSERT(RES_NAME_SIZE == 4);
+
+        if (!((libcfs_debug | D_ERROR) & level))
+                return;
 
         CDEBUG(level, "--- Resource: %p ("LPU64"/"LPU64"/"LPU64"/"LPU64
                ") (rc: %d)\n", res, res->lr_name.name[0], res->lr_name.name[1],