Whamcloud - gitweb
LU-1166 recovery: don't leak a connected client counter.
[fs/lustre-release.git] / lustre / obdclass / genops.c
index 8b00ad9..ce151fa 100644 (file)
@@ -28,6 +28,8 @@
 /*
  * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Whamcloud, Inc.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
@@ -46,7 +48,6 @@
 #include <obd_ost.h>
 #include <obd_class.h>
 #include <lprocfs_status.h>
-#include <libcfs/bitmap.h>
 
 extern cfs_list_t obd_types;
 cfs_spinlock_t obd_types_lock;
@@ -67,240 +68,6 @@ static void print_export_data(struct obd_export *exp,
 
 int (*ptlrpc_put_connection_superhack)(struct ptlrpc_connection *c);
 
-
-cfs_spinlock_t obd_minor_lock;
-/**
- * Maximum number of OBD devices on a single node (includes devices
- * from all filesystems mounted on a client). This limit is itself arbitrary,
- * though the lov_user_md_{v1,v3} structures (used for specifying the
- * striping layout from llapi_setstripe() and on directory default EAs)
- * have a 16-bit limit on the starting OST index.
- **/
-const int obd_minor_map_size = 65536;
-cfs_bitmap_t *obd_minor_map;
-
-int obd_minor_alloc(void)
-{
-        int ret;
-
-        cfs_spin_lock(&obd_minor_lock);
-        ret = cfs_find_first_zero_bit(obd_minor_map->data, obd_minor_map_size);
-        if (ret != obd_minor_map_size)
-                cfs_bitmap_set(obd_minor_map, ret);
-        else
-                ret = -1;
-        cfs_spin_unlock(&obd_minor_lock);
-
-        return ret;
-}
-void obd_minor_release(long minor)
-{
-        cfs_spin_lock(&obd_minor_lock);
-        cfs_bitmap_clear(obd_minor_map, minor);
-        cfs_spin_unlock(&obd_minor_lock);
-}
-
-int obd_minor_valid(long minor)
-{
-        int ret;
-
-        cfs_spin_lock(&obd_minor_lock);
-        ret = cfs_bitmap_check(obd_minor_map, minor);
-        cfs_spin_unlock(&obd_minor_lock);
-
-        return ret;
-}
-
-static CFS_LIST_HEAD(obd_dev_list);
-static const int obd_hash_init_bits = 10;
-static const int obd_hash_max_bits = 30;
-static const int obd_hash_bkt_bits = 10;
-
-static cfs_hash_t *obd_name_hash = NULL;
-static unsigned obd_name_hops_hash(cfs_hash_t *lh, const void *key,
-                                   unsigned mask)
-{
-        return cfs_hash_djb2_hash(key, strlen(key), mask);
-}
-
-static void *obd_name_hops_obj(cfs_hlist_node_t *hn)
-{
-        struct obd_device *obd = cfs_hlist_entry(hn, struct obd_device,
-                                                 obd_name_node);
-        LASSERTF(obd->obd_magic == OBD_DEVICE_MAGIC,
-                 "%p obd_magic %08x != %08x\n",
-                 obd, obd->obd_magic, OBD_DEVICE_MAGIC);
-
-        return (void *)obd;
-}
-
-static void *obd_name_hops_key(cfs_hlist_node_t *hn)
-{
-        struct obd_device *obd = obd_name_hops_obj(hn);
-
-        return &obd->obd_name;
-}
-
-static int obd_name_hops_compare(const void *key, cfs_hlist_node_t *hn)
-{
-        void *nk = obd_name_hops_key(hn);
-
-        return strcmp(key, nk) == 0;
-}
-
-static void obd_name_hops_noop(cfs_hash_t *hs, cfs_hlist_node_t *hn)
-{
-        obd_name_hops_obj(hn);
-}
-
-static cfs_hash_ops_t obd_name_hops = {
-        .hs_hash        = obd_name_hops_hash,
-        .hs_keycmp      = obd_name_hops_compare,
-        .hs_key         = obd_name_hops_key,
-        .hs_object      = obd_name_hops_obj,
-        .hs_get         = obd_name_hops_noop,
-        .hs_put_locked  = obd_name_hops_noop,
-};
-
-static cfs_hash_t *obd_uuid_hash = NULL;
-static unsigned obd_uuid_hops_hash(cfs_hash_t *lh, const void *key,
-                                   unsigned mask)
-{
-        return cfs_hash_djb2_hash(key, strlen(key), mask);
-}
-
-static void *obd_uuid_hops_obj(cfs_hlist_node_t *hn)
-{
-        struct obd_device *obd = cfs_hlist_entry(hn, struct obd_device,
-                                                 obd_uuid_node);
-        LASSERTF(obd->obd_magic == OBD_DEVICE_MAGIC,
-                 "%p obd_magic %08x != %08x\n",
-                 obd, obd->obd_magic, OBD_DEVICE_MAGIC);
-
-        return (void *)obd;
-}
-
-static void *obd_uuid_hops_key(cfs_hlist_node_t *hn)
-{
-        struct obd_device *obd = obd_uuid_hops_obj(hn);
-
-        return &obd->obd_uuid;
-}
-
-static int obd_uuid_hops_compare(const void *key, cfs_hlist_node_t *hn)
-{
-        void *nk = obd_uuid_hops_key(hn);
-
-        return obd_uuid_equals(key, nk);
-}
-
-static void obd_uuid_hops_noop(cfs_hash_t *hs, cfs_hlist_node_t *hn)
-{
-        obd_uuid_hops_obj(hn);
-}
-
-static cfs_hash_ops_t obd_uuid_hops = {
-        .hs_hash        = obd_uuid_hops_hash,
-        .hs_keycmp      = obd_uuid_hops_compare,
-        .hs_key         = obd_uuid_hops_key,
-        .hs_object      = obd_uuid_hops_obj,
-        .hs_get         = obd_uuid_hops_noop,
-        .hs_put_locked  = obd_uuid_hops_noop,
-};
-
-static cfs_hash_t *obd_minor_hash = NULL;
-static unsigned obd_minor_hops_hash(cfs_hash_t *lh, const void *key,
-                                    unsigned mask)
-{
-        return cfs_hash_u32_hash(*((__u32 *)key), mask);
-}
-
-static void *obd_minor_hops_obj(cfs_hlist_node_t *hn)
-{
-        struct obd_device *obd = cfs_hlist_entry(hn, struct obd_device,
-                                                 obd_minor_node);
-        LASSERTF(obd->obd_magic == OBD_DEVICE_MAGIC,
-                 "%p obd_magic %08x != %08x\n",
-                 obd, obd->obd_magic, OBD_DEVICE_MAGIC);
-
-        return (void *)obd;
-}
-
-static void *obd_minor_hops_key(cfs_hlist_node_t *hn)
-{
-        struct obd_device *obd = obd_minor_hops_obj(hn);
-
-        return &obd->obd_minor;
-}
-
-static int obd_minor_hops_compare(const void *key, cfs_hlist_node_t *hn)
-{
-        __u32 *nk = obd_minor_hops_key(hn);
-
-        return *((__u32 *)key) == *nk;
-}
-
-static void obd_minor_hops_noop(cfs_hash_t *hs, cfs_hlist_node_t *hn)
-{
-        obd_minor_hops_obj(hn);
-}
-
-static cfs_hash_ops_t obd_minor_hops = {
-        .hs_hash        = obd_minor_hops_hash,
-        .hs_keycmp      = obd_minor_hops_compare,
-        .hs_key         = obd_minor_hops_key,
-        .hs_object      = obd_minor_hops_obj,
-        .hs_get         = obd_minor_hops_noop,
-        .hs_put_locked  = obd_minor_hops_noop,
-};
-
-int obd_hashes_init(void)
-{
-        obd_name_hash = cfs_hash_create("obd_name",
-                                        obd_hash_init_bits, obd_hash_max_bits,
-                                        obd_hash_bkt_bits, 0,
-                                        CFS_HASH_MIN_THETA, CFS_HASH_MAX_THETA,
-                                        &obd_name_hops,
-                                        CFS_HASH_DEFAULT | CFS_HASH_NO_ITEMREF);
-        if (obd_name_hash == NULL)
-                return -ENOMEM;
-
-        obd_uuid_hash = cfs_hash_create("obd_uuid",
-                                        obd_hash_init_bits, obd_hash_max_bits,
-                                        obd_hash_bkt_bits, 0,
-                                        CFS_HASH_MIN_THETA, CFS_HASH_MAX_THETA,
-                                        &obd_uuid_hops,
-                                        CFS_HASH_DEFAULT | CFS_HASH_NO_ITEMREF);
-        if (obd_name_hash == NULL)
-                return -ENOMEM;
-
-        obd_minor_hash = cfs_hash_create("obd_minor",
-                                         obd_hash_init_bits, obd_hash_max_bits,
-                                         obd_hash_bkt_bits, 0,
-                                         CFS_HASH_MIN_THETA, CFS_HASH_MAX_THETA,
-                                         &obd_minor_hops,
-                                         CFS_HASH_DEFAULT | CFS_HASH_NO_ITEMREF);
-        if (obd_name_hash == NULL)
-                return -ENOMEM;
-
-        obd_minor_map = CFS_ALLOCATE_BITMAP(obd_minor_map_size);
-        if (obd_minor_map == NULL)
-                return -ENOMEM;
-
-        cfs_spin_lock_init(&obd_minor_lock);
-
-        return 0;
-}
-
-void obd_hashes_fini(void)
-{
-        if (obd_minor_map)
-               CFS_FREE_BITMAP(obd_minor_map);
-        cfs_hash_putref(obd_name_hash);
-        cfs_hash_putref(obd_uuid_hash);
-        cfs_hash_putref(obd_minor_hash);
-}
-
 /*
  * support functions: we could use inter-module communication, but this
  * is more portable to other OS's
@@ -371,6 +138,7 @@ struct obd_type *class_get_type(const char *name)
         }
         return type;
 }
+EXPORT_SYMBOL(class_get_type);
 
 void class_put_type(struct obd_type *type)
 {
@@ -380,6 +148,7 @@ void class_put_type(struct obd_type *type)
         cfs_module_put(type->typ_dt_ops->o_owner);
         cfs_spin_unlock(&type->obd_type_lock);
 }
+EXPORT_SYMBOL(class_put_type);
 
 #define CLASS_MAX_NAME 1024
 
@@ -452,6 +221,7 @@ int class_register_type(struct obd_ops *dt_ops, struct md_ops *md_ops,
         OBD_FREE(type, sizeof(*type));
         RETURN(rc);
 }
+EXPORT_SYMBOL(class_register_type);
 
 int class_unregister_type(const char *name)
 {
@@ -490,63 +260,7 @@ int class_unregister_type(const char *name)
         OBD_FREE(type, sizeof(*type));
         RETURN(0);
 } /* class_unregister_type */
-
-const char *obd_dev_status(struct obd_device *obd)
-{
-        const char *status;
-
-        if (obd->obd_stopping)
-                status = "ST";
-        else if (obd->obd_inactive)
-                status = "IN";
-        else if (obd->obd_set_up)
-                status = "UP";
-        else if (obd->obd_attached)
-                status = "AT";
-        else
-                status = "--";
-
-        return status;
-}
-
-#define cfs_list_entry_safe(pos, head, type, member)  \
-        (pos == head ? NULL : cfs_list_entry(pos, type, member))
-
-void obd_devlist_first(struct obd_device **pos)
-{
-        struct obd_device *obd;
-
-        cfs_spin_lock(&obd_dev_lock);
-        obd = cfs_list_entry_safe(obd_dev_list.next, &obd_dev_list,
-                                  struct obd_device, obd_list);
-        if (obd != NULL)
-                class_incref(obd, "devlist", obd);
-        cfs_spin_unlock(&obd_dev_lock);
-
-        *pos = obd;
-}
-
-void obd_devlist_next(struct obd_device **pos)
-{
-        struct obd_device *obd = NULL;
-
-        cfs_spin_lock(&obd_dev_lock);
-        obd = cfs_list_entry_safe((*pos)->obd_list.next, &obd_dev_list,
-                                  struct obd_device, obd_list);
-        if (obd)
-                class_incref(obd, "devlist", obd);
-        cfs_spin_unlock(&obd_dev_lock);
-
-        class_decref(*pos, "devlist", *pos);
-        *pos = obd;
-}
-
-void obd_devlist_last(struct obd_device *pos)
-{
-        if (pos)
-                class_decref(pos,"devlist", pos);
-
-}
+EXPORT_SYMBOL(class_unregister_type);
 
 /**
  * Create a new obd device.
@@ -559,11 +273,14 @@ void obd_devlist_last(struct obd_device *pos)
  * \retval NULL if create fails, otherwise return the obd device
  *         pointer created.
  */
-struct obd_device *class_newdev(const char *type_name, const char *name, const char *uuid)
+struct obd_device *class_newdev(const char *type_name, const char *name)
 {
+        struct obd_device *result = NULL;
         struct obd_device *newdev;
-        struct obd_type *type;
-        long ret;
+        struct obd_type *type = NULL;
+        int i;
+        int new_obd_minor = 0;
+        ENTRY;
 
         if (strlen(name) >= MAX_OBD_NAME) {
                 CERROR("name/uuid must be < %u bytes long\n", MAX_OBD_NAME);
@@ -573,52 +290,64 @@ struct obd_device *class_newdev(const char *type_name, const char *name, const c
         type = class_get_type(type_name);
         if (type == NULL){
                 CERROR("OBD: unknown type: %s\n", type_name);
-                ret = -ENODEV;
-                goto error_type;
+                RETURN(ERR_PTR(-ENODEV));
         }
 
         newdev = obd_device_alloc();
         if (newdev == NULL) {
-                ret = -ENOMEM;
-                goto error_device;
+                class_put_type(type);
+                RETURN(ERR_PTR(-ENOMEM));
         }
-
-        newdev->obd_minor = obd_minor_alloc();
-        if (newdev->obd_minor < 0) {
-                CERROR("don't have free minors\n");
-                ret = -ENODATA;
-                goto error_minor;
+        LASSERT(newdev->obd_magic == OBD_DEVICE_MAGIC);
+
+        cfs_write_lock(&obd_dev_lock);
+        for (i = 0; i < class_devno_max(); i++) {
+                struct obd_device *obd = class_num2obd(i);
+
+                if (obd && obd->obd_name &&
+                    (strcmp(name, obd->obd_name) == 0)) {
+                        CERROR("Device %s already exists at %d, won't add\n",
+                               name, i);
+                        if (result) {
+                                LASSERTF(result->obd_magic == OBD_DEVICE_MAGIC,
+                                         "%p obd_magic %08x != %08x\n", result,
+                                         result->obd_magic, OBD_DEVICE_MAGIC);
+                                LASSERTF(result->obd_minor == new_obd_minor,
+                                         "%p obd_minor %d != %d\n", result,
+                                         result->obd_minor, new_obd_minor);
+
+                                obd_devs[result->obd_minor] = NULL;
+                                result->obd_name[0]='\0';
+                         }
+                        result = ERR_PTR(-EEXIST);
+                        break;
+                }
+                if (!result && !obd) {
+                        result = newdev;
+                        result->obd_minor = i;
+                        new_obd_minor = i;
+                        result->obd_type = type;
+                        strncpy(result->obd_name, name,
+                                sizeof(result->obd_name) - 1);
+                        obd_devs[i] = result;
+                }
         }
+        cfs_write_unlock(&obd_dev_lock);
 
-        /* find add unique by name */
-        strncpy(newdev->obd_name, name, sizeof(newdev->obd_name) - 1);
-        if (cfs_hash_add_unique(obd_name_hash, name, &newdev->obd_name_node)) {
-                CERROR("fails to add an unique obddev (%s) to the hash\n",
-                       name);
-                ret = -EEXIST;
-                goto error_dup;
+        if (result == NULL && i >= class_devno_max()) {
+                CERROR("all %u OBD devices used, increase MAX_OBD_DEVICES\n",
+                       class_devno_max());
+                result = ERR_PTR(-EOVERFLOW); /* cleaned up below */
         }
-        newdev->obd_type = type;
-
-        cfs_hash_add(obd_minor_hash, &newdev->obd_minor, &newdev->obd_minor_node);
-        memcpy(newdev->obd_uuid.uuid, uuid, strlen(uuid));
-        cfs_hash_add(obd_uuid_hash, uuid, &newdev->obd_uuid_node);
-
-        cfs_spin_lock(&obd_dev_lock);
-        cfs_list_add_tail(&newdev->obd_list, &obd_dev_list);
-        cfs_spin_unlock(&obd_dev_lock);
 
-        CDEBUG(D_IOCTL, "Adding new device %s (%p)\n",
-               newdev->obd_name, newdev);
-        RETURN(newdev);
-error_dup:
-       obd_minor_release(newdev->obd_minor);
-error_minor:
-        obd_device_free(newdev);
-error_device:
-        class_put_type(type);
-error_type:
-        RETURN(ERR_PTR(ret));
+        if (IS_ERR(result)) {
+                obd_device_free(newdev);
+                class_put_type(type);
+        } else {
+                CDEBUG(D_IOCTL, "Adding new device %s (%p)\n",
+                       result->obd_name, result);
+        }
+        RETURN(result);
 }
 
 void class_release_dev(struct obd_device *obd)
@@ -627,20 +356,16 @@ void class_release_dev(struct obd_device *obd)
 
         LASSERTF(obd->obd_magic == OBD_DEVICE_MAGIC, "%p obd_magic %08x != %08x\n",
                  obd, obd->obd_magic, OBD_DEVICE_MAGIC);
+        LASSERTF(obd == obd_devs[obd->obd_minor], "obd %p != obd_devs[%d] %p\n",
+                 obd, obd->obd_minor, obd_devs[obd->obd_minor]);
         LASSERT(obd_type != NULL);
 
-        CDEBUG(D_INFO, "Release obd device %s obd_type name =%s\n",
-               obd->obd_name,obd->obd_type->typ_name);
-
-        cfs_hash_del(obd_name_hash, obd->obd_name, &obd->obd_name_node);
-        cfs_hash_del(obd_uuid_hash, &obd->obd_uuid, &obd->obd_uuid_node);
-        cfs_hash_del(obd_minor_hash, &obd->obd_minor, &obd->obd_minor_node);
+        CDEBUG(D_INFO, "Release obd device %s at %d obd_type name =%s\n",
+               obd->obd_name, obd->obd_minor, obd->obd_type->typ_name);
 
-        cfs_spin_lock(&obd_dev_lock);
-        cfs_list_del(&obd->obd_list);
-        cfs_spin_unlock(&obd_dev_lock);
-
-        obd_minor_release(obd->obd_minor);
+        cfs_write_lock(&obd_dev_lock);
+        obd_devs[obd->obd_minor] = NULL;
+        cfs_write_unlock(&obd_dev_lock);
         obd_device_free(obd);
 
         class_put_type(obd_type);
@@ -648,29 +373,70 @@ void class_release_dev(struct obd_device *obd)
 
 int class_name2dev(const char *name)
 {
-        struct obd_device *obd;
+        int i;
+
+        if (!name)
+                return -1;
 
-        obd = cfs_hash_lookup(obd_name_hash, name);
-        return obd != NULL ? obd->obd_minor : -1 ;
+        cfs_read_lock(&obd_dev_lock);
+        for (i = 0; i < class_devno_max(); i++) {
+                struct obd_device *obd = class_num2obd(i);
+
+                if (obd && obd->obd_name && strcmp(name, obd->obd_name) == 0) {
+                        /* Make sure we finished attaching before we give
+                           out any references */
+                        LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC);
+                        if (obd->obd_attached) {
+                                cfs_read_unlock(&obd_dev_lock);
+                                return i;
+                        }
+                        break;
+                }
+        }
+        cfs_read_unlock(&obd_dev_lock);
+
+        return -1;
 }
+EXPORT_SYMBOL(class_name2dev);
 
 struct obd_device *class_name2obd(const char *name)
 {
-        return cfs_hash_lookup(obd_name_hash, name);
+        int dev = class_name2dev(name);
+
+        if (dev < 0 || dev > class_devno_max())
+                return NULL;
+        return class_num2obd(dev);
 }
+EXPORT_SYMBOL(class_name2obd);
 
 int class_uuid2dev(struct obd_uuid *uuid)
 {
-        struct obd_device *obd;
+        int i;
+
+        cfs_read_lock(&obd_dev_lock);
+        for (i = 0; i < class_devno_max(); i++) {
+                struct obd_device *obd = class_num2obd(i);
 
-        obd = cfs_hash_lookup(obd_uuid_hash, uuid);
-        return obd != NULL ? obd->obd_minor : -1;
+                if (obd && obd_uuid_equals(uuid, &obd->obd_uuid)) {
+                        LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC);
+                        cfs_read_unlock(&obd_dev_lock);
+                        return i;
+                }
+        }
+        cfs_read_unlock(&obd_dev_lock);
+
+        return -1;
 }
+EXPORT_SYMBOL(class_uuid2dev);
 
 struct obd_device *class_uuid2obd(struct obd_uuid *uuid)
 {
-        return  cfs_hash_lookup(obd_uuid_hash, uuid);
+        int dev = class_uuid2dev(uuid);
+        if (dev < 0)
+                return NULL;
+        return class_num2obd(dev);
 }
+EXPORT_SYMBOL(class_uuid2obd);
 
 /**
  * Get obd device from ::obd_devs[]
@@ -680,30 +446,52 @@ struct obd_device *class_uuid2obd(struct obd_uuid *uuid)
  * \retval NULL if ::obd_devs[\a num] does not contains an obd device
  *         otherwise return the obd device there.
  */
-struct obd_device *class_num2obd(__u32 minor)
+struct obd_device *class_num2obd(int num)
 {
-        struct obd_device *obd;
+        struct obd_device *obd = NULL;
 
-        obd = cfs_hash_lookup(obd_minor_hash, &minor);
+        if (num >= 0 && num < class_devno_max()) {
+                obd = obd_devs[num];
+                if (obd == NULL)
+                        return NULL;
+
+                LASSERTF(obd->obd_magic == OBD_DEVICE_MAGIC,
+                         "%p obd_magic %08x != %08x\n",
+                         obd, obd->obd_magic, OBD_DEVICE_MAGIC);
+                LASSERTF(obd->obd_minor == num,
+                         "%p obd_minor %0d != %0d\n",
+                         obd, obd->obd_minor, num);
+        }
 
         return obd;
 }
+EXPORT_SYMBOL(class_num2obd);
 
 void class_obd_list(void)
 {
-        const char *status;
-        struct obd_device *obd;
+        char *status;
+        int i;
 
-        for (obd_devlist_first(&obd);
-             obd != NULL;
-             obd_devlist_next(&obd)) {
+        cfs_read_lock(&obd_dev_lock);
+        for (i = 0; i < class_devno_max(); i++) {
+                struct obd_device *obd = class_num2obd(i);
 
-                status = obd_dev_status(obd);
+                if (obd == NULL)
+                        continue;
+                if (obd->obd_stopping)
+                        status = "ST";
+                else if (obd->obd_set_up)
+                        status = "UP";
+                else if (obd->obd_attached)
+                        status = "AT";
+                else
+                        status = "--";
                 LCONSOLE(D_CONFIG, "%3d %s %s %s %s %d\n",
-                         obd->obd_minor, status, obd->obd_type->typ_name,
+                         i, status, obd->obd_type->typ_name,
                          obd->obd_name, obd->obd_uuid.uuid,
                          cfs_atomic_read(&obd->obd_refcount));
         }
+        cfs_read_unlock(&obd_dev_lock);
         return;
 }
 
@@ -714,52 +502,64 @@ struct obd_device * class_find_client_obd(struct obd_uuid *tgt_uuid,
                                           const char * typ_name,
                                           struct obd_uuid *grp_uuid)
 {
-        struct obd_device *obd;
+        int i;
 
-        for (obd_devlist_first(&obd);
-             obd != NULL;
-             obd_devlist_next(&obd)) {
-                /* XXX per type list ? */
+        cfs_read_lock(&obd_dev_lock);
+        for (i = 0; i < class_devno_max(); i++) {
+                struct obd_device *obd = class_num2obd(i);
+
+                if (obd == NULL)
+                        continue;
                 if ((strncmp(obd->obd_type->typ_name, typ_name,
                              strlen(typ_name)) == 0)) {
                         if (obd_uuid_equals(tgt_uuid,
                                             &obd->u.cli.cl_target_uuid) &&
                             ((grp_uuid)? obd_uuid_equals(grp_uuid,
                                                          &obd->obd_uuid) : 1)) {
-                                obd_devlist_last(obd);
+                                cfs_read_unlock(&obd_dev_lock);
                                 return obd;
                         }
                 }
         }
+        cfs_read_unlock(&obd_dev_lock);
 
         return NULL;
 }
+EXPORT_SYMBOL(class_find_client_obd);
 
 /* Iterate the obd_device list looking devices have grp_uuid. Start
    searching at *next, and if a device is found, the next index to look
    at is saved in *next. If next is NULL, then the first matching device
    will always be returned. */
-struct obd_device * class_devices_in_group(struct obd_uuid *grp_uuid,
-                                           struct obd_device **prev)
+struct obd_device * class_devices_in_group(struct obd_uuid *grp_uuid, int *next)
 {
-        struct obd_device *obd = *prev;
+        int i;
 
-        if (obd == NULL)
-               obd_devlist_first(&obd);
+        if (next == NULL)
+                i = 0;
+        else if (*next >= 0 && *next < class_devno_max())
+                i = *next;
         else
-               obd_devlist_next(&obd);
+                return NULL;
 
+        cfs_read_lock(&obd_dev_lock);
+        for (; i < class_devno_max(); i++) {
+                struct obd_device *obd = class_num2obd(i);
 
-        for (; obd != NULL; obd_devlist_next(&obd)) {
+                if (obd == NULL)
+                        continue;
                 if (obd_uuid_equals(grp_uuid, &obd->obd_uuid)) {
-                        /* XXX return with reference */
-                        *prev = obd;
+                        if (next != NULL)
+                                *next = i+1;
+                        cfs_read_unlock(&obd_dev_lock);
                         return obd;
                 }
         }
+        cfs_read_unlock(&obd_dev_lock);
 
         return NULL;
 }
+EXPORT_SYMBOL(class_devices_in_group);
 
 /**
  * to notify sptlrpc log for \a fsname has changed, let every relevant OBD
@@ -769,14 +569,15 @@ int class_notify_sptlrpc_conf(const char *fsname, int namelen)
 {
         struct obd_device  *obd;
         const char         *type;
-        int                 rc = 0, rc2;
+        int                 i, rc = 0, rc2;
 
         LASSERT(namelen > 0);
 
-        for (obd_devlist_first(&obd);
-             obd != NULL;
-             obd_devlist_next(&obd)) {
-                if (obd->obd_set_up == 0 || obd->obd_stopping)
+        cfs_read_lock(&obd_dev_lock);
+        for (i = 0; i < class_devno_max(); i++) {
+                obd = class_num2obd(i);
+
+                if (obd == NULL || obd->obd_set_up == 0 || obd->obd_stopping)
                         continue;
 
                 /* only notify mdc, osc, mdt, ost */
@@ -790,13 +591,16 @@ int class_notify_sptlrpc_conf(const char *fsname, int namelen)
                 if (strncmp(obd->obd_name, fsname, namelen))
                         continue;
 
-                /** XXX - some new obd can be added at that point */
+                class_incref(obd, __FUNCTION__, obd);
+                cfs_read_unlock(&obd_dev_lock);
                 rc2 = obd_set_info_async(obd->obd_self_export,
                                          sizeof(KEY_SPTLRPC_CONF),
                                          KEY_SPTLRPC_CONF, 0, NULL, NULL);
                 rc = rc ? rc : rc2;
+                class_decref(obd, __FUNCTION__, obd);
+                cfs_read_lock(&obd_dev_lock);
         }
-
+        cfs_read_unlock(&obd_dev_lock);
         return rc;
 }
 EXPORT_SYMBOL(class_notify_sptlrpc_conf);
@@ -886,6 +690,7 @@ struct obd_export *class_conn2export(struct lustre_handle *conn)
         export = class_handle2object(conn->cookie);
         RETURN(export);
 }
+EXPORT_SYMBOL(class_conn2export);
 
 struct obd_device *class_exp2obd(struct obd_export *exp)
 {
@@ -893,6 +698,7 @@ struct obd_device *class_exp2obd(struct obd_export *exp)
                 return exp->exp_obd;
         return NULL;
 }
+EXPORT_SYMBOL(class_exp2obd);
 
 struct obd_device *class_conn2obd(struct lustre_handle *conn)
 {
@@ -905,6 +711,7 @@ struct obd_device *class_conn2obd(struct lustre_handle *conn)
         }
         return NULL;
 }
+EXPORT_SYMBOL(class_conn2obd);
 
 struct obd_import *class_exp2cliimp(struct obd_export *exp)
 {
@@ -913,6 +720,7 @@ struct obd_import *class_exp2cliimp(struct obd_export *exp)
                 return NULL;
         return obd->u.cli.cl_import;
 }
+EXPORT_SYMBOL(class_exp2cliimp);
 
 struct obd_import *class_conn2cliimp(struct lustre_handle *conn)
 {
@@ -921,19 +729,55 @@ struct obd_import *class_conn2cliimp(struct lustre_handle *conn)
                 return NULL;
         return obd->u.cli.cl_import;
 }
+EXPORT_SYMBOL(class_conn2cliimp);
 
 /* Export management functions */
+
+/* if export is involved in recovery then clean up related things */
+void class_export_recovery_cleanup(struct obd_export *exp)
+{
+        struct obd_device *obd = exp->exp_obd;
+
+        cfs_spin_lock(&obd->obd_recovery_task_lock);
+        if (exp->exp_delayed)
+                obd->obd_delayed_clients--;
+        if (obd->obd_recovering && exp->exp_in_recovery) {
+                cfs_spin_lock(&exp->exp_lock);
+                exp->exp_in_recovery = 0;
+                cfs_spin_unlock(&exp->exp_lock);
+                LASSERT_ATOMIC_POS(&obd->obd_connected_clients);
+                cfs_atomic_dec(&obd->obd_connected_clients);
+        }
+        cfs_spin_unlock(&obd->obd_recovery_task_lock);
+        /** Cleanup req replay fields */
+        if (exp->exp_req_replay_needed) {
+                cfs_spin_lock(&exp->exp_lock);
+                exp->exp_req_replay_needed = 0;
+                cfs_spin_unlock(&exp->exp_lock);
+                LASSERT(cfs_atomic_read(&obd->obd_req_replay_clients));
+                cfs_atomic_dec(&obd->obd_req_replay_clients);
+        }
+        /** Cleanup lock replay data */
+        if (exp->exp_lock_replay_needed) {
+                cfs_spin_lock(&exp->exp_lock);
+                exp->exp_lock_replay_needed = 0;
+                cfs_spin_unlock(&exp->exp_lock);
+                LASSERT(cfs_atomic_read(&obd->obd_lock_replay_clients));
+                cfs_atomic_dec(&obd->obd_lock_replay_clients);
+        }
+}
+
 static void class_export_destroy(struct obd_export *exp)
 {
         struct obd_device *obd = exp->exp_obd;
         ENTRY;
 
         LASSERT_ATOMIC_ZERO(&exp->exp_refcount);
+        LASSERT(obd != NULL);
 
         CDEBUG(D_IOCTL, "destroying export %p/%s for %s\n", exp,
                exp->exp_client_uuid.uuid, obd->obd_name);
 
-        LASSERT(obd != NULL);
 
         /* "Local" exports (lctl, LOV->{mdc,osc}) have no connection. */
         if (exp->exp_connection)
@@ -942,7 +786,7 @@ static void class_export_destroy(struct obd_export *exp)
         LASSERT(cfs_list_empty(&exp->exp_outstanding_replies));
         LASSERT(cfs_list_empty(&exp->exp_uncommitted_replies));
         LASSERT(cfs_list_empty(&exp->exp_req_replay_queue));
-        LASSERT(cfs_list_empty(&exp->exp_queued_rpc));
+        LASSERT(cfs_list_empty(&exp->exp_hp_rpcs));
         obd_destroy_export(exp);
         class_decref(obd, "export", exp);
 
@@ -967,7 +811,7 @@ EXPORT_SYMBOL(class_export_get);
 void class_export_put(struct obd_export *exp)
 {
         LASSERT(exp != NULL);
-        LASSERT_ATOMIC_GT_LT(&exp->exp_refcount, 0, 0x5a5a5a);
+        LASSERT_ATOMIC_GT_LT(&exp->exp_refcount, 0, LI_POISON);
         CDEBUG(D_INFO, "PUTting export %p : new refcount %d\n", exp,
                cfs_atomic_read(&exp->exp_refcount) - 1);
 
@@ -978,6 +822,7 @@ void class_export_put(struct obd_export *exp)
 
                 /* release nid stat refererence */
                 lprocfs_exp_cleanup(exp);
+                class_export_recovery_cleanup(exp);
 
                 obd_zombie_export_add(exp);
         }
@@ -1016,13 +861,15 @@ struct obd_export *class_new_export(struct obd_device *obd,
         CFS_INIT_LIST_HEAD(&export->exp_uncommitted_replies);
         CFS_INIT_LIST_HEAD(&export->exp_req_replay_queue);
         CFS_INIT_LIST_HEAD(&export->exp_handle.h_link);
-        CFS_INIT_LIST_HEAD(&export->exp_queued_rpc);
+        CFS_INIT_LIST_HEAD(&export->exp_hp_rpcs);
         class_handle_hash(&export->exp_handle, export_handle_addref);
         export->exp_last_request_time = cfs_time_current_sec();
         cfs_spin_lock_init(&export->exp_lock);
         cfs_spin_lock_init(&export->exp_rpc_lock);
         CFS_INIT_HLIST_NODE(&export->exp_uuid_hash);
         CFS_INIT_HLIST_NODE(&export->exp_nid_hash);
+        cfs_spin_lock_init(&export->exp_bl_list_lock);
+        CFS_INIT_LIST_HEAD(&export->exp_bl_list);
 
         export->exp_sp_peer = LUSTRE_SP_ANY;
         export->exp_flvr.sf_rpc = SPTLRPC_FLVR_INVALID;
@@ -1143,7 +990,7 @@ void class_import_put(struct obd_import *imp)
         ENTRY;
 
         LASSERT(cfs_list_empty(&imp->imp_zombie_chain));
-        LASSERT_ATOMIC_GE_LT(&imp->imp_refcount, 0, 0x5a5a5a);
+        LASSERT_ATOMIC_GT_LT(&imp->imp_refcount, 0, LI_POISON);
 
         CDEBUG(D_INFO, "import %p refcount=%d obd=%s\n", imp,
                cfs_atomic_read(&imp->imp_refcount) - 1,
@@ -1186,7 +1033,7 @@ struct obd_import *class_new_import(struct obd_device *obd)
         imp->imp_last_success_conn = 0;
         imp->imp_state = LUSTRE_IMP_NEW;
         imp->imp_obd = class_incref(obd, "import", imp);
-        cfs_sema_init(&imp->imp_sec_mutex, 1);
+        cfs_mutex_init(&imp->imp_sec_mutex);
         cfs_waitq_init(&imp->imp_recovery_waitq);
 
         cfs_atomic_set(&imp->imp_refcount, 2);
@@ -1290,39 +1137,6 @@ int class_connect(struct lustre_handle *conn, struct obd_device *obd,
 }
 EXPORT_SYMBOL(class_connect);
 
-/* if export is involved in recovery then clean up related things */
-void class_export_recovery_cleanup(struct obd_export *exp)
-{
-        struct obd_device *obd = exp->exp_obd;
-
-        cfs_spin_lock(&obd->obd_recovery_task_lock);
-        if (exp->exp_delayed)
-                obd->obd_delayed_clients--;
-        if (obd->obd_recovering && exp->exp_in_recovery) {
-                cfs_spin_lock(&exp->exp_lock);
-                exp->exp_in_recovery = 0;
-                cfs_spin_unlock(&exp->exp_lock);
-                LASSERT(obd->obd_connected_clients);
-                obd->obd_connected_clients--;
-        }
-        cfs_spin_unlock(&obd->obd_recovery_task_lock);
-        /** Cleanup req replay fields */
-        if (exp->exp_req_replay_needed) {
-                cfs_spin_lock(&exp->exp_lock);
-                exp->exp_req_replay_needed = 0;
-                cfs_spin_unlock(&exp->exp_lock);
-                LASSERT(cfs_atomic_read(&obd->obd_req_replay_clients));
-                cfs_atomic_dec(&obd->obd_req_replay_clients);
-        }
-        /** Cleanup lock replay data */
-        if (exp->exp_lock_replay_needed) {
-                cfs_spin_lock(&exp->exp_lock);
-                exp->exp_lock_replay_needed = 0;
-                cfs_spin_unlock(&exp->exp_lock);
-                LASSERT(cfs_atomic_read(&obd->obd_lock_replay_clients));
-                cfs_atomic_dec(&obd->obd_lock_replay_clients);
-        }
-}
 
 /* This function removes 1-3 references from the export:
  * 1 - for export pointer passed
@@ -1336,8 +1150,7 @@ int class_disconnect(struct obd_export *export)
         ENTRY;
 
         if (export == NULL) {
-                fixme();
-                CDEBUG(D_IOCTL, "attempting to free NULL export %p\n", export);
+                CWARN("attempting to free NULL export %p\n", export);
                 RETURN(-EINVAL);
         }
 
@@ -1362,12 +1175,12 @@ int class_disconnect(struct obd_export *export)
                              &export->exp_connection->c_peer.nid,
                              &export->exp_nid_hash);
 
-        class_export_recovery_cleanup(export);
         class_unlink_export(export);
 no_disconn:
         class_export_put(export);
         RETURN(0);
 }
+EXPORT_SYMBOL(class_disconnect);
 
 /* Return non-zero for a fully connected export */
 int class_connected_export(struct obd_export *exp)
@@ -1467,15 +1280,25 @@ void class_disconnect_stale_exports(struct obd_device *obd,
         CFS_INIT_LIST_HEAD(&work_list);
         cfs_spin_lock(&obd->obd_dev_lock);
         cfs_list_for_each_safe(pos, n, &obd->obd_exports) {
+                int failed;
+
                 exp = cfs_list_entry(pos, struct obd_export, exp_obd_chain);
-                if (test_export(exp))
-                        continue;
 
                 /* don't count self-export as client */
                 if (obd_uuid_equals(&exp->exp_client_uuid,
                                     &exp->exp_obd->obd_uuid))
                         continue;
 
+                if (test_export(exp))
+                        continue;
+
+                cfs_spin_lock(&exp->exp_lock);
+                failed = exp->exp_failed;
+                exp->exp_failed = 1;
+                cfs_spin_unlock(&exp->exp_lock);
+                if (failed)
+                        continue;
+
                 cfs_list_move(&exp->exp_obd_chain, &work_list);
                 evicted++;
                 CDEBUG(D_ERROR, "%s: disconnect stale client %s@%s\n",
@@ -1518,6 +1341,9 @@ void class_fail_export(struct obd_export *exp)
         if (obd_dump_on_timeout)
                 libcfs_debug_dumplog();
 
+        /* keep exp alive so the CDEBUG after obd_disconnect is safe */
+        class_export_get(exp);
+
         /* Most callers into obd_disconnect are removing their own reference
          * (request, for example) in addition to the one from the hash table.
          * We don't have such a reference here, so make one. */
@@ -1528,6 +1354,7 @@ void class_fail_export(struct obd_export *exp)
         else
                 CDEBUG(D_HA, "disconnected export %p/%s\n",
                        exp, exp->exp_client_uuid.uuid);
+        class_export_put(exp);
 }
 EXPORT_SYMBOL(class_fail_export);
 
@@ -1681,6 +1508,9 @@ void obd_exports_barrier(struct obd_device *obd)
 }
 EXPORT_SYMBOL(obd_exports_barrier);
 
+/* Total number of zombie imports and exports awaiting destruction */
+static int zombies_count = 0;
+
 /**
  * kill zombie imports and exports
  */
@@ -1711,11 +1541,19 @@ void obd_zombie_impexp_cull(void)
 
                 cfs_spin_unlock(&obd_zombie_impexp_lock);
 
-                if (import != NULL)
+                if (import != NULL) {
                         class_import_destroy(import);
+                        cfs_spin_lock(&obd_zombie_impexp_lock);
+                        zombies_count--;
+                        cfs_spin_unlock(&obd_zombie_impexp_lock);
+                }
 
-                if (export != NULL)
+                if (export != NULL) {
                         class_export_destroy(export);
+                        cfs_spin_lock(&obd_zombie_impexp_lock);
+                        zombies_count--;
+                        cfs_spin_unlock(&obd_zombie_impexp_lock);
+                }
 
                 cfs_cond_resched();
         } while (import != NULL || export != NULL);
@@ -1740,10 +1578,8 @@ static int obd_zombie_impexp_check(void *arg)
         int rc;
 
         cfs_spin_lock(&obd_zombie_impexp_lock);
-        rc = cfs_list_empty(&obd_zombie_imports) &&
-             cfs_list_empty(&obd_zombie_exports) &&
+        rc = (zombies_count == 0) &&
              !cfs_test_bit(OBD_ZOMBIE_STOP, &obd_zombie_flags);
-
         cfs_spin_unlock(&obd_zombie_impexp_lock);
 
         RETURN(rc);
@@ -1758,11 +1594,11 @@ static void obd_zombie_export_add(struct obd_export *exp) {
         cfs_list_del_init(&exp->exp_obd_chain);
         cfs_spin_unlock(&exp->exp_obd->obd_dev_lock);
         cfs_spin_lock(&obd_zombie_impexp_lock);
+        zombies_count++;
         cfs_list_add(&exp->exp_obd_chain, &obd_zombie_exports);
         cfs_spin_unlock(&obd_zombie_impexp_lock);
 
-        if (obd_zombie_impexp_notify != NULL)
-                obd_zombie_impexp_notify();
+        obd_zombie_impexp_notify();
 }
 
 /**
@@ -1772,11 +1608,11 @@ static void obd_zombie_import_add(struct obd_import *imp) {
         LASSERT(imp->imp_sec == NULL);
         cfs_spin_lock(&obd_zombie_impexp_lock);
         LASSERT(cfs_list_empty(&imp->imp_zombie_chain));
+        zombies_count++;
         cfs_list_add(&imp->imp_zombie_chain, &obd_zombie_imports);
         cfs_spin_unlock(&obd_zombie_impexp_lock);
 
-        if (obd_zombie_impexp_notify != NULL)
-                obd_zombie_impexp_notify();
+        obd_zombie_impexp_notify();
 }
 
 /**
@@ -1784,7 +1620,12 @@ static void obd_zombie_import_add(struct obd_import *imp) {
  */
 static void obd_zombie_impexp_notify(void)
 {
-        cfs_waitq_signal(&obd_zombie_waitq);
+        /*
+         * Make sure obd_zombie_impexp_thread gets this notification.
+         * Otherwise the signal may be received only by obd_zombie_barrier,
+         * which swallows it and goes back to sleep, and a hang ensues.
+         */
+        cfs_waitq_broadcast(&obd_zombie_waitq);
 }
 
 /**
@@ -1796,8 +1637,7 @@ static int obd_zombie_is_idle(void)
 
         LASSERT(!cfs_test_bit(OBD_ZOMBIE_STOP, &obd_zombie_flags));
         cfs_spin_lock(&obd_zombie_impexp_lock);
-        rc = cfs_list_empty(&obd_zombie_imports) &&
-             cfs_list_empty(&obd_zombie_exports);
+        rc = (zombies_count == 0);
         cfs_spin_unlock(&obd_zombie_impexp_lock);
         return rc;
 }
@@ -1889,7 +1729,7 @@ int obd_zombie_impexp_init(void)
         obd_zombie_pid = 0;
 
 #ifdef __KERNEL__
-        rc = cfs_kernel_thread(obd_zombie_impexp_thread, NULL, 0);
+        rc = cfs_create_thread(obd_zombie_impexp_thread, NULL, 0);
         if (rc < 0)
                 RETURN(rc);