Whamcloud - gitweb
Branch HEAD
[fs/lustre-release.git] / lustre / obdclass / genops.c
index e5be2bc..a74ee18 100644 (file)
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- *  Copyright (c) 2001-2003 Cluster File Systems, Inc.
+ * GPL HEADER START
  *
- *   This file is part of Lustre, http://www.lustre.org.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
  *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
  *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright  2008 Sun Microsystems, Inc. All rights reserved
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/obdclass/genops.c
  *
  * These are the only exported functions, they provide some generic
  * infrastructure for managing object devices
  */
 
 #define DEBUG_SUBSYSTEM S_CLASS
-#include <linux/kmod.h>   /* for request_module() */
-#include <linux/module.h>
-#include <linux/obd_class.h>
-#include <linux/random.h>
-#include <linux/slab.h>
-#include <linux/lprocfs_status.h>
+#ifndef __KERNEL__
+#include <liblustre.h>
+#endif
+#include <obd_ost.h>
+#include <obd_class.h>
+#include <lprocfs_status.h>
+#include <class_hash.h>
 
 extern struct list_head obd_types;
-kmem_cache_t *obdo_cachep = NULL;
-kmem_cache_t *import_cachep = NULL;
-kmem_cache_t *export_cachep = NULL;
+spinlock_t obd_types_lock;
+
+cfs_mem_cache_t *obd_device_cachep;
+cfs_mem_cache_t *obdo_cachep;
+EXPORT_SYMBOL(obdo_cachep);
+cfs_mem_cache_t *import_cachep;
+
+struct list_head  obd_zombie_imports;
+struct list_head  obd_zombie_exports;
+spinlock_t        obd_zombie_impexp_lock;
+static void obd_zombie_impexp_notify(void);
+static void obd_zombie_export_add(struct obd_export *exp);
+static void obd_zombie_import_add(struct obd_import *imp);
 
 int (*ptlrpc_put_connection_superhack)(struct ptlrpc_connection *c);
-void (*ptlrpc_abort_inflight_superhack)(struct obd_import *imp,
-                                        int dying_import);
 
 /*
  * support functions: we could use inter-module communication, but this
  * is more portable to other OS's
  */
-static struct obd_type *class_search_type(char *name)
+/*
+ * Allocate an obd_device from obd_device_cachep and stamp it with
+ * OBD_DEVICE_MAGIC.
+ *
+ * \retval NULL if the slab allocation fails, otherwise the new device.
+ */
+static struct obd_device *obd_device_alloc(void)
+{
+        struct obd_device *obd;
+
+        OBD_SLAB_ALLOC_PTR_GFP(obd, obd_device_cachep, CFS_ALLOC_IO);
+        if (obd == NULL)
+                return NULL;
+
+        /* the magic is asserted by obd_device_free() and class_num2obd() */
+        obd->obd_magic = OBD_DEVICE_MAGIC;
+        return obd;
+}
+EXPORT_SYMBOL(obd_device_alloc);
+
+/*
+ * Return an obd_device to obd_device_cachep.
+ *
+ * Asserts that \a obd is non-NULL and still carries OBD_DEVICE_MAGIC.
+ * A device whose obd_namespace is still set is reported with CERROR and
+ * then hits LBUG().  The obd_reference lu_ref is finalized before the
+ * memory is released.
+ */
+static void obd_device_free(struct obd_device *obd)
+{
+        LASSERT(obd != NULL);
+        LASSERTF(obd->obd_magic == OBD_DEVICE_MAGIC, "obd %p obd_magic %08x != %08x\n",
+                 obd, obd->obd_magic, OBD_DEVICE_MAGIC);
+        /* a namespace left behind at this point is treated as fatal */
+        if (obd->obd_namespace != NULL) {
+                CERROR("obd %p: namespace %p was not properly cleaned up (obd_force=%d)!\n",
+                       obd, obd->obd_namespace, obd->obd_force);
+                LBUG();
+        }
+        lu_ref_fini(&obd->obd_reference);
+        OBD_SLAB_FREE_PTR(obd, obd_device_cachep);
+}
+
+/*
+ * Find the registered obd type called \a name.
+ *
+ * Walks obd_types under obd_types_lock and compares typ_name with strcmp().
+ * No reference is taken on the returned type.
+ *
+ * \retval NULL if no type with that name is on the list.
+ */
+struct obd_type *class_search_type(const char *name)
 {
         struct list_head *tmp;
         struct obd_type *type;
-        CDEBUG(D_INFO, "SEARCH %s\n", name);
 
-        tmp = &obd_types;
+        spin_lock(&obd_types_lock);
         list_for_each(tmp, &obd_types) {
                 type = list_entry(tmp, struct obd_type, typ_chain);
-                CDEBUG(D_INFO, "TYP %s\n", type->typ_name);
-                if (strlen(type->typ_name) == strlen(name) &&
-                    strcmp(type->typ_name, name) == 0) {
+                if (strcmp(type->typ_name, name) == 0) {
+                        spin_unlock(&obd_types_lock);
                         return type;
                 }
         }
+        spin_unlock(&obd_types_lock);
         return NULL;
 }
 
-struct obd_type *class_get_type(char *name)
+/*
+ * Look up the obd type registered under \a name and take a reference on it.
+ *
+ * With CONFIG_KMOD, a type that is not registered yet triggers
+ * request_module(\a name) followed by a second lookup.  When a type is
+ * found, typ_refcnt is incremented under obd_type_lock and a reference on
+ * the module owning typ_dt_ops is requested.
+ *
+ * \retval NULL if no such type is registered, otherwise the type.
+ */
+struct obd_type *class_get_type(const char *name)
 {
         struct obd_type *type = class_search_type(name);
 
 #ifdef CONFIG_KMOD
         if (!type) {
-                if (!request_module(name)) {
-                        CDEBUG(D_INFO, "Loaded module '%s'\n", name);
+                const char *modname = name;
+                if (!request_module(modname)) {
+                        CDEBUG(D_INFO, "Loaded module '%s'\n", modname);
                         type = class_search_type(name);
-                } else
-                        CDEBUG(D_INFO, "Can't load module '%s'\n", name);
+                } else {
+                        LCONSOLE_ERROR_MSG(0x158, "Can't load module '%s'\n",
+                                           modname);
+                }
         }
 #endif
-        if (type)
-                __MOD_INC_USE_COUNT(type->typ_ops->o_owner);
+        if (type) {
+                spin_lock(&type->obd_type_lock);
+                type->typ_refcnt++;
+                /* NOTE(review): the result of try_module_get() is not
+                 * checked, so typ_refcnt is bumped even if the module
+                 * reference was refused -- confirm this is intended */
+                try_module_get(type->typ_dt_ops->o_owner);
+                spin_unlock(&type->obd_type_lock);
+        }
         return type;
 }
 
+/*
+ * Drop a reference taken by class_get_type(): decrement typ_refcnt under
+ * obd_type_lock and release the module owning typ_dt_ops.
+ * \a type must not be NULL (asserted).
+ */
 void class_put_type(struct obd_type *type)
 {
         LASSERT(type);
-        __MOD_DEC_USE_COUNT(type->typ_ops->o_owner);
+        spin_lock(&type->obd_type_lock);
+        type->typ_refcnt--;
+        module_put(type->typ_dt_ops->o_owner);
+        spin_unlock(&type->obd_type_lock);
 }
 
-int class_register_type(struct obd_ops *ops, struct lprocfs_vars *vars,
-                        char *name)
+#define CLASS_MAX_NAME 1024
+
+int class_register_type(struct obd_ops *dt_ops, struct md_ops *md_ops,
+                        struct lprocfs_vars *vars, const char *name,
+                        struct lu_device_type *ldt)
 {
         struct obd_type *type;
         int rc = 0;
         ENTRY;
 
-        LASSERT(strnlen(name, 1024) < 1024);    /* sanity check */
+        /* sanity check */
+        LASSERT(strnlen(name, CLASS_MAX_NAME) < CLASS_MAX_NAME);
 
         if (class_search_type(name)) {
                 CDEBUG(D_IOCTL, "Type %s already registered\n", name);
@@ -104,35 +169,56 @@ int class_register_type(struct obd_ops *ops, struct lprocfs_vars *vars,
         if (type == NULL)
                 RETURN(rc);
 
-        OBD_ALLOC(type->typ_ops, sizeof(*type->typ_ops));
+        OBD_ALLOC_PTR(type->typ_dt_ops);
+        OBD_ALLOC_PTR(type->typ_md_ops);
         OBD_ALLOC(type->typ_name, strlen(name) + 1);
-        if (type->typ_ops == NULL || type->typ_name == NULL)
+
+        if (type->typ_dt_ops == NULL ||
+            type->typ_md_ops == NULL ||
+            type->typ_name == NULL)
                 GOTO (failed, rc);
 
-        *(type->typ_ops) = *ops;
+        *(type->typ_dt_ops) = *dt_ops;
+        /* md_ops is optional */
+        if (md_ops)
+                *(type->typ_md_ops) = *md_ops;
         strcpy(type->typ_name, name);
-        list_add(&type->typ_chain, &obd_types);
+        spin_lock_init(&type->obd_type_lock);
 
+#ifdef LPROCFS
         type->typ_procroot = lprocfs_register(type->typ_name, proc_lustre_root,
                                               vars, type);
         if (IS_ERR(type->typ_procroot)) {
                 rc = PTR_ERR(type->typ_procroot);
                 type->typ_procroot = NULL;
-                list_del(&type->typ_chain);
                 GOTO (failed, rc);
         }
+#endif
+        if (ldt != NULL) {
+                type->typ_lu = ldt;
+                rc = lu_device_type_init(ldt);
+                if (rc != 0)
+                        GOTO (failed, rc);
+        }
+
+        spin_lock(&obd_types_lock);
+        list_add(&type->typ_chain, &obd_types);
+        spin_unlock(&obd_types_lock);
 
         RETURN (0);
 
  failed:
-        if (type->typ_ops != NULL)
+        if (type->typ_name != NULL)
                 OBD_FREE(type->typ_name, strlen(name) + 1);
-        if (type->typ_ops != NULL)
-                OBD_FREE (type->typ_ops, sizeof (*type->typ_ops));
+        if (type->typ_md_ops != NULL)
+                OBD_FREE_PTR(type->typ_md_ops);
+        if (type->typ_dt_ops != NULL)
+                OBD_FREE_PTR(type->typ_dt_ops);
+        OBD_FREE(type, sizeof(*type));
         RETURN(rc);
 }
 
-int class_unregister_type(char *name)
+int class_unregister_type(const char *name)
 {
         struct obd_type *type = class_search_type(name);
         ENTRY;
@@ -146,93 +232,382 @@ int class_unregister_type(char *name)
                 CERROR("type %s has refcount (%d)\n", name, type->typ_refcnt);
                 /* This is a bad situation, let's make the best of it */
                 /* Remove ops, but leave the name for debugging */
-                OBD_FREE(type->typ_ops, sizeof(*type->typ_ops));
+                OBD_FREE_PTR(type->typ_dt_ops);
+                OBD_FREE_PTR(type->typ_md_ops);
                 RETURN(-EBUSY);
         }
 
         if (type->typ_procroot) {
-                lprocfs_remove(type->typ_procroot);
-                type->typ_procroot = NULL;
+                lprocfs_remove(&type->typ_procroot);
         }
 
+        if (type->typ_lu)
+                lu_device_type_fini(type->typ_lu);
+
+        spin_lock(&obd_types_lock);
         list_del(&type->typ_chain);
+        spin_unlock(&obd_types_lock);
         OBD_FREE(type->typ_name, strlen(name) + 1);
-        if (type->typ_ops != NULL)
-                OBD_FREE(type->typ_ops, sizeof(*type->typ_ops));
+        if (type->typ_dt_ops != NULL)
+                OBD_FREE_PTR(type->typ_dt_ops);
+        if (type->typ_md_ops != NULL)
+                OBD_FREE_PTR(type->typ_md_ops);
         OBD_FREE(type, sizeof(*type));
         RETURN(0);
 } /* class_unregister_type */
 
-int class_name2dev(char *name)
+/**
+ * Create a new obd device.
+ *
+ * Find an empty slot in ::obd_devs[], create a new obd device in it.
+ * A reference on the obd type is taken with class_get_type() and dropped
+ * again on every failure path.
+ *
+ * \param type_name [in] obd device type string.
+ * \param name      [in] obd device name.
+ *
+ * \retval ERR_PTR(-EINVAL)    if \a name is MAX_OBD_NAME bytes or longer.
+ * \retval ERR_PTR(-ENODEV)    if \a type_name is not a known obd type.
+ * \retval ERR_PTR(-ENOMEM)    if the device cannot be allocated.
+ * \retval ERR_PTR(-EEXIST)    if a device called \a name already exists.
+ * \retval ERR_PTR(-EOVERFLOW) if every slot in ::obd_devs[] is in use.
+ * \retval otherwise the newly created obd device.
+ */
+struct obd_device *class_newdev(const char *type_name, const char *name)
+{
+        struct obd_device *result = NULL;
+        struct obd_device *newdev;
+        struct obd_type *type = NULL;
+        int i;
+        int new_obd_minor = 0;
+
+        if (strlen(name) >= MAX_OBD_NAME) {
+                CERROR("name/uuid must be < %u bytes long\n", MAX_OBD_NAME);
+                RETURN(ERR_PTR(-EINVAL));
+        }
+
+        type = class_get_type(type_name);
+        if (type == NULL){
+                CERROR("OBD: unknown type: %s\n", type_name);
+                RETURN(ERR_PTR(-ENODEV));
+        }
+
+        newdev = obd_device_alloc();
+        if (newdev == NULL) {
+                class_put_type(type);
+                RETURN(ERR_PTR(-ENOMEM));
+        }
+        LASSERT(newdev->obd_magic == OBD_DEVICE_MAGIC);
+
+        /* one pass over the table: claim the first free slot, but keep
+         * scanning so that a duplicate name in a later slot is still found */
+        spin_lock(&obd_dev_lock);
+        for (i = 0; i < class_devno_max(); i++) {
+                struct obd_device *obd = class_num2obd(i);
+                if (obd && obd->obd_name &&
+                    (strcmp(name, obd->obd_name) == 0)) {
+                        CERROR("Device %s already exists, won't add\n", name);
+                        /* a slot was already claimed earlier in this scan:
+                         * give it back before reporting the duplicate */
+                        if (result) {
+                                LASSERTF(result->obd_magic == OBD_DEVICE_MAGIC,
+                                         "%p obd_magic %08x != %08x\n", result,
+                                         result->obd_magic, OBD_DEVICE_MAGIC);
+                                LASSERTF(result->obd_minor == new_obd_minor,
+                                         "%p obd_minor %d != %d\n", result,
+                                         result->obd_minor, new_obd_minor);
+
+                                obd_devs[result->obd_minor] = NULL;
+                                result->obd_name[0]='\0';
+                         }
+                        result = ERR_PTR(-EEXIST);
+                        break;
+                }
+                /* first empty slot seen so far: install the new device */
+                if (!result && !obd) {
+                        result = newdev;
+                        result->obd_minor = i;
+                        new_obd_minor = i;
+                        result->obd_type = type;
+                        /* NOTE(review): the last byte of obd_name is never
+                         * written here; termination presumably relies on the
+                         * allocation being zero-filled -- confirm */
+                        strncpy(result->obd_name, name,
+                                sizeof(result->obd_name) - 1);
+                        obd_devs[i] = result;
+                }
+        }
+        spin_unlock(&obd_dev_lock);
+
+        /* the scan ran to the end without finding a free slot */
+        if (result == NULL && i >= class_devno_max()) {
+                CERROR("all %u OBD devices used, increase MAX_OBD_DEVICES\n",
+                       class_devno_max());
+                result = ERR_PTR(-EOVERFLOW);
+        }
+
+        if (IS_ERR(result)) {
+                obd_device_free(newdev);
+                class_put_type(type);
+        } else {
+                CDEBUG(D_IOCTL, "Adding new device %s (%p)\n",
+                       result->obd_name, result);
+        }
+        return result;
+}
+
+/*
+ * Remove \a obd from ::obd_devs[], free it and drop the reference on its
+ * obd type.
+ *
+ * Asserts that \a obd carries OBD_DEVICE_MAGIC, that it is the device
+ * stored at obd_devs[obd->obd_minor], and that it has an obd type.
+ */
+void class_release_dev(struct obd_device *obd)
+{
+        /* saved up front: obd is freed before the type reference is put */
+        struct obd_type *obd_type = obd->obd_type;
+
+        LASSERTF(obd->obd_magic == OBD_DEVICE_MAGIC, "%p obd_magic %08x != %08x\n",
+                 obd, obd->obd_magic, OBD_DEVICE_MAGIC);
+        LASSERTF(obd == obd_devs[obd->obd_minor], "obd %p != obd_devs[%d] %p\n",
+                 obd, obd->obd_minor, obd_devs[obd->obd_minor]);
+        LASSERT(obd_type != NULL);
+
+        CDEBUG(D_INFO, "Release obd device %s obd_type name =%s\n",
+               obd->obd_name,obd->obd_type->typ_name);
+
+        spin_lock(&obd_dev_lock);
+        obd_devs[obd->obd_minor] = NULL;
+        spin_unlock(&obd_dev_lock);
+        obd_device_free(obd);
+
+        class_put_type(obd_type);
+}
+
+/*
+ * Translate an obd device name into its index in ::obd_devs[].
+ *
+ * The table is scanned under obd_dev_lock.  A device whose name matches
+ * but which does not have obd_attached set is not reported.
+ *
+ * \retval -1 if \a name is NULL, unknown, or not attached yet,
+ *         otherwise the device index.
+ */
+int class_name2dev(const char *name)
 {
-        int res = -1;
         int i;
 
         if (!name)
                 return -1;
 
-        for (i = 0; i < MAX_OBD_DEVICES; i++) {
-                struct obd_device *obd = &obd_dev[i];
-                if (obd->obd_name && strcmp(name, obd->obd_name) == 0) {
-                        res = i;
-                        return res;
+        spin_lock(&obd_dev_lock);
+        for (i = 0; i < class_devno_max(); i++) {
+                struct obd_device *obd = class_num2obd(i);
+                if (obd && obd->obd_name && strcmp(name, obd->obd_name) == 0) {
+                        /* Make sure we finished attaching before we give
+                           out any references */
+                        LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC);
+                        if (obd->obd_attached) {
+                                spin_unlock(&obd_dev_lock);
+                                return i;
+                        }
+                        /* name matched but not attached: stop searching */
+                        break;
                 }
         }
+        spin_unlock(&obd_dev_lock);
+
+        return -1;
+}
+
+/*
+ * Translate an obd device name into the obd device itself.
+ *
+ * \retval NULL if class_name2dev() does not find \a name or the index it
+ *         returns is outside [0, class_devno_max()), otherwise the
+ *         device stored at that index.
+ */
+struct obd_device *class_name2obd(const char *name)
+{
+        int dev = class_name2dev(name);
 
-        return res;
+        /* valid indices stop at class_devno_max() - 1 */
+        if (dev < 0 || dev >= class_devno_max())
+                return NULL;
+        return class_num2obd(dev);
 }
 
+/*
+ * Translate an obd uuid into the index of the first device in
+ * ::obd_devs[] whose obd_uuid equals \a uuid.  The table is scanned under
+ * obd_dev_lock.
+ *
+ * \retval -1 if no device has that uuid, otherwise the device index.
+ */
 int class_uuid2dev(struct obd_uuid *uuid)
 {
-        int res = -1;
         int i;
 
-        for (i = 0; i < MAX_OBD_DEVICES; i++) {
-                struct obd_device *obd = &obd_dev[i];
-                if (strncmp(uuid->uuid, obd->obd_uuid.uuid, sizeof(obd->obd_uuid.uuid)) == 0) {
-                        res = i;
-                        return res;
+        spin_lock(&obd_dev_lock);
+        for (i = 0; i < class_devno_max(); i++) {
+                struct obd_device *obd = class_num2obd(i);
+                if (obd && obd_uuid_equals(uuid, &obd->obd_uuid)) {
+                        LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC);
+                        spin_unlock(&obd_dev_lock);
+                        return i;
                 }
         }
+        spin_unlock(&obd_dev_lock);
 
-        return res;
+        return -1;
 }
 
-
+/*
+ * Translate an obd uuid into the obd device itself.
+ *
+ * \retval NULL if class_uuid2dev() finds no device with \a uuid.
+ */
 struct obd_device *class_uuid2obd(struct obd_uuid *uuid)
 {
+        int dev = class_uuid2dev(uuid);
+        if (dev < 0)
+                return NULL;
+        return class_num2obd(dev);
+}
+
+/**
+ * Get obd device from ::obd_devs[]
+ *
+ * \param num [in] array index
+ *
+ * \retval NULL if \a num is negative, not below class_devno_max(), or
+ *         ::obd_devs[\a num] does not contain an obd device,
+ *         otherwise return the obd device there.
+ */
+struct obd_device *class_num2obd(int num)
+{
+        struct obd_device *obd;
+
+        /* a negative index must never reach the obd_devs[] subscript */
+        if (num < 0 || num >= class_devno_max())
+                return NULL;
+
+        obd = obd_devs[num];
+        if (obd == NULL)
+                return NULL;
+
+        /* a populated slot must hold a live device that knows its index */
+        LASSERTF(obd->obd_magic == OBD_DEVICE_MAGIC,
+                 "%p obd_magic %08x != %08x\n",
+                 obd, obd->obd_magic, OBD_DEVICE_MAGIC);
+        LASSERTF(obd->obd_minor == num,
+                 "%p obd_minor %0d != %0d\n",
+                 obd, obd->obd_minor, num);
+
+        return obd;
+}
+
+/*
+ * Print one console line per device in ::obd_devs[]: index, state
+ * ("ST" stopping, "UP" set up, "AT" attached, "--" otherwise), type name,
+ * device name, uuid and reference count.  Runs under obd_dev_lock.
+ */
+void class_obd_list(void)
+{
+        int i;
+
+        spin_lock(&obd_dev_lock);
+        for (i = 0; i < class_devno_max(); i++) {
+                struct obd_device *obd = class_num2obd(i);
+                char *status = "--";
+
+                if (obd == NULL)
+                        continue;
+
+                /* checked in this order: stopping, set up, attached */
+                if (obd->obd_stopping)
+                        status = "ST";
+                else if (obd->obd_set_up)
+                        status = "UP";
+                else if (obd->obd_attached)
+                        status = "AT";
+
+                LCONSOLE(D_CONFIG, "%3d %s %s %s %s %d\n",
+                         i, status, obd->obd_type->typ_name,
+                         obd->obd_name, obd->obd_uuid.uuid,
+                         atomic_read(&obd->obd_refcount));
+        }
+        spin_unlock(&obd_dev_lock);
+}
+
+/* Search for a client OBD connected to tgt_uuid.  If grp_uuid is
+   specified, then only the client with that uuid is returned,
+   otherwise any client connected to the tgt is returned.
+
+   Only devices whose type name begins with typ_name are considered: the
+   comparison is a strncmp() over strlen(typ_name) bytes.  The table is
+   scanned under obd_dev_lock; the device is returned after the lock is
+   dropped and no reference is taken on it here.  Returns NULL if nothing
+   matches. */
+struct obd_device * class_find_client_obd(struct obd_uuid *tgt_uuid,
+                                          const char * typ_name,
+                                          struct obd_uuid *grp_uuid)
+{
         int i;
 
-        for (i = 0; i < MAX_OBD_DEVICES; i++) {
-                struct obd_device *obd = &obd_dev[i];
-                if (strncmp(uuid->uuid, obd->obd_uuid.uuid, sizeof(obd->obd_uuid.uuid)) == 0)
+        spin_lock(&obd_dev_lock);
+        for (i = 0; i < class_devno_max(); i++) {
+                struct obd_device *obd = class_num2obd(i);
+                if (obd == NULL)
+                        continue;
+                if ((strncmp(obd->obd_type->typ_name, typ_name,
+                             strlen(typ_name)) == 0)) {
+                        if (obd_uuid_equals(tgt_uuid,
+                                            &obd->u.cli.cl_target_uuid) &&
+                            ((grp_uuid)? obd_uuid_equals(grp_uuid,
+                                                         &obd->obd_uuid) : 1)) {
+                                spin_unlock(&obd_dev_lock);
+                                return obd;
+                        }
+                }
+        }
+        spin_unlock(&obd_dev_lock);
+
+        return NULL;
+}
+
+/* Iterate over the obd_device list looking for devices whose uuid equals
+   grp_uuid. Start searching at *next, and if a device is found, the next
+   index to look at is saved in *next. If next is NULL, then the first
+   matching device will always be returned. Returns NULL if *next is
+   outside [0, class_devno_max()) or no further device matches. */
+struct obd_device * class_devices_in_group(struct obd_uuid *grp_uuid, int *next)
+{
+        int i;
+
+        if (next == NULL)
+                i = 0;
+        else if (*next >= 0 && *next < class_devno_max())
+                i = *next;
+        else
+                return NULL;
+
+        spin_lock(&obd_dev_lock);
+        for (; i < class_devno_max(); i++) {
+                struct obd_device *obd = class_num2obd(i);
+                if (obd == NULL)
+                        continue;
+                if (obd_uuid_equals(grp_uuid, &obd->obd_uuid)) {
+                        /* resume after this device on the next call */
+                        if (next != NULL)
+                                *next = i+1;
+                        spin_unlock(&obd_dev_lock);
                         return obd;
+                }
         }
+        spin_unlock(&obd_dev_lock);
 
         return NULL;
 }
 
+/**
+ * to notify sptlrpc log for @fsname has changed, let every relevant OBD
+ * adjust sptlrpc settings accordingly.
+ *
+ * Every device that is set up, not stopping, of type mdc/osc/mdt/ost and
+ * whose name starts with the first \a namelen bytes of \a fsname gets
+ * obd_set_info_async() called with KEY_SPTLRPC_CONF on its self export.
+ *
+ * \param fsname  [in] filesystem name to match device names against.
+ * \param namelen [in] number of bytes of \a fsname to compare; must be > 0.
+ *
+ * \retval 0 if every call returned 0 (or nothing matched), otherwise the
+ *         first non-zero return value seen.
+ */
+int class_notify_sptlrpc_conf(const char *fsname, int namelen)
+{
+        struct obd_device  *obd;
+        const char         *type;
+        int                 i, rc = 0, rc2;
+
+        LASSERT(namelen > 0);
+
+        spin_lock(&obd_dev_lock);
+        for (i = 0; i < class_devno_max(); i++) {
+                obd = class_num2obd(i);
+
+                if (obd == NULL || obd->obd_set_up == 0 || obd->obd_stopping)
+                        continue;
+
+                /* only notify mdc, osc, mdt, ost */
+                type = obd->obd_type->typ_name;
+                if (strcmp(type, LUSTRE_MDC_NAME) != 0 &&
+                    strcmp(type, LUSTRE_OSC_NAME) != 0 &&
+                    strcmp(type, LUSTRE_MDT_NAME) != 0 &&
+                    strcmp(type, LUSTRE_OST_NAME) != 0)
+                        continue;
+
+                /* prefix match on the device name */
+                if (strncmp(obd->obd_name, fsname, namelen))
+                        continue;
+
+                /* obd_dev_lock is dropped around the call; the reference
+                 * taken here is held until the call has returned */
+                class_incref(obd, __FUNCTION__, obd);
+                spin_unlock(&obd_dev_lock);
+                rc2 = obd_set_info_async(obd->obd_self_export,
+                                         sizeof(KEY_SPTLRPC_CONF),
+                                         KEY_SPTLRPC_CONF, 0, NULL, NULL);
+                /* keep the first failure, but still visit the rest */
+                rc = rc ? rc : rc2;
+                class_decref(obd, __FUNCTION__, obd);
+                spin_lock(&obd_dev_lock);
+        }
+        spin_unlock(&obd_dev_lock);
+        return rc;
+}
+EXPORT_SYMBOL(class_notify_sptlrpc_conf);
+
+/*
+ * Destroy each of the obd_device, obdo, import and capa slab caches that
+ * is currently set and reset its pointer to NULL.  A non-zero return from
+ * cfs_mem_cache_destroy() trips an assertion.
+ */
 void obd_cleanup_caches(void)
 {
         int rc;
+
         ENTRY;
+        if (obd_device_cachep) {
+                rc = cfs_mem_cache_destroy(obd_device_cachep);
+                /* report the cache under the name it is created with */
+                LASSERTF(rc == 0, "Cannot destroy ll_obd_dev_cache: rc %d\n", rc);
+                obd_device_cachep = NULL;
+        }
         if (obdo_cachep) {
-                rc = kmem_cache_destroy(obdo_cachep);
-                if (rc)
-                        CERROR("Cannot destory ll_obdo_cache\n");
+                rc = cfs_mem_cache_destroy(obdo_cachep);
+                LASSERTF(rc == 0, "Cannot destroy ll_obdo_cache\n");
                 obdo_cachep = NULL;
         }
         if (import_cachep) {
-                rc = kmem_cache_destroy(import_cachep);
-                if (rc)
-                        CERROR("Cannot destory ll_import_cache\n");
+                rc = cfs_mem_cache_destroy(import_cachep);
+                LASSERTF(rc == 0, "Cannot destroy ll_import_cache\n");
                 import_cachep = NULL;
         }
-        if (export_cachep) {
-                rc = kmem_cache_destroy(export_cachep);
-                if (rc)
-                        CERROR("Cannot destory ll_export_cache\n");
-                export_cachep = NULL;
+        if (capa_cachep) {
+                rc = cfs_mem_cache_destroy(capa_cachep);
+                LASSERTF(rc == 0, "Cannot destroy capa_cache\n");
+                capa_cachep = NULL;
         }
         EXIT;
 }
@@ -240,26 +615,33 @@ void obd_cleanup_caches(void)
 int obd_init_caches(void)
 {
         ENTRY;
-        LASSERT(obdo_cachep == NULL);
-        obdo_cachep = kmem_cache_create("ll_obdo_cache", sizeof(struct obdo),
-                                        0, 0, NULL, NULL);
-        if (!obdo_cachep)
+
+        LASSERT(obd_device_cachep == NULL);
+        obd_device_cachep = cfs_mem_cache_create("ll_obd_dev_cache",
+                                                 sizeof(struct obd_device),
+                                                 0, 0);
+        if (!obd_device_cachep)
                 GOTO(out, -ENOMEM);
 
-        LASSERT(export_cachep == NULL);
-        export_cachep = kmem_cache_create("ll_export_cache",
-                                          sizeof(struct obd_export),
-                                          0, 0, NULL, NULL);
-        if (!export_cachep)
+        LASSERT(obdo_cachep == NULL);
+        obdo_cachep = cfs_mem_cache_create("ll_obdo_cache", sizeof(struct obdo),
+                                           0, 0);
+        if (!obdo_cachep)
                 GOTO(out, -ENOMEM);
 
         LASSERT(import_cachep == NULL);
-        import_cachep = kmem_cache_create("ll_import_cache",
-                                          sizeof(struct obd_import),
-                                          0, 0, NULL, NULL);
+        import_cachep = cfs_mem_cache_create("ll_import_cache",
+                                             sizeof(struct obd_import),
+                                             0, 0);
         if (!import_cachep)
                 GOTO(out, -ENOMEM);
 
+        LASSERT(capa_cachep == NULL);
+        capa_cachep = cfs_mem_cache_create("capa_cache",
+                                           sizeof(struct obd_capa), 0, 0);
+        if (!capa_cachep)
+                GOTO(out, -ENOMEM);
+
         RETURN(0);
  out:
         obd_cleanup_caches();
@@ -278,265 +660,870 @@ struct obd_export *class_conn2export(struct lustre_handle *conn)
                 RETURN(NULL);
         }
 
-        if (conn->addr == -1) {  /* this means assign a new connection */
+        if (conn->cookie == -1) {  /* this means assign a new connection */
                 CDEBUG(D_CACHE, "want a new connection\n");
                 RETURN(NULL);
         }
 
-        if (!conn->addr) {
-                CDEBUG(D_CACHE, "looking for null addr\n");
-                fixme();
-                RETURN(NULL);
-        }
-
-        CDEBUG(D_IOCTL, "looking for export addr "LPX64" cookie "LPX64"\n",
-               conn->addr, conn->cookie);
-        export = (struct obd_export *) (unsigned long)conn->addr;
-        if (!kmem_cache_validate(export_cachep, (void *)export))
-                RETURN(NULL);
-
-        if (export->exp_cookie != conn->cookie)
-                RETURN(NULL);
+        CDEBUG(D_INFO, "looking for export cookie "LPX64"\n", conn->cookie);
+        export = class_handle2object(conn->cookie);
         RETURN(export);
-} /* class_conn2export */
+}
+
+/* Return the obd_device that @exp belongs to (exp->exp_obd).
+ * A NULL @exp is accepted and yields NULL. No reference is taken. */
+struct obd_device *class_exp2obd(struct obd_export *exp)
+{
+        if (exp)
+                return exp->exp_obd;
+        return NULL;
+}
 
 struct obd_device *class_conn2obd(struct lustre_handle *conn)
 {
         struct obd_export *export;
         export = class_conn2export(conn);
-        if (export)
-                return export->exp_obd;
-        fixme();
+        if (export) {
+                struct obd_device *obd = export->exp_obd;
+                class_export_put(export);
+                return obd;
+        }
         return NULL;
 }
 
+/* Return the client import (u.cli.cl_import) of the device behind @exp.
+ * Returns NULL when @exp is NULL or when the export has no device, so
+ * NULL handling is consistent with class_exp2obd(). */
+struct obd_import *class_exp2cliimp(struct obd_export *exp)
+{
+        struct obd_device *obd;
+
+        if (exp == NULL)
+                return NULL;
+
+        obd = exp->exp_obd;
+        if (obd == NULL)
+                return NULL;
+        return obd->u.cli.cl_import;
+}
+
 struct obd_import *class_conn2cliimp(struct lustre_handle *conn)
 {
-        return &class_conn2obd(conn)->u.cli.cl_import;
+        struct obd_device *obd = class_conn2obd(conn);
+        if (obd == NULL)
+                return NULL;
+        return obd->u.cli.cl_import;
 }
 
-struct obd_import *class_conn2ldlmimp(struct lustre_handle *conn)
+/* Export management functions */
+
+/* Final teardown of an export whose refcount has dropped to zero.
+ * Releases the connection (if any), asserts that all reply/RPC lists
+ * are empty, calls obd_destroy_export(), drops the device reference
+ * held for this export and frees the export via OBD_FREE_RCU. */
+static void class_export_destroy(struct obd_export *exp)
 {
-        return &class_conn2export(conn)->exp_ldlm_data.led_import;
+        struct obd_device *obd = exp->exp_obd;
+        ENTRY;
+
+        LASSERT(atomic_read(&exp->exp_refcount) == 0);
+        /* Must be checked before obd->obd_name is read below. */
+        LASSERT(obd != NULL);
+
+        CDEBUG(D_IOCTL, "destroying export %p/%s for %s\n", exp,
+               exp->exp_client_uuid.uuid, obd->obd_name);
+
+        /* "Local" exports (lctl, LOV->{mdc,osc}) have no connection. */
+        if (exp->exp_connection)
+                ptlrpc_put_connection_superhack(exp->exp_connection);
+
+        LASSERT(list_empty(&exp->exp_outstanding_replies));
+        LASSERT(list_empty(&exp->exp_uncommitted_replies));
+        LASSERT(list_empty(&exp->exp_req_replay_queue));
+        LASSERT(list_empty(&exp->exp_queued_rpc));
+        obd_destroy_export(exp);
+        class_decref(obd, "export", exp);
+
+        OBD_FREE_RCU(exp, sizeof(*exp), &exp->exp_handle);
+        EXIT;
 }
 
-struct obd_export *class_new_export(struct obd_device *obddev)
+static void export_handle_addref(void *export)
 {
-        struct obd_export * export;
+        class_export_get(export);
+}
 
-        export = kmem_cache_alloc(export_cachep, GFP_KERNEL);
-        if (!export) {
-                CERROR("no memory! (minor %d)\n", obddev->obd_minor);
-                return NULL;
+/* Take one reference on @exp and return @exp so calls can be chained.
+ * The refcount observed after the increment is logged at D_INFO. */
+struct obd_export *class_export_get(struct obd_export *exp)
+{
+        atomic_inc(&exp->exp_refcount);
+        CDEBUG(D_INFO, "GETting export %p : new refcount %d\n", exp,
+               atomic_read(&exp->exp_refcount));
+        return exp;
+}
+EXPORT_SYMBOL(class_export_get);
+
+/* Drop one reference on @exp (which must not be NULL).
+ * When the count reaches zero the export is handed to
+ * obd_zombie_export_add(); it is not freed in this function. */
+void class_export_put(struct obd_export *exp)
+{
+        LASSERT(exp != NULL);
+        CDEBUG(D_INFO, "PUTting export %p : new refcount %d\n", exp,
+               atomic_read(&exp->exp_refcount) - 1);
+        LASSERT(atomic_read(&exp->exp_refcount) > 0);
+        /* NOTE(review): 0x5a5a5a looks like a freed-memory poison pattern
+         * used to catch puts on a freed export - confirm. */
+        LASSERT(atomic_read(&exp->exp_refcount) < 0x5a5a5a);
+
+        if (atomic_dec_and_test(&exp->exp_refcount)) {
+                CDEBUG(D_IOCTL, "final put %p/%s\n",
+                       exp, exp->exp_client_uuid.uuid);
+                obd_zombie_export_add(exp);
         }
+}
+EXPORT_SYMBOL(class_export_put);
+
+/* Creates a new export, adds it to the hash table, and returns a
+ * pointer to it. The refcount is 2: one for the hash reference, and
+ * one for the pointer returned by this function.
+ *
+ * Returns ERR_PTR(-ENOMEM) if the export cannot be allocated and
+ * ERR_PTR(-EALREADY) if an export with the same @cluuid is already
+ * present in obd->obd_uuid_hash. The device's own uuid is never added
+ * to that hash. */
+struct obd_export *class_new_export(struct obd_device *obd,
+                                    struct obd_uuid *cluuid)
+{
+        struct obd_export *export;
+        int rc = 0;
 
-        memset(export, 0, sizeof(*export));
-        get_random_bytes(&export->exp_cookie, sizeof(export->exp_cookie));
-        export->exp_obd = obddev;
-        /* XXX this should be in LDLM init */
-        INIT_LIST_HEAD(&export->exp_ldlm_data.led_held_locks);
-        INIT_LIST_HEAD(&export->exp_conn_chain);
-        spin_lock(&obddev->obd_dev_lock);
+        OBD_ALLOC_PTR(export);
+        if (!export)
+                return ERR_PTR(-ENOMEM);
+
+        export->exp_conn_cnt = 0;
+        export->exp_lock_hash = NULL;
+        atomic_set(&export->exp_refcount, 2);
+        atomic_set(&export->exp_rpc_count, 0);
+        export->exp_obd = obd;
+        CFS_INIT_LIST_HEAD(&export->exp_outstanding_replies);
+        spin_lock_init(&export->exp_uncommitted_replies_lock);
+        CFS_INIT_LIST_HEAD(&export->exp_uncommitted_replies);
+        CFS_INIT_LIST_HEAD(&export->exp_req_replay_queue);
+        CFS_INIT_LIST_HEAD(&export->exp_handle.h_link);
+        CFS_INIT_LIST_HEAD(&export->exp_queued_rpc);
+        class_handle_hash(&export->exp_handle, export_handle_addref);
+        export->exp_last_request_time = cfs_time_current_sec();
+        spin_lock_init(&export->exp_lock);
+        INIT_HLIST_NODE(&export->exp_uuid_hash);
+        INIT_HLIST_NODE(&export->exp_nid_hash);
+
+        export->exp_sp_peer = LUSTRE_SP_ANY;
+        export->exp_flvr.sf_rpc = SPTLRPC_FLVR_INVALID;
+        export->exp_client_uuid = *cluuid;
+        obd_init_export(export);
+
+        spin_lock(&obd->obd_dev_lock);
+        if (!obd_uuid_equals(cluuid, &obd->obd_uuid)) {
+                rc = lustre_hash_add_unique(obd->obd_uuid_hash, cluuid,
+                                            &export->exp_uuid_hash);
+                if (rc != 0) {
+                        LCONSOLE_WARN("%s: denying duplicate export for %s, %d\n",
+                                      obd->obd_name, cluuid->uuid, rc);
+                        spin_unlock(&obd->obd_dev_lock);
+                        class_handle_unhash(&export->exp_handle);
+                        /* Undo obd_init_export() so anything it set up
+                         * is released before the export is freed. */
+                        obd_destroy_export(export);
+                        OBD_FREE_PTR(export);
+                        return ERR_PTR(-EALREADY);
+                }
+        }
+
+        LASSERT(!obd->obd_stopping); /* shouldn't happen, but might race */
+        class_incref(obd, "export", export);
         list_add(&export->exp_obd_chain, &export->exp_obd->obd_exports);
-        spin_unlock(&obddev->obd_dev_lock);
+        list_add_tail(&export->exp_obd_chain_timed,
+                      &export->exp_obd->obd_exports_timed);
+        export->exp_obd->obd_num_exports++;
+        spin_unlock(&obd->obd_dev_lock);
+
         return export;
 }
+EXPORT_SYMBOL(class_new_export);
 
-void class_destroy_export(struct obd_export *exp)
+void class_unlink_export(struct obd_export *exp)
 {
-        ENTRY;
-
-        LASSERT(exp->exp_cookie != DEAD_HANDLE_MAGIC);
+        class_handle_unhash(&exp->exp_handle);
 
         spin_lock(&exp->exp_obd->obd_dev_lock);
-        list_del(&exp->exp_obd_chain);
+        /* delete an uuid-export hashitem from hashtables */
+        if (!hlist_unhashed(&exp->exp_uuid_hash))
+                lustre_hash_del(exp->exp_obd->obd_uuid_hash,
+                                &exp->exp_client_uuid,
+                                &exp->exp_uuid_hash);
+
+        list_del_init(&exp->exp_obd_chain);
+        list_del_init(&exp->exp_obd_chain_timed);
+        exp->exp_obd->obd_num_exports--;
         spin_unlock(&exp->exp_obd->obd_dev_lock);
 
-        /* XXXshaver no connection here... */
-        if (exp->exp_connection)
-                spin_lock(&exp->exp_connection->c_lock);
-        list_del(&exp->exp_conn_chain);
-        if (exp->exp_connection) {
-                spin_unlock(&exp->exp_connection->c_lock);
-                ptlrpc_put_connection_superhack(exp->exp_connection);
-        }
+        /* Keep these counter valid always */
+        spin_lock_bh(&exp->exp_obd->obd_processing_task_lock);
+        if (exp->exp_delayed)
+                exp->exp_obd->obd_delayed_clients--;
+        else if (exp->exp_in_recovery)
+                exp->exp_obd->obd_recoverable_clients--;
+        else if (exp->exp_obd->obd_recovering)
+                exp->exp_obd->obd_max_recoverable_clients--;
+        spin_unlock_bh(&exp->exp_obd->obd_processing_task_lock);
+        class_export_put(exp);
+}
+EXPORT_SYMBOL(class_unlink_export);
+
+/* Import management functions */
+void class_import_destroy(struct obd_import *imp)
+{
+        ENTRY;
+
+        CDEBUG(D_IOCTL, "destroying import %p for %s\n", imp,
+                imp->imp_obd->obd_name);
 
-        /* Abort any inflight DLM requests and NULL out their (about to be
-         * freed) import. */
-        if (exp->exp_ldlm_data.led_import.imp_obd)
-                ptlrpc_abort_inflight_superhack(&exp->exp_ldlm_data.led_import,
-                                                1);
+        LASSERT(atomic_read(&imp->imp_refcount) == 0);
 
-        exp->exp_cookie = DEAD_HANDLE_MAGIC;
-        kmem_cache_free(export_cachep, exp);
+        ptlrpc_put_connection_superhack(imp->imp_connection);
 
+        while (!list_empty(&imp->imp_conn_list)) {
+                struct obd_import_conn *imp_conn;
+
+                imp_conn = list_entry(imp->imp_conn_list.next,
+                                      struct obd_import_conn, oic_item);
+                list_del_init(&imp_conn->oic_item);
+                ptlrpc_put_connection_superhack(imp_conn->oic_conn);
+                OBD_FREE(imp_conn, sizeof(*imp_conn));
+        }
+
+        LASSERT(imp->imp_sec == NULL);
+        class_decref(imp->imp_obd, "import", imp);
+        OBD_FREE_RCU(imp, sizeof(*imp), &imp->imp_handle);
         EXIT;
 }
 
-/* a connection defines an export context in which preallocation can
-   be managed. */
-int class_connect(struct lustre_handle *conn, struct obd_device *obd,
-                  struct obd_uuid *cluuid)
+static void import_handle_addref(void *import)
 {
-        struct obd_export * export;
-        if (conn == NULL) {
-                LBUG();
-                return -EINVAL;
-        }
+        class_import_get(import);
+}
 
-        if (obd == NULL) {
-                LBUG();
-                return -EINVAL;
+/* Take one reference on @import and return it.
+ * Asserts beforehand that the refcount is non-negative and below
+ * 0x5a5a5a, then logs the new count and owning device name at D_INFO. */
+struct obd_import *class_import_get(struct obd_import *import)
+{
+        LASSERT(atomic_read(&import->imp_refcount) >= 0);
+        LASSERT(atomic_read(&import->imp_refcount) < 0x5a5a5a);
+        atomic_inc(&import->imp_refcount);
+        CDEBUG(D_INFO, "import %p refcount=%d obd=%s\n", import,
+               atomic_read(&import->imp_refcount), 
+               import->imp_obd->obd_name);
+        return import;
+}
+EXPORT_SYMBOL(class_import_get);
+
+void class_import_put(struct obd_import *imp)
+{
+        ENTRY;
+
+        LASSERT(atomic_read(&imp->imp_refcount) > 0);
+        LASSERT(atomic_read(&imp->imp_refcount) < 0x5a5a5a);
+        LASSERT(list_empty(&imp->imp_zombie_chain));
+
+        CDEBUG(D_INFO, "import %p refcount=%d obd=%s\n", imp,
+               atomic_read(&imp->imp_refcount) - 1, 
+               imp->imp_obd->obd_name);
+
+        if (atomic_dec_and_test(&imp->imp_refcount)) {
+                CDEBUG(D_INFO, "final put import %p\n", imp);
+                obd_zombie_import_add(imp);
         }
 
-        if (cluuid == NULL) {
-                LBUG();
-                return -EINVAL;
+        EXIT;
+}
+EXPORT_SYMBOL(class_import_put);
+
+/* Initialise the adaptive-timeout state of an import: the network
+ * latency estimate is set up with at_init(..., 0, 0) and each of the
+ * IMP_AT_MAX_PORTALS service estimates starts at
+ * INITIAL_CONNECT_TIMEOUT with AT_FLG_NOHIST. */
+static void init_imp_at(struct imp_at *at) {
+        int i;
+        at_init(&at->iat_net_latency, 0, 0);
+        for (i = 0; i < IMP_AT_MAX_PORTALS; i++) {
+                /* max service estimates are tracked on the server side, so
+                   don't use the AT history here, just use the last reported
+                   val. (But keep hist for proc histogram, worst_ever) */
+                at_init(&at->iat_service_estimate[i], INITIAL_CONNECT_TIMEOUT,
+                        AT_FLG_NOHIST);
         }
+}
 
-        export = class_new_export(obd);
-        if (!export)
-                return -ENOMEM;
+struct obd_import *class_new_import(struct obd_device *obd)
+{
+        struct obd_import *imp;
 
-        conn->addr = (__u64) (unsigned long)export;
-        conn->cookie = export->exp_cookie;
-        memcpy(&export->exp_client_uuid, cluuid, sizeof(export->exp_client_uuid));
+        OBD_ALLOC(imp, sizeof(*imp));
+        if (imp == NULL)
+                return NULL;
 
-        CDEBUG(D_IOCTL, "connect: addr %Lx cookie %Lx\n",
-               (long long)conn->addr, (long long)conn->cookie);
-        return 0;
+        CFS_INIT_LIST_HEAD(&imp->imp_zombie_chain);
+        CFS_INIT_LIST_HEAD(&imp->imp_replay_list);
+        CFS_INIT_LIST_HEAD(&imp->imp_sending_list);
+        CFS_INIT_LIST_HEAD(&imp->imp_delayed_list);
+        spin_lock_init(&imp->imp_lock);
+        imp->imp_last_success_conn = 0;
+        imp->imp_state = LUSTRE_IMP_NEW;
+        imp->imp_obd = class_incref(obd, "import", imp);
+        sema_init(&imp->imp_sec_mutex, 1);
+        cfs_waitq_init(&imp->imp_recovery_waitq);
+
+        atomic_set(&imp->imp_refcount, 2);
+        atomic_set(&imp->imp_unregistering, 0);
+        atomic_set(&imp->imp_inflight, 0);
+        atomic_set(&imp->imp_replay_inflight, 0);
+        atomic_set(&imp->imp_inval_count, 0);
+        CFS_INIT_LIST_HEAD(&imp->imp_conn_list);
+        CFS_INIT_LIST_HEAD(&imp->imp_handle.h_link);
+        class_handle_hash(&imp->imp_handle, import_handle_addref);
+        init_imp_at(&imp->imp_at);
+
+        /* the default magic is V2, will be used in connect RPC, and
+         * then adjusted according to the flags in request/reply. */
+        imp->imp_msg_magic = LUSTRE_MSG_MAGIC_V2;
+
+        return imp;
 }
+EXPORT_SYMBOL(class_new_import);
 
-int class_disconnect(struct lustre_handle *conn)
+/* Begin destruction of @import: remove it from the handle hash, bump
+ * imp_generation under imp_lock, and drop one reference with
+ * class_import_put(). @import must be neither NULL nor LP_POISON. */
+void class_destroy_import(struct obd_import *import)
+{
+        LASSERT(import != NULL);
+        LASSERT(import != LP_POISON);
+
+        class_handle_unhash(&import->imp_handle);
+
+        spin_lock(&import->imp_lock);
+        import->imp_generation++;
+        spin_unlock(&import->imp_lock);
+        class_import_put(import);
+}
+EXPORT_SYMBOL(class_destroy_import);
+
+/* A connection defines an export context in which preallocation can
+   be managed. This releases the export pointer reference, and returns
+   the export handle, so the export refcount is 1 when this function
+   returns.
+
+   On success conn->cookie is set to the new export's handle cookie and
+   0 is returned. If class_new_export() fails, its error code is
+   returned and @conn is left unmodified. @conn, @obd and @cluuid must
+   all be non-NULL (asserted). */
+int class_connect(struct lustre_handle *conn, struct obd_device *obd,
+                  struct obd_uuid *cluuid)
 {
         struct obd_export *export;
+        LASSERT(conn != NULL);
+        LASSERT(obd != NULL);
+        LASSERT(cluuid != NULL);
+        ENTRY;
+
+        export = class_new_export(obd, cluuid);
+        if (IS_ERR(export))
+                RETURN(PTR_ERR(export));
+
+        conn->cookie = export->exp_handle.h_cookie;
+        /* drop the "returned pointer" reference; the hash keeps its own */
+        class_export_put(export);
+
+        CDEBUG(D_IOCTL, "connect: client %s, cookie "LPX64"\n",
+               cluuid->uuid, conn->cookie);
+        RETURN(0);
+}
+EXPORT_SYMBOL(class_connect);
+
+/* if export is involved in recovery then clean up related things
+ *
+ * Runs entirely under obd_processing_task_lock (bh-safe). Nothing is
+ * done unless the device is recovering and the export is flagged
+ * exp_in_recovery. In that case the export's recovery flags are
+ * cleared (each under exp_lock) and the device's connected,
+ * recoverable, req-replay and lock-replay client counters are
+ * decremented to match. */
+void class_export_recovery_cleanup(struct obd_export *exp)
+{
+        struct obd_device *obd = exp->exp_obd;
+
+        spin_lock_bh(&obd->obd_processing_task_lock);
+        if (obd->obd_recovering && exp->exp_in_recovery) {
+                spin_lock(&exp->exp_lock);
+                exp->exp_in_recovery = 0;
+                spin_unlock(&exp->exp_lock);
+                obd->obd_connected_clients--;
+                /* each connected client is counted as recoverable */
+                obd->obd_recoverable_clients--;
+                if (exp->exp_req_replay_needed) {
+                        spin_lock(&exp->exp_lock);
+                        exp->exp_req_replay_needed = 0;
+                        spin_unlock(&exp->exp_lock);
+                        /* counter must be non-zero before it is dropped */
+                        LASSERT(atomic_read(&obd->obd_req_replay_clients));
+                        atomic_dec(&obd->obd_req_replay_clients);
+                }
+                if (exp->exp_lock_replay_needed) {
+                        spin_lock(&exp->exp_lock);
+                        exp->exp_lock_replay_needed = 0;
+                        spin_unlock(&exp->exp_lock);
+                        /* counter must be non-zero before it is dropped */
+                        LASSERT(atomic_read(&obd->obd_lock_replay_clients));
+                        atomic_dec(&obd->obd_lock_replay_clients);
+                }
+        }
+        spin_unlock_bh(&obd->obd_processing_task_lock);
+}
+
+/* This function removes 1-3 references from the export:
+ * 1 - for the export pointer passed in
+ * and, if a disconnect is really needed,
+ * 2 - for removal from the hash
+ * 3 - in class_unlink_export()
+ * The export passed to this function may be destroyed by it. */
+int class_disconnect(struct obd_export *export)
+{
+        int already_disconnected;
         ENTRY;
 
-        if (!(export = class_conn2export(conn))) {
+        if (export == NULL) {
                 fixme();
-                CDEBUG(D_IOCTL, "disconnect: attempting to free "
-                       "nonexistent client "LPX64"\n", conn->addr);
+                CDEBUG(D_IOCTL, "attempting to free NULL export %p\n", export);
                 RETURN(-EINVAL);
         }
 
-        CDEBUG(D_IOCTL, "disconnect: addr %Lx cookie %Lx\n",
-                       (long long)conn->addr, (long long)conn->cookie);
+        spin_lock(&export->exp_lock);
+        already_disconnected = export->exp_disconnected;
+        export->exp_disconnected = 1;
+        spin_unlock(&export->exp_lock);
+
+        /* class_cleanup(), abort_recovery(), and class_fail_export()
+         * all end up in here, and if any of them race we shouldn't
+         * call extra class_export_puts(). */
+        if (already_disconnected) {
+                LASSERT(hlist_unhashed(&export->exp_nid_hash));
+                GOTO(no_disconn, already_disconnected);
+        }
 
-        class_destroy_export(export);
+        CDEBUG(D_IOCTL, "disconnect: cookie "LPX64"\n",
+               export->exp_handle.h_cookie);
 
+        if (!hlist_unhashed(&export->exp_nid_hash))
+                lustre_hash_del(export->exp_obd->obd_nid_hash,
+                                &export->exp_connection->c_peer.nid,
+                                &export->exp_nid_hash);
+
+        class_export_recovery_cleanup(export);
+        class_unlink_export(export);
+no_disconn:
+        class_export_put(export);
         RETURN(0);
 }
 
-void class_disconnect_all(struct obd_device *obddev)
+static void class_disconnect_export_list(struct list_head *list,
+                                         enum obd_option flags)
 {
-        int again = 1;
-
-        while (again) {
-                spin_lock(&obddev->obd_dev_lock);
-                if (!list_empty(&obddev->obd_exports)) {
-                        struct obd_export *export;
-                        struct lustre_handle conn;
-                        int rc;
+        int rc;
+        struct obd_export *exp;
+        ENTRY;
 
-                        export = list_entry(obddev->obd_exports.next,
-                                            struct obd_export,
-                                            exp_obd_chain);
-                        conn.addr = (__u64)(unsigned long)export;
-                        conn.cookie = export->exp_cookie;
-                        spin_unlock(&obddev->obd_dev_lock);
-                        CERROR("force disconnecting %s:%s export %p\n",
-                               export->exp_obd->obd_type->typ_name,
-                               export->exp_connection ?
-                               (char *)export->exp_connection->c_remote_uuid.uuid :
-                               "<unconnected>", export);
-                        rc = obd_disconnect(&conn);
-                        if (rc < 0) {
-                                /* AED: not so sure about this...  We can't
-                                 * loop here forever, yet we shouldn't leak
-                                 * exports on a struct we will soon destroy.
-                                 */
-                                CERROR("destroy export %p with err: rc = %d\n",
-                                       export, rc);
-                                class_destroy_export(export);
-                        }
-                } else {
-                        spin_unlock(&obddev->obd_dev_lock);
-                        again = 0;
+        /* It's possible that an export may disconnect itself, but
+         * nothing else will be added to this list. */
+        while (!list_empty(list)) {
+                exp = list_entry(list->next, struct obd_export, exp_obd_chain);
+                /* need for safe call CDEBUG after obd_disconnect */
+                class_export_get(exp);
+
+                spin_lock(&exp->exp_lock);
+                exp->exp_flags = flags;
+                spin_unlock(&exp->exp_lock);
+
+                if (obd_uuid_equals(&exp->exp_client_uuid,
+                                    &exp->exp_obd->obd_uuid)) {
+                        CDEBUG(D_HA,
+                               "exp %p export uuid == obd uuid, don't discon\n",
+                               exp);
+                        /* Need to delete this now so we don't end up pointing
+                         * to work_list later when this export is cleaned up. */
+                        list_del_init(&exp->exp_obd_chain);
+                        class_export_put(exp);
+                        continue;
                 }
+
+                class_export_get(exp);
+                CDEBUG(D_HA, "%s: disconnecting export at %s (%p), "
+                       "last request at "CFS_TIME_T"\n",
+                       exp->exp_obd->obd_name, obd_export_nid2str(exp),
+                       exp, exp->exp_last_request_time);
+                /* release one export reference anyway */
+                rc = obd_disconnect(exp);
+
+                CDEBUG(D_HA, "disconnected export at %s (%p): rc %d\n",
+                       obd_export_nid2str(exp), exp, rc);
+                class_export_put(exp);
         }
+        EXIT;
 }
 
-#if 0
+/* Disconnect every export of @obd.
+ * Both obd_exports and obd_delayed_exports are spliced onto a private
+ * work list while holding obd_dev_lock; the lock is then dropped and
+ * the list is processed by class_disconnect_export_list() with flags
+ * from exp_flags_from_obd(). */
+void class_disconnect_exports(struct obd_device *obd)
+{
+        struct list_head work_list;
+        ENTRY;
+
+        /* Move all of the exports from obd_exports to a work list, en masse. */
+        CFS_INIT_LIST_HEAD(&work_list);
+        spin_lock(&obd->obd_dev_lock);
+        list_splice_init(&obd->obd_exports, &work_list);
+        list_splice_init(&obd->obd_delayed_exports, &work_list);
+        spin_unlock(&obd->obd_dev_lock);
+
+        if (!list_empty(&work_list)) {
+                CDEBUG(D_HA, "OBD device %d (%p) has exports, "
+                       "disconnecting them\n", obd->obd_minor, obd);
+                class_disconnect_export_list(&work_list,
+                                             exp_flags_from_obd(obd));
+        } else
+                CDEBUG(D_HA, "OBD device %d (%p) has no exports\n",
+                       obd->obd_minor, obd);
+        EXIT;
+}
+EXPORT_SYMBOL(class_disconnect_exports);
 
-/* FIXME: Data is a space- or comma-separated list of device IDs.  This will
- * have to change. */
-int class_multi_setup(struct obd_device *obddev, uint32_t len, void *data)
+/* Remove exports that have not completed recovery.
+ */
+void class_disconnect_stale_exports(struct obd_device *obd,
+                                    int (*test_export)(struct obd_export *),
+                                    enum obd_option flags)
 {
-        int count, rc;
-        char *p;
+        struct list_head work_list;
+        struct list_head *pos, *n;
+        struct obd_export *exp;
         ENTRY;
 
-        for (p = data, count = 0; p < (char *)data + len; count++) {
-                char *end;
-                int tmp = simple_strtoul(p, &end, 0);
+        CFS_INIT_LIST_HEAD(&work_list);
+        spin_lock(&obd->obd_dev_lock);
+        obd->obd_stale_clients = 0;
+        list_for_each_safe(pos, n, &obd->obd_exports) {
+                exp = list_entry(pos, struct obd_export, exp_obd_chain);
+                if (test_export(exp))
+                        continue;
+
+                list_move(&exp->exp_obd_chain, &work_list);
+                /* don't count self-export as client */
+                if (obd_uuid_equals(&exp->exp_client_uuid,
+                                     &exp->exp_obd->obd_uuid))
+                        continue;
+
+                obd->obd_stale_clients++;
+                CDEBUG(D_ERROR, "%s: disconnect stale client %s@%s\n",
+                       obd->obd_name, exp->exp_client_uuid.uuid,
+                       exp->exp_connection == NULL ? "<unknown>" :
+                       libcfs_nid2str(exp->exp_connection->c_peer.nid));
+        }
+        spin_unlock(&obd->obd_dev_lock);
 
-                if (p == end) {
-                        CERROR("invalid device ID starting at: %s\n", p);
-                        GOTO(err_disconnect, rc = -EINVAL);
-                }
+        CDEBUG(D_HA, "%s: disconnecting %d stale clients\n", obd->obd_name,
+               obd->obd_stale_clients);
+
+        class_disconnect_export_list(&work_list, flags);
+        EXIT;
+}
+EXPORT_SYMBOL(class_disconnect_stale_exports);
+
+/* Mark @exp as failed and disconnect it.
+ * exp_failed is tested and set under exp_lock, so only the first
+ * caller proceeds; later callers log and return. If
+ * obd_dump_on_timeout is set the debug log is dumped before the
+ * disconnect. A failure of obd_disconnect() is logged, not returned. */
+void class_fail_export(struct obd_export *exp)
+{
+        int rc, already_failed;
+
+        spin_lock(&exp->exp_lock);
+        already_failed = exp->exp_failed;
+        exp->exp_failed = 1;
+        spin_unlock(&exp->exp_lock);
+
+        if (already_failed) {
+                CDEBUG(D_HA, "disconnecting dead export %p/%s; skipping\n",
+                       exp, exp->exp_client_uuid.uuid);
+                return;
+        }
+
+        CDEBUG(D_HA, "disconnecting export %p/%s\n",
+               exp, exp->exp_client_uuid.uuid);
+
+        if (obd_dump_on_timeout)
+                libcfs_debug_dumplog();
+
+        /* Most callers into obd_disconnect are removing their own reference
+         * (request, for example) in addition to the one from the hash table.
+         * We don't have such a reference here, so make one. */
+        class_export_get(exp);
+        rc = obd_disconnect(exp);
+        if (rc)
+                CERROR("disconnecting export %p failed: %d\n", exp, rc);
+        else
+                CDEBUG(D_HA, "disconnected export %p/%s\n",
+                       exp, exp->exp_client_uuid.uuid);
+}
+EXPORT_SYMBOL(class_fail_export);
+
+/* Return a printable NID for the peer of @exp, or the literal
+ * "(no nid)" when the export has no connection.
+ * NOTE(review): the lifetime of the buffer returned by
+ * libcfs_nid2str() is not visible here - confirm before storing the
+ * pointer. */
+char *obd_export_nid2str(struct obd_export *exp)
+{
+        if (exp->exp_connection != NULL)
+                return libcfs_nid2str(exp->exp_connection->c_peer.nid);
+
+        return "(no nid)";
+}
+EXPORT_SYMBOL(obd_export_nid2str);
+
+/* Evict every export of @obd whose peer NID equals the textual @nid.
+ * The NID hash is looked up repeatedly until no export is found; each
+ * hit is failed with class_fail_export(). Returns the number of
+ * exports evicted (0 if none matched). */
+int obd_export_evict_by_nid(struct obd_device *obd, const char *nid)
+{
+        struct obd_export *doomed_exp = NULL;
+        int exports_evicted = 0;
+
+        lnet_nid_t nid_key = libcfs_str2nid((char *)nid);
+
+        do {
+                doomed_exp = lustre_hash_lookup(obd->obd_nid_hash, &nid_key);
+                if (doomed_exp == NULL)
+                        break;
+
+                LASSERTF(doomed_exp->exp_connection->c_peer.nid == nid_key,
+                         "nid %s found, wanted nid %s, requested nid %s\n",
+                         obd_export_nid2str(doomed_exp),
+                         libcfs_nid2str(nid_key), nid);
+                LASSERTF(doomed_exp != obd->obd_self_export,
+                         "self-export is hashed by NID?\n");
+                exports_evicted++;
+                CWARN("%s: evict NID '%s' (%s) #%d at adminstrative request\n",
+                       obd->obd_name, nid, doomed_exp->exp_client_uuid.uuid,
+                       exports_evicted);
+                class_fail_export(doomed_exp);
+                /* presumably drops the reference taken by the hash lookup
+                 * - confirm against lustre_hash_lookup() */
+                class_export_put(doomed_exp);
+        } while (1);
+
+        if (!exports_evicted)
+                CDEBUG(D_HA,"%s: can't disconnect NID '%s': no exports found\n",
+                       obd->obd_name, nid);
+        return exports_evicted;
+}
+EXPORT_SYMBOL(obd_export_evict_by_nid);
+
+/* Evict the export of @obd whose client uuid equals the textual @uuid.
+ * Evicting the device's own uuid is refused. Returns 1 when an export
+ * was found and failed with class_fail_export(), 0 otherwise. */
+int obd_export_evict_by_uuid(struct obd_device *obd, const char *uuid)
+{
+        struct obd_export *doomed_exp = NULL;
+        struct obd_uuid doomed_uuid;
+        int exports_evicted = 0;
+
+        obd_str2uuid(&doomed_uuid, uuid);
+        if (obd_uuid_equals(&doomed_uuid, &obd->obd_uuid)) {
+                CERROR("%s: can't evict myself\n", obd->obd_name);
+                return exports_evicted;
+        }
+
+        doomed_exp = lustre_hash_lookup(obd->obd_uuid_hash, &doomed_uuid);
+
+        if (doomed_exp == NULL) {
+                CERROR("%s: can't disconnect %s: no exports found\n",
+                       obd->obd_name, uuid);
+        } else {
+                CWARN("%s: evicting %s at adminstrative request\n",
+                       obd->obd_name, doomed_exp->exp_client_uuid.uuid);
+                class_fail_export(doomed_exp);
+                /* presumably drops the reference taken by the hash lookup
+                 * - confirm against lustre_hash_lookup() */
+                class_export_put(doomed_exp);
+                exports_evicted++;
+        }
+
+        return exports_evicted;
+}
+EXPORT_SYMBOL(obd_export_evict_by_uuid);
+
+/**
+ * kill zombie imports and exports
+ */
+void obd_zombie_impexp_cull(void)
+{
+        struct obd_import *import;
+        struct obd_export *export;
+        ENTRY;
 
-                if (tmp < 0 || tmp >= MAX_OBD_DEVICES) {
-                        CERROR("Trying to sub dev %d  - dev no too large\n",
-                               tmp);
-                        GOTO(err_disconnect, rc  = -EINVAL);
+        do {
+                spin_lock(&obd_zombie_impexp_lock);
+
+                import = NULL;
+                if (!list_empty(&obd_zombie_imports)) {
+                        import = list_entry(obd_zombie_imports.next,
+                                            struct obd_import,
+                                            imp_zombie_chain);
+                        list_del_init(&import->imp_zombie_chain);
                 }
 
-                rc = obd_connect(&obddev->obd_multi_conn[count], &obd_dev[tmp]);
-                if (rc) {
-                        CERROR("cannot connect to device %d: rc = %d\n", tmp,
-                               rc);
-                        GOTO(err_disconnect, rc);
+                export = NULL;
+                if (!list_empty(&obd_zombie_exports)) {
+                        export = list_entry(obd_zombie_exports.next,
+                                            struct obd_export,
+                                            exp_obd_chain);
+                        list_del_init(&export->exp_obd_chain);
                 }
 
-                CDEBUG(D_INFO, "target OBD %d is of type %s\n", count,
-                       obd_dev[tmp].obd_type->typ_name);
+                spin_unlock(&obd_zombie_impexp_lock);
+
+                if (import != NULL)
+                        class_import_destroy(import);
+
+                if (export != NULL)
+                        class_export_destroy(export);
+
+        } while (import != NULL || export != NULL);
+        EXIT;
+}
+
+/* File-local state for the zombie import/export handling below.
+ * NOTE(review): the users of the two completions are outside this
+ * view; presumably they signal thread start and stop - confirm. */
+static struct completion        obd_zombie_start;
+static struct completion        obd_zombie_stop;
+static unsigned long            obd_zombie_flags;
+static cfs_waitq_t              obd_zombie_waitq;
+
+/* NOTE(review): OBD_ZOMBIE_STOP is written like a mask (1 << 1 == 2)
+ * but this file passes it to test_bit() as a bit number, i.e. bit 2 of
+ * obd_zombie_flags is tested. Whoever sets the flag must use the same
+ * bit-number convention. */
+enum {
+        OBD_ZOMBIE_STOP   = 1 << 1
+};
+
+/**
+ * Wait-condition helper for the zombie import/export reaper.
+ *
+ * Returns 1 when both zombie lists are empty and OBD_ZOMBIE_STOP is not
+ * set (i.e. there is nothing to do yet), 0 otherwise.  Note the sense:
+ * a zero result means there is work, or a stop has been requested.
+ * The \a arg parameter is not used.
+ */
+static int obd_zombie_impexp_check(void *arg)
+{
+        int nothing_to_do = 0;
+
+        spin_lock(&obd_zombie_impexp_lock);
+        if (list_empty(&obd_zombie_imports) &&
+            list_empty(&obd_zombie_exports) &&
+            !test_bit(OBD_ZOMBIE_STOP, &obd_zombie_flags))
+                nothing_to_do = 1;
+        spin_unlock(&obd_zombie_impexp_lock);
+
+        RETURN(nothing_to_do);
+}
+
+/**
+ * Queue export \a exp on obd_zombie_exports and signal obd_zombie_waitq
+ * through obd_zombie_impexp_notify().
+ *
+ * exp->exp_obd_chain must be an empty (unlinked) list head on entry;
+ * this is asserted under obd_zombie_impexp_lock.
+ */
+static void obd_zombie_export_add(struct obd_export *exp)
+{
+        spin_lock(&obd_zombie_impexp_lock);
+        LASSERT(list_empty(&exp->exp_obd_chain));
+        list_add(&exp->exp_obd_chain, &obd_zombie_exports);
+        spin_unlock(&obd_zombie_impexp_lock);
+
+        /*
+         * obd_zombie_impexp_notify is a function in this file, so testing
+         * its address against NULL is always true; call it directly.
+         */
+        obd_zombie_impexp_notify();
+}
+
+/**
+ * Queue import \a imp on obd_zombie_imports and signal obd_zombie_waitq
+ * through obd_zombie_impexp_notify().
+ *
+ * Asserts that imp->imp_sec is NULL and that imp->imp_zombie_chain is an
+ * empty (unlinked) list head before linking it.
+ */
+static void obd_zombie_import_add(struct obd_import *imp)
+{
+        LASSERT(imp->imp_sec == NULL);
+
+        spin_lock(&obd_zombie_impexp_lock);
+        LASSERT(list_empty(&imp->imp_zombie_chain));
+        list_add(&imp->imp_zombie_chain, &obd_zombie_imports);
+        spin_unlock(&obd_zombie_impexp_lock);
+
+        /*
+         * obd_zombie_impexp_notify is a function in this file, so testing
+         * its address against NULL is always true; call it directly.
+         */
+        obd_zombie_impexp_notify();
+}
+
+/**
+ * Signal obd_zombie_waitq so that waiters re-evaluate their condition,
+ * e.g. after a new zombie was queued or a stop was requested.
+ */
+static void obd_zombie_impexp_notify(void)
+{
+        cfs_waitq_t *waitq = &obd_zombie_waitq;
+
+        cfs_waitq_signal(waitq);
+}
+
+/**
+ * Report whether both zombie queues are currently empty.
+ *
+ * Must not be called once OBD_ZOMBIE_STOP has been set (asserted).
+ * Returns 1 if obd_zombie_imports and obd_zombie_exports are both empty,
+ * 0 otherwise.
+ */
+static int obd_zombie_is_idle(void)
+{
+        int idle = 0;
+
+        LASSERT(!test_bit(OBD_ZOMBIE_STOP, &obd_zombie_flags));
+
+        spin_lock(&obd_zombie_impexp_lock);
+        if (list_empty(&obd_zombie_imports) &&
+            list_empty(&obd_zombie_exports))
+                idle = 1;
+        spin_unlock(&obd_zombie_impexp_lock);
+
+        return idle;
+}
 
-                p = end + 1;
+/**
+ * Wait on obd_zombie_waitq until obd_zombie_is_idle() reports that the
+ * zombie import and export queues are both empty.
+ */
+void obd_zombie_barrier(void)
+{
+        struct l_wait_info wait_info = { 0 };
+
+        l_wait_event(obd_zombie_waitq, obd_zombie_is_idle(), &wait_info);
+}
+EXPORT_SYMBOL(obd_zombie_barrier);
+
+#ifdef __KERNEL__
+
+/**
+ * Body of the zombie export/import destroy thread.
+ *
+ * Completes obd_zombie_start once the daemonize attempt has finished,
+ * then loops until OBD_ZOMBIE_STOP is set: wait for queued zombies (or a
+ * stop request), cull them, and signal obd_zombie_waitq.  Completes
+ * obd_zombie_stop on every exit path so that obd_zombie_impexp_stop()
+ * never blocks on a thread that has already gone away.
+ *
+ * Returns 0 on normal exit, or the error from cfs_daemonize_ctxt().
+ */
+static int obd_zombie_impexp_thread(void *unused)
+{
+        int rc;
+
+        rc = cfs_daemonize_ctxt("obd_zombid");
+        if (rc != 0) {
+                complete(&obd_zombie_start);
+                /*
+                 * The main loop never runs, so nobody else would complete
+                 * obd_zombie_stop; do it here or a later
+                 * obd_zombie_impexp_stop() would wait forever.
+                 */
+                complete(&obd_zombie_stop);
+                RETURN(rc);
+        }
+
+        complete(&obd_zombie_start);
+
+        while (!test_bit(OBD_ZOMBIE_STOP, &obd_zombie_flags)) {
+                struct l_wait_info lwi = { 0 };
+
+                /* check() returns non-zero while there is nothing to do */
+                l_wait_event(obd_zombie_waitq,
+                             !obd_zombie_impexp_check(NULL), &lwi);
+                obd_zombie_impexp_cull();
+
+                /*
+                 * Notify obd_zombie_barrier callers that queues
+                 * may be empty.
+                 */
+                cfs_waitq_signal(&obd_zombie_waitq);
         }
 
-        obddev->obd_multi_count = count;
+        complete(&obd_zombie_stop);
 
         RETURN(0);
+}
 
- err_disconnect:
-        for (count--; count >= 0; count--)
-                obd_disconnect(&obddev->obd_multi_conn[count]);
+#else /* ! KERNEL */
+
+static atomic_t zombie_recur = ATOMIC_INIT(0); /* nesting counter used by
+        * obd_zombie_impexp_kill() so only the outermost call culls */
+static void *obd_zombie_impexp_work_cb; /* handle returned by
+        * liblustre_register_wait_callback() in obd_zombie_impexp_init() */
+static void *obd_zombie_impexp_idle_cb; /* handle returned by
+        * liblustre_register_idle_callback() in obd_zombie_impexp_init() */
+
+/**
+ * Non-kernel replacement for the zombie thread: cull queued zombies from
+ * the caller's context.
+ *
+ * zombie_recur guards against nested invocation: only the call that
+ * raises the counter from 0 to 1 runs obd_zombie_impexp_cull().
+ * Returns 1 if this call performed the cull, 0 if it was a nested call.
+ * The \a arg parameter is not used.
+ */
+int obd_zombie_impexp_kill(void *arg)
+{
+        int rc = 0;
+        int depth;
+
+        depth = atomic_inc_return(&zombie_recur);
+        if (depth == 1) {
+                obd_zombie_impexp_cull();
+                rc = 1;
+        }
+        atomic_dec(&zombie_recur);
         return rc;
 }
 
-/*
- *    remove all connections to this device
- *    close all connections to lower devices
- *    needed for forced unloads of OBD client drivers
+#endif
+
+/**
+ * start destroy zombie import/export thread
  */
-int class_multi_cleanup(struct obd_device *obddev)
+int obd_zombie_impexp_init(void)
 {
-        int i;
+        int rc;
 
-        for (i = 0; i < obddev->obd_multi_count; i++) {
-                int rc;
-                struct obd_device *obd =
-                        class_conn2obd(&obddev->obd_multi_conn[i]);
+        CFS_INIT_LIST_HEAD(&obd_zombie_imports);
+        CFS_INIT_LIST_HEAD(&obd_zombie_exports);
+        spin_lock_init(&obd_zombie_impexp_lock);
+        init_completion(&obd_zombie_start);
+        init_completion(&obd_zombie_stop);
+        cfs_waitq_init(&obd_zombie_waitq);
 
-                if (!obd) {
-                        CERROR("no such device [i %d]\n", i);
-                        RETURN(-EINVAL);
-                }
+#ifdef __KERNEL__
+        rc = cfs_kernel_thread(obd_zombie_impexp_thread, NULL, 0);
+        if (rc < 0)
+                RETURN(rc);
 
-                rc = obd_disconnect(&obddev->obd_multi_conn[i]);
-                if (rc)
-                        CERROR("disconnect failure %d\n", obd->obd_minor);
-        }
-        return 0;
+        wait_for_completion(&obd_zombie_start);
+#else
+
+        obd_zombie_impexp_work_cb =
+                liblustre_register_wait_callback("obd_zombi_impexp_kill",
+                                                 &obd_zombie_impexp_kill, NULL);
+
+        obd_zombie_impexp_idle_cb =
+                liblustre_register_idle_callback("obd_zombi_impexp_check",
+                                                 &obd_zombie_impexp_check, NULL);
+        rc = 0;
+#endif
+        RETURN(rc);
 }
+/**
+ * stop destroy zombie import/export thread
+ *
+ * Sets OBD_ZOMBIE_STOP in obd_zombie_flags and signals obd_zombie_waitq
+ * so the flag is noticed.  Kernel builds then block until
+ * obd_zombie_stop is completed (obd_zombie_impexp_thread() completes it
+ * after leaving its main loop); other builds deregister the two
+ * liblustre callbacks installed by obd_zombie_impexp_init() instead.
+ * The flag must be set before the signal, otherwise the woken waiter
+ * would re-check its condition and go back to sleep.
+ */
+void obd_zombie_impexp_stop(void)
+{
+        set_bit(OBD_ZOMBIE_STOP, &obd_zombie_flags);
+        obd_zombie_impexp_notify();
+#ifdef __KERNEL__
+        wait_for_completion(&obd_zombie_stop);
+#else
+        liblustre_deregister_wait_callback(obd_zombie_impexp_work_cb);
+        liblustre_deregister_idle_callback(obd_zombie_impexp_idle_cb);
 #endif
+}