Whamcloud - gitweb
get rid of trailing whitespaces.
[fs/lustre-release.git] / lustre / obdclass / genops.c
index d2ec554..d015eff 100644 (file)
@@ -1,25 +1,39 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- *  Copyright (c) 2001-2003 Cluster File Systems, Inc.
+ * GPL HEADER START
  *
- *   This file is part of the Lustre file system, http://www.lustre.org
- *   Lustre is a trademark of Cluster File Systems, Inc.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
- *   You may have signed or agreed to another license before downloading
- *   this software.  If so, you are bound by the terms and conditions
- *   of that agreement, and the following does not apply to you.  See the
- *   LICENSE file included with this distribution for more information.
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
  *
- *   If you did not agree to a different license, then this copy of Lustre
- *   is open source software; you can redistribute it and/or modify it
- *   under the terms of version 2 of the GNU General Public License as
- *   published by the Free Software Foundation.
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
  *
- *   In either case, Lustre is distributed in the hope that it will be
- *   useful, but WITHOUT ANY WARRANTY; without even the implied warranty
- *   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   license text for more details.
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see [sun.com URL with a
+ * copy of GPLv2].
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright  2008 Sun Microsystems, Inc. All rights reserved
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/obdclass/genops.c
  *
  * These are the only exported functions, they provide some generic
  * infrastructure for managing object devices
@@ -45,9 +59,7 @@ cfs_mem_cache_t *import_cachep;
 struct list_head  obd_zombie_imports;
 struct list_head  obd_zombie_exports;
 spinlock_t        obd_zombie_impexp_lock;
-void            (*obd_zombie_impexp_notify)(void) = NULL;
-EXPORT_SYMBOL(obd_zombie_impexp_notify);
-
+static void obd_zombie_impexp_notify(void);
 
 int (*ptlrpc_put_connection_superhack)(struct ptlrpc_connection *c);
 
@@ -59,7 +71,7 @@ static struct obd_device *obd_device_alloc(void)
 {
         struct obd_device *obd;
 
-        OBD_SLAB_ALLOC(obd, obd_device_cachep, SLAB_KERNEL, sizeof(*obd));
+        OBD_SLAB_ALLOC_PTR(obd, obd_device_cachep);
         if (obd != NULL) {
                 obd->obd_magic = OBD_DEVICE_MAGIC;
         }
@@ -70,9 +82,14 @@ EXPORT_SYMBOL(obd_device_alloc);
 static void obd_device_free(struct obd_device *obd)
 {
         LASSERT(obd != NULL);
-        LASSERTF(obd->obd_magic == OBD_DEVICE_MAGIC, "obd %p obd_magic %08x != %08x\n", 
+        LASSERTF(obd->obd_magic == OBD_DEVICE_MAGIC, "obd %p obd_magic %08x != %08x\n",
                  obd, obd->obd_magic, OBD_DEVICE_MAGIC);
-        OBD_SLAB_FREE(obd, obd_device_cachep, sizeof(*obd));
+        if (obd->obd_namespace != NULL) {
+                CERROR("obd %p: namespace %p was not properly cleaned up (obd_force=%d)!\n",
+                       obd, obd->obd_namespace, obd->obd_force);
+                LBUG();
+        }
+        OBD_SLAB_FREE_PTR(obd, obd_device_cachep);
 }
 EXPORT_SYMBOL(obd_device_free);
 
@@ -100,20 +117,19 @@ struct obd_type *class_get_type(const char *name)
 #ifdef CONFIG_KMOD
         if (!type) {
                 const char *modname = name;
-                if (strcmp(modname, LUSTRE_MDT_NAME) == 0) 
-                        modname = LUSTRE_MDS_NAME;
                 if (!request_module(modname)) {
                         CDEBUG(D_INFO, "Loaded module '%s'\n", modname);
                         type = class_search_type(name);
                 } else {
-                        LCONSOLE_ERROR("Can't load module '%s'\n", modname);
+                        LCONSOLE_ERROR_MSG(0x158, "Can't load module '%s'\n",
+                                           modname);
                 }
         }
 #endif
         if (type) {
                 spin_lock(&type->obd_type_lock);
                 type->typ_refcnt++;
-                try_module_get(type->typ_ops->o_owner);
+                try_module_get(type->typ_dt_ops->o_owner);
                 spin_unlock(&type->obd_type_lock);
         }
         return type;
@@ -124,18 +140,22 @@ void class_put_type(struct obd_type *type)
         LASSERT(type);
         spin_lock(&type->obd_type_lock);
         type->typ_refcnt--;
-        module_put(type->typ_ops->o_owner);
+        module_put(type->typ_dt_ops->o_owner);
         spin_unlock(&type->obd_type_lock);
 }
 
-int class_register_type(struct obd_ops *ops, struct lprocfs_vars *vars,
-                        const char *name)
+#define CLASS_MAX_NAME 1024
+
+int class_register_type(struct obd_ops *dt_ops, struct md_ops *md_ops,
+                        struct lprocfs_vars *vars, const char *name,
+                        struct lu_device_type *ldt)
 {
         struct obd_type *type;
         int rc = 0;
         ENTRY;
 
-        LASSERT(strnlen(name, 1024) < 1024);    /* sanity check */
+        /* sanity check */
+        LASSERT(strnlen(name, CLASS_MAX_NAME) < CLASS_MAX_NAME);
 
         if (class_search_type(name)) {
                 CDEBUG(D_IOCTL, "Type %s already registered\n", name);
@@ -147,12 +167,19 @@ int class_register_type(struct obd_ops *ops, struct lprocfs_vars *vars,
         if (type == NULL)
                 RETURN(rc);
 
-        OBD_ALLOC(type->typ_ops, sizeof(*type->typ_ops));
+        OBD_ALLOC_PTR(type->typ_dt_ops);
+        OBD_ALLOC_PTR(type->typ_md_ops);
         OBD_ALLOC(type->typ_name, strlen(name) + 1);
-        if (type->typ_ops == NULL || type->typ_name == NULL)
+
+        if (type->typ_dt_ops == NULL ||
+            type->typ_md_ops == NULL ||
+            type->typ_name == NULL)
                 GOTO (failed, rc);
 
-        *(type->typ_ops) = *ops;
+        *(type->typ_dt_ops) = *dt_ops;
+        /* md_ops is optional */
+        if (md_ops)
+                *(type->typ_md_ops) = *md_ops;
         strcpy(type->typ_name, name);
         spin_lock_init(&type->obd_type_lock);
 
@@ -165,6 +192,12 @@ int class_register_type(struct obd_ops *ops, struct lprocfs_vars *vars,
                 GOTO (failed, rc);
         }
 #endif
+        if (ldt != NULL) {
+                type->typ_lu = ldt;
+                rc = ldt->ldt_ops->ldto_init(ldt);
+                if (rc != 0)
+                        GOTO (failed, rc);
+        }
 
         spin_lock(&obd_types_lock);
         list_add(&type->typ_chain, &obd_types);
@@ -175,8 +208,10 @@ int class_register_type(struct obd_ops *ops, struct lprocfs_vars *vars,
  failed:
         if (type->typ_name != NULL)
                 OBD_FREE(type->typ_name, strlen(name) + 1);
-        if (type->typ_ops != NULL)
-                OBD_FREE (type->typ_ops, sizeof (*type->typ_ops));
+        if (type->typ_md_ops != NULL)
+                OBD_FREE_PTR(type->typ_md_ops);
+        if (type->typ_dt_ops != NULL)
+                OBD_FREE_PTR(type->typ_dt_ops);
         OBD_FREE(type, sizeof(*type));
         RETURN(rc);
 }
@@ -195,23 +230,41 @@ int class_unregister_type(const char *name)
                 CERROR("type %s has refcount (%d)\n", name, type->typ_refcnt);
                 /* This is a bad situation, let's make the best of it */
                 /* Remove ops, but leave the name for debugging */
-                OBD_FREE(type->typ_ops, sizeof(*type->typ_ops));
+                OBD_FREE_PTR(type->typ_dt_ops);
+                OBD_FREE_PTR(type->typ_md_ops);
                 RETURN(-EBUSY);
         }
 
-        if (type->typ_procroot) 
+        if (type->typ_procroot) {
                 lprocfs_remove(&type->typ_procroot);
+        }
+
+        if (type->typ_lu)
+                type->typ_lu->ldt_ops->ldto_fini(type->typ_lu);
 
         spin_lock(&obd_types_lock);
         list_del(&type->typ_chain);
         spin_unlock(&obd_types_lock);
         OBD_FREE(type->typ_name, strlen(name) + 1);
-        if (type->typ_ops != NULL)
-                OBD_FREE(type->typ_ops, sizeof(*type->typ_ops));
+        if (type->typ_dt_ops != NULL)
+                OBD_FREE_PTR(type->typ_dt_ops);
+        if (type->typ_md_ops != NULL)
+                OBD_FREE_PTR(type->typ_md_ops);
         OBD_FREE(type, sizeof(*type));
         RETURN(0);
 } /* class_unregister_type */
 
+/**
+ * Create a new obd device.
+ *
+ * Find an empty slot in ::obd_devs[], create a new obd device in it.
+ *
+ * \param type_name [in] obd device type string.
+ * \param name      [in] obd device name.
+ *
+ * \retval ERR_PTR(-errno) if create fails, otherwise return the obd
+ *         device pointer created.
+ */
 struct obd_device *class_newdev(const char *type_name, const char *name)
 {
         struct obd_device *result = NULL;
@@ -220,7 +273,7 @@ struct obd_device *class_newdev(const char *type_name, const char *name)
         int i;
         int new_obd_minor = 0;
 
-        if (strlen(name) > MAX_OBD_NAME) {
+        if (strlen(name) >= MAX_OBD_NAME) {
                 CERROR("name/uuid must be < %u bytes long\n", MAX_OBD_NAME);
                 RETURN(ERR_PTR(-EINVAL));
         }
@@ -232,7 +285,7 @@ struct obd_device *class_newdev(const char *type_name, const char *name)
         }
 
         newdev = obd_device_alloc();
-        if (newdev == NULL) { 
+        if (newdev == NULL) {
                 class_put_type(type);
                 RETURN(ERR_PTR(-ENOMEM));
         }
@@ -241,7 +294,8 @@ struct obd_device *class_newdev(const char *type_name, const char *name)
         spin_lock(&obd_dev_lock);
         for (i = 0; i < class_devno_max(); i++) {
                 struct obd_device *obd = class_num2obd(i);
-                if (obd && obd->obd_name && (strcmp(name, obd->obd_name) == 0)){
+                if (obd && obd->obd_name &&
+                    (strcmp(name, obd->obd_name) == 0)) {
                         CERROR("Device %s already exists, won't add\n", name);
                         if (result) {
                                 LASSERTF(result->obd_magic == OBD_DEVICE_MAGIC,
@@ -253,7 +307,7 @@ struct obd_device *class_newdev(const char *type_name, const char *name)
 
                                 obd_devs[result->obd_minor] = NULL;
                                 result->obd_name[0]='\0';
-                        }
+                         }
                         result = ERR_PTR(-EEXIST);
                         break;
                 }
@@ -262,18 +316,19 @@ struct obd_device *class_newdev(const char *type_name, const char *name)
                         result->obd_minor = i;
                         new_obd_minor = i;
                         result->obd_type = type;
-                        memcpy(result->obd_name, name, strlen(name));
+                        strncpy(result->obd_name, name,
+                                sizeof(result->obd_name));
                         obd_devs[i] = result;
                 }
         }
         spin_unlock(&obd_dev_lock);
-        
+
         if (result == NULL && i >= class_devno_max()) {
                 CERROR("all %u OBD devices used, increase MAX_OBD_DEVICES\n",
                        class_devno_max());
                 result = ERR_PTR(-EOVERFLOW);
         }
-        
+
         if (IS_ERR(result)) {
                 obd_device_free(newdev);
                 class_put_type(type);
@@ -366,15 +421,22 @@ struct obd_device *class_uuid2obd(struct obd_uuid *uuid)
         return class_num2obd(dev);
 }
 
+/**
+ * Get obd device from ::obd_devs[]
+ *
+ * \param num [in] array index
+ *
+ * \retval NULL if ::obd_devs[\a num] does not contain an obd device,
+ *         otherwise return the obd device there.
+ */
 struct obd_device *class_num2obd(int num)
 {
         struct obd_device *obd = NULL;
 
         if (num < class_devno_max()) {
                 obd = obd_devs[num];
-                if (obd == NULL) {
+                if (obd == NULL)
                         return NULL;
-                }
 
                 LASSERTF(obd->obd_magic == OBD_DEVICE_MAGIC,
                          "%p obd_magic %08x != %08x\n",
@@ -509,6 +571,11 @@ void obd_cleanup_caches(void)
                 LASSERTF(rc == 0, "Cannot destory ll_import_cache\n");
                 import_cachep = NULL;
         }
+        if (capa_cachep) {
+                rc = cfs_mem_cache_destroy(capa_cachep);
+                LASSERTF(rc == 0, "Cannot destory capa_cache\n");
+                capa_cachep = NULL;
+        }
         EXIT;
 }
 
@@ -518,23 +585,30 @@ int obd_init_caches(void)
 
         LASSERT(obd_device_cachep == NULL);
         obd_device_cachep = cfs_mem_cache_create("ll_obd_dev_cache",
-                                              sizeof(struct obd_device), 0, 0);
+                                                 sizeof(struct obd_device),
+                                                 0, 0);
         if (!obd_device_cachep)
                 GOTO(out, -ENOMEM);
 
         LASSERT(obdo_cachep == NULL);
         obdo_cachep = cfs_mem_cache_create("ll_obdo_cache", sizeof(struct obdo),
-                                        0, 0);
+                                           0, 0);
         if (!obdo_cachep)
                 GOTO(out, -ENOMEM);
 
         LASSERT(import_cachep == NULL);
         import_cachep = cfs_mem_cache_create("ll_import_cache",
-                                          sizeof(struct obd_import),
-                                          0, 0);
+                                             sizeof(struct obd_import),
+                                             0, 0);
         if (!import_cachep)
                 GOTO(out, -ENOMEM);
 
+        LASSERT(capa_cachep == NULL);
+        capa_cachep = cfs_mem_cache_create("capa_cache",
+                                           sizeof(struct obd_capa), 0, 0);
+        if (!capa_cachep)
+                GOTO(out, -ENOMEM);
+
         RETURN(0);
  out:
         obd_cleanup_caches();
@@ -611,7 +685,7 @@ void __class_export_put(struct obd_export *exp)
 
                 CDEBUG(D_IOCTL, "final put %p/%s\n",
                        exp, exp->exp_client_uuid.uuid);
-        
+
                 spin_lock(&obd_zombie_impexp_lock);
                 list_add(&exp->exp_obd_chain, &obd_zombie_exports);
                 spin_unlock(&obd_zombie_impexp_lock);
@@ -625,11 +699,12 @@ EXPORT_SYMBOL(__class_export_put);
 void class_export_destroy(struct obd_export *exp)
 {
         struct obd_device *obd = exp->exp_obd;
+        ENTRY;
 
         LASSERT (atomic_read(&exp->exp_refcount) == 0);
 
-        CDEBUG(D_IOCTL, "destroying export %p/%s\n", exp,
-               exp->exp_client_uuid.uuid);
+        CDEBUG(D_IOCTL, "destroying export %p/%s for %s\n", exp,
+               exp->exp_client_uuid.uuid, obd->obd_name);
 
         LASSERT(obd != NULL);
 
@@ -638,11 +713,12 @@ void class_export_destroy(struct obd_export *exp)
                 ptlrpc_put_connection_superhack(exp->exp_connection);
 
         LASSERT(list_empty(&exp->exp_outstanding_replies));
-        LASSERT(list_empty(&exp->exp_handle.h_link));
+        LASSERT(list_empty(&exp->exp_req_replay_queue));
         obd_destroy_export(exp);
 
-        OBD_FREE(exp, sizeof(*exp));
+        OBD_FREE_RCU(exp, sizeof(*exp), &exp->exp_handle);
         class_decref(obd);
+        EXIT;
 }
 
 /* Creates a new export, adds it to the hash table, and returns a
@@ -654,7 +730,7 @@ struct obd_export *class_new_export(struct obd_device *obd,
         struct obd_export *export;
         int rc = 0;
 
-        OBD_ALLOC(export, sizeof(*export));
+        OBD_ALLOC_PTR(export);
         if (!export)
                 return ERR_PTR(-ENOMEM);
 
@@ -663,31 +739,37 @@ struct obd_export *class_new_export(struct obd_device *obd,
         atomic_set(&export->exp_rpc_count, 0);
         export->exp_obd = obd;
         CFS_INIT_LIST_HEAD(&export->exp_outstanding_replies);
+        CFS_INIT_LIST_HEAD(&export->exp_req_replay_queue);
         /* XXX this should be in LDLM init */
         CFS_INIT_LIST_HEAD(&export->exp_ldlm_data.led_held_locks);
         spin_lock_init(&export->exp_ldlm_data.led_lock);
 
         CFS_INIT_LIST_HEAD(&export->exp_handle.h_link);
         class_handle_hash(&export->exp_handle, export_handle_addref);
-        export->exp_last_request_time = CURRENT_SECONDS;
+        export->exp_last_request_time = cfs_time_current_sec();
         spin_lock_init(&export->exp_lock);
+        INIT_HLIST_NODE(&export->exp_uuid_hash);
+        INIT_HLIST_NODE(&export->exp_nid_hash);
 
+        export->exp_sp_peer = LUSTRE_SP_ANY;
+        export->exp_flvr.sf_rpc = SPTLRPC_FLVR_INVALID;
         export->exp_client_uuid = *cluuid;
         obd_init_export(export);
 
+        spin_lock(&obd->obd_dev_lock);
         if (!obd_uuid_equals(cluuid, &obd->obd_uuid)) {
-               rc = lustre_hash_additem_unique(obd->obd_uuid_hash_body, cluuid, 
+               rc = lustre_hash_additem_unique(obd->obd_uuid_hash_body, cluuid,
                                                &export->exp_uuid_hash);
                if (rc != 0) {
                        CWARN("%s: denying duplicate export for %s\n",
                              obd->obd_name, cluuid->uuid);
+                       spin_unlock(&obd->obd_dev_lock);
                        class_handle_unhash(&export->exp_handle);
                        OBD_FREE_PTR(export);
                        return ERR_PTR(-EALREADY);
                }
         }
 
-        spin_lock(&obd->obd_dev_lock);
         LASSERT(!obd->obd_stopping); /* shouldn't happen, but might race */
         class_incref(obd);
         list_add(&export->exp_obd_chain, &export->exp_obd->obd_exports);
@@ -707,7 +789,7 @@ void class_unlink_export(struct obd_export *exp)
         spin_lock(&exp->exp_obd->obd_dev_lock);
         /* delete an uuid-export hashitem from hashtables */
         if (!hlist_unhashed(&exp->exp_uuid_hash)) {
-                lustre_hash_delitem(exp->exp_obd->obd_uuid_hash_body, 
+                lustre_hash_delitem(exp->exp_obd->obd_uuid_hash_body,
                                     &exp->exp_client_uuid, &exp->exp_uuid_hash);
         }
         list_del_init(&exp->exp_obd_chain);
@@ -750,7 +832,7 @@ void class_import_put(struct obd_import *import)
         if (atomic_dec_and_test(&import->imp_refcount)) {
 
                 CDEBUG(D_INFO, "final put import %p\n", import);
-                
+
                 spin_lock(&obd_zombie_impexp_lock);
                 list_add(&import->imp_zombie_chain, &obd_zombie_imports);
                 spin_unlock(&obd_zombie_impexp_lock);
@@ -761,12 +843,14 @@ void class_import_put(struct obd_import *import)
 
         EXIT;
 }
+EXPORT_SYMBOL(class_import_put);
 
 void class_import_destroy(struct obd_import *import)
 {
         ENTRY;
-        
-        CDEBUG(D_IOCTL, "destroying import %p\n", import);
+
+        CDEBUG(D_IOCTL, "destroying import %p for %s\n", import,
+                import->imp_obd->obd_name);
 
         LASSERT(atomic_read(&import->imp_refcount) == 0);
 
@@ -782,13 +866,23 @@ void class_import_destroy(struct obd_import *import)
                 OBD_FREE(imp_conn, sizeof(*imp_conn));
         }
 
-        LASSERT(list_empty(&import->imp_handle.h_link));
+        LASSERT(import->imp_sec == NULL);
         class_decref(import->imp_obd);
-        OBD_FREE(import, sizeof(*import));
-
+        OBD_FREE_RCU(import, sizeof(*import), &import->imp_handle);
         EXIT;
 }
-EXPORT_SYMBOL(class_import_put);
+
+static void init_imp_at(struct imp_at *at) {
+        int i;
+        at_init(&at->iat_net_latency, 0, 0);
+        for (i = 0; i < IMP_AT_MAX_PORTALS; i++) {
+                /* max service estimates are tracked on the server side, so
+                   don't use the AT history here, just use the last reported
+                   val. (But keep hist for proc histogram, worst_ever) */
+                at_init(&at->iat_service_estimate[i], INITIAL_CONNECT_TIMEOUT,
+                        AT_FLG_NOHIST);
+        }
+}
 
 struct obd_import *class_new_import(struct obd_device *obd)
 {
@@ -806,18 +900,21 @@ struct obd_import *class_new_import(struct obd_device *obd)
         imp->imp_last_success_conn = 0;
         imp->imp_state = LUSTRE_IMP_NEW;
         imp->imp_obd = class_incref(obd);
+        sema_init(&imp->imp_sec_mutex, 1);
         cfs_waitq_init(&imp->imp_recovery_waitq);
 
         atomic_set(&imp->imp_refcount, 2);
         atomic_set(&imp->imp_inflight, 0);
         atomic_set(&imp->imp_replay_inflight, 0);
+        atomic_set(&imp->imp_inval_count, 0);
         CFS_INIT_LIST_HEAD(&imp->imp_conn_list);
         CFS_INIT_LIST_HEAD(&imp->imp_handle.h_link);
         class_handle_hash(&imp->imp_handle, import_handle_addref);
+        init_imp_at(&imp->imp_at);
 
-        /* the default magic is V1, will be used in connect RPC, and
+        /* the default magic is V2, will be used in connect RPC, and
          * then adjusted according to the flags in request/reply. */
-        imp->imp_msg_magic = LUSTRE_MSG_MAGIC_V1;
+        imp->imp_msg_magic = LUSTRE_MSG_MAGIC_V2;
 
         return imp;
 }
@@ -830,7 +927,9 @@ void class_destroy_import(struct obd_import *import)
 
         class_handle_unhash(&import->imp_handle);
 
+        spin_lock(&import->imp_lock);
         import->imp_generation++;
+        spin_unlock(&import->imp_lock);
         class_import_put(import);
 }
 EXPORT_SYMBOL(class_destroy_import);
@@ -861,6 +960,37 @@ int class_connect(struct lustre_handle *conn, struct obd_device *obd,
 }
 EXPORT_SYMBOL(class_connect);
 
+/* if export is involved in recovery then clean up related things */
+void class_export_recovery_cleanup(struct obd_export *exp)
+{
+        struct obd_device *obd = exp->exp_obd;
+
+        spin_lock_bh(&obd->obd_processing_task_lock);
+        if (obd->obd_recovering && exp->exp_in_recovery) {
+                spin_lock(&exp->exp_lock);
+                exp->exp_in_recovery = 0;
+                spin_unlock(&exp->exp_lock);
+                obd->obd_connected_clients--;
+                /* each connected client is counted as recoverable */
+                obd->obd_recoverable_clients--;
+                if (exp->exp_req_replay_needed) {
+                        spin_lock(&exp->exp_lock);
+                        exp->exp_req_replay_needed = 0;
+                        spin_unlock(&exp->exp_lock);
+                        LASSERT(atomic_read(&obd->obd_req_replay_clients));
+                        atomic_dec(&obd->obd_req_replay_clients);
+                }
+                if (exp->exp_lock_replay_needed) {
+                        spin_lock(&exp->exp_lock);
+                        exp->exp_lock_replay_needed = 0;
+                        spin_unlock(&exp->exp_lock);
+                        LASSERT(atomic_read(&obd->obd_lock_replay_clients));
+                        atomic_dec(&obd->obd_lock_replay_clients);
+                }
+        }
+        spin_unlock_bh(&obd->obd_processing_task_lock);
+}
+
 /* This function removes two references from the export: one for the
  * hash entry and one for the export pointer passed in.  The export
  * pointer passed to this function is destroyed should not be used
@@ -895,6 +1025,7 @@ int class_disconnect(struct obd_export *export)
         CDEBUG(D_IOCTL, "disconnect: cookie "LPX64"\n",
                export->exp_handle.h_cookie);
 
+        class_export_recovery_cleanup(export);
         class_unlink_export(export);
         class_export_put(export);
         RETURN(0);
@@ -912,7 +1043,10 @@ static void class_disconnect_export_list(struct list_head *list, int flags)
         while (!list_empty(list)) {
                 exp = list_entry(list->next, struct obd_export, exp_obd_chain);
                 class_export_get(exp);
+
+                spin_lock(&exp->exp_lock);
                 exp->exp_flags = flags;
+                spin_unlock(&exp->exp_lock);
 
                 if (obd_uuid_equals(&exp->exp_client_uuid,
                                     &exp->exp_obd->obd_uuid)) {
@@ -932,15 +1066,17 @@ static void class_disconnect_export_list(struct list_head *list, int flags)
                         class_export_put(exp);
                         continue;
                 }
+
+                spin_lock(&fake_exp->exp_lock);
                 fake_exp->exp_flags = flags;
+                spin_unlock(&fake_exp->exp_lock);
+
+                CDEBUG(D_HA, "%s: disconnecting export at %s (%p), "
+                       "last request at %ld\n",
+                       exp->exp_obd->obd_name, obd_export_nid2str(exp),
+                       exp, exp->exp_last_request_time);
                 rc = obd_disconnect(fake_exp);
                 class_export_put(exp);
-                if (rc) {
-                        CDEBUG(D_HA, "disconnecting export %p failed: %d\n",
-                               exp, rc);
-                } else {
-                        CDEBUG(D_HA, "export %p disconnected\n", exp);
-                }
         }
         EXIT;
 }
@@ -962,16 +1098,22 @@ void class_disconnect_exports(struct obd_device *obd)
         list_del_init(&obd->obd_exports);
         spin_unlock(&obd->obd_dev_lock);
 
-        CDEBUG(D_HA, "OBD device %d (%p) has exports, "
-               "disconnecting them\n", obd->obd_minor, obd);
-        class_disconnect_export_list(&work_list, get_exp_flags_from_obd(obd));
+        if (!list_empty(&work_list)) {
+                CDEBUG(D_HA, "OBD device %d (%p) has exports, "
+                       "disconnecting them\n", obd->obd_minor, obd);
+                class_disconnect_export_list(&work_list,
+                                             get_exp_flags_from_obd(obd));
+        } else
+                CDEBUG(D_HA, "OBD device %d (%p) has no exports\n",
+                       obd->obd_minor, obd);
         EXIT;
 }
 EXPORT_SYMBOL(class_disconnect_exports);
 
 /* Remove exports that have not completed recovery.
  */
-void class_disconnect_stale_exports(struct obd_device *obd)
+int class_disconnect_stale_exports(struct obd_device *obd,
+                                   int (*test_export)(struct obd_export *))
 {
         struct list_head work_list;
         struct list_head *pos, *n;
@@ -983,18 +1125,28 @@ void class_disconnect_stale_exports(struct obd_device *obd)
         spin_lock(&obd->obd_dev_lock);
         list_for_each_safe(pos, n, &obd->obd_exports) {
                 exp = list_entry(pos, struct obd_export, exp_obd_chain);
-                if (exp->exp_replay_needed) {
-                        list_del(&exp->exp_obd_chain);
-                        list_add(&exp->exp_obd_chain, &work_list);
-                        cnt++;
-                }
+                if (test_export(exp))
+                        continue;
+
+                list_del(&exp->exp_obd_chain);
+                list_add(&exp->exp_obd_chain, &work_list);
+                /* don't count self-export as client */
+                if (obd_uuid_equals(&exp->exp_client_uuid,
+                                     &exp->exp_obd->obd_uuid))
+                        continue;
+
+                cnt++;
+                CDEBUG(D_ERROR, "%s: disconnect stale client %s@%s\n",
+                       obd->obd_name, exp->exp_client_uuid.uuid,
+                       exp->exp_connection == NULL ? "<unknown>" :
+                       libcfs_nid2str(exp->exp_connection->c_peer.nid));
         }
         spin_unlock(&obd->obd_dev_lock);
 
         CDEBUG(D_ERROR, "%s: disconnecting %d stale clients\n",
                obd->obd_name, cnt);
         class_disconnect_export_list(&work_list, get_exp_flags_from_obd(obd));
-        EXIT;
+        RETURN(cnt);
 }
 EXPORT_SYMBOL(class_disconnect_stale_exports);
 
@@ -1176,28 +1328,30 @@ char *obd_export_nid2str(struct obd_export *exp)
 {
         if (exp->exp_connection != NULL)
                 return libcfs_nid2str(exp->exp_connection->c_peer.nid);
-        
+
         return "(no nid)";
 }
 EXPORT_SYMBOL(obd_export_nid2str);
 
-#define EVICT_BATCH 32
-int obd_export_evict_by_nid(struct obd_device *obd, char *nid)
+int obd_export_evict_by_nid(struct obd_device *obd, const char *nid)
 {
         struct obd_export *doomed_exp = NULL;
         int exports_evicted = 0;
 
-        lnet_nid_t nid_key = libcfs_str2nid(nid);
+        lnet_nid_t nid_key = libcfs_str2nid((char *)nid);
 
         do {
                 doomed_exp = lustre_hash_get_object_by_key(obd->obd_nid_hash_body,
                                                            &nid_key);
-
                 if (doomed_exp == NULL)
                         break;
 
-                LASSERT(strcmp(obd_export_nid2str(doomed_exp), libcfs_nid2str(nid_key)) ==0 );
-        
+                LASSERTF(doomed_exp->exp_connection->c_peer.nid == nid_key,
+                         "nid %s found, wanted nid %s, requested nid %s\n",
+                         obd_export_nid2str(doomed_exp),
+                         libcfs_nid2str(nid_key), nid);
+                LASSERTF(doomed_exp != obd->obd_self_export,
+                         "self-export is hashed by NID?\n");
                 exports_evicted++;
                 CWARN("%s: evict NID '%s' (%s) #%d at adminstrative request\n",
                        obd->obd_name, nid, doomed_exp->exp_client_uuid.uuid,
@@ -1213,15 +1367,19 @@ int obd_export_evict_by_nid(struct obd_device *obd, char *nid)
 }
 EXPORT_SYMBOL(obd_export_evict_by_nid);
 
-int obd_export_evict_by_uuid(struct obd_device *obd, char *uuid)
+int obd_export_evict_by_uuid(struct obd_device *obd, const char *uuid)
 {
         struct obd_export *doomed_exp = NULL;
         struct obd_uuid doomed;
         int exports_evicted = 0;
 
         obd_str2uuid(&doomed, uuid);
+        if (obd_uuid_equals(&doomed, &obd->obd_uuid)) {
+                CERROR("%s: can't evict myself\n", obd->obd_name);
+                return exports_evicted;
+        }
 
-        doomed_exp = lustre_hash_get_object_by_key(obd->obd_uuid_hash_body, 
+        doomed_exp = lustre_hash_get_object_by_key(obd->obd_uuid_hash_body,
                                                    &doomed);
 
         if (doomed_exp == NULL) {
@@ -1239,11 +1397,15 @@ int obd_export_evict_by_uuid(struct obd_device *obd, char *uuid)
 }
 EXPORT_SYMBOL(obd_export_evict_by_uuid);
 
-void obd_zombie_impexp_cull(void) 
+/**
+ * Destroy all queued zombie imports and exports.
+ */
+void obd_zombie_impexp_cull(void)
 {
         struct obd_import *import;
         struct obd_export *export;
-        
+        ENTRY;
+
         do {
                 spin_lock (&obd_zombie_impexp_lock);
 
@@ -1254,7 +1416,7 @@ void obd_zombie_impexp_cull(void)
                                             imp_zombie_chain);
                         list_del(&import->imp_zombie_chain);
                 }
-                
+
                 export = NULL;
                 if (!list_empty(&obd_zombie_exports)) {
                         export = list_entry(obd_zombie_exports.next,
@@ -1264,7 +1426,7 @@ void obd_zombie_impexp_cull(void)
                 }
 
                 spin_unlock(&obd_zombie_impexp_lock);
-                
+
                 if (import != NULL)
                         class_import_destroy(import);
 
@@ -1272,12 +1434,138 @@ void obd_zombie_impexp_cull(void)
                         class_export_destroy(export);
 
         } while (import != NULL || export != NULL);
+        EXIT;
+}
+
+static struct completion        obd_zombie_start;
+static struct completion        obd_zombie_stop;
+static unsigned long            obd_zombie_flags;
+static cfs_waitq_t              obd_zombie_waitq;
+
+enum {
+        OBD_ZOMBIE_STOP = 1
+};
+
+/**
+ * Return non-zero when no zombies are queued and no stop was requested.
+ */
+int obd_zombie_impexp_check(void *arg)
+{
+        int rc;
+
+        spin_lock(&obd_zombie_impexp_lock);
+        rc = list_empty(&obd_zombie_imports) &&
+             list_empty(&obd_zombie_exports) &&
+             !test_bit(OBD_ZOMBIE_STOP, &obd_zombie_flags);
+
+        spin_unlock(&obd_zombie_impexp_lock);
+
+        RETURN(rc);
 }
-EXPORT_SYMBOL(obd_zombie_impexp_cull);
 
-void obd_zombie_impexp_init(void)
+/**
+ * Notify the import/export destroy thread about a new zombie.
+ */
+static void obd_zombie_impexp_notify(void)
 {
-        INIT_LIST_HEAD(&obd_zombie_imports);
-        INIT_LIST_HEAD(&obd_zombie_exports);
+        cfs_waitq_signal(&obd_zombie_waitq);
+}
+
+#ifdef __KERNEL__
+
+/**
+ * Thread body: destroy zombie exports/imports until asked to stop.
+ */
+static int obd_zombie_impexp_thread(void *unused)
+{
+        int rc;
+
+        if ((rc = cfs_daemonize_ctxt("obd_zombid"))) {
+                complete(&obd_zombie_start);
+                RETURN(rc);
+        }
+
+        complete(&obd_zombie_start);
+
+        while(!test_bit(OBD_ZOMBIE_STOP, &obd_zombie_flags)) {
+                struct l_wait_info lwi = { 0 };
+
+                l_wait_event(obd_zombie_waitq, !obd_zombie_impexp_check(NULL), &lwi);
+
+                obd_zombie_impexp_cull();
+        }
+
+        complete(&obd_zombie_stop);
+
+        RETURN(0);
+}
+
+#else /* ! KERNEL */
+
+static atomic_t zombie_recur = ATOMIC_INIT(0);
+static void *obd_zombie_impexp_work_cb;
+static void *obd_zombie_impexp_idle_cb;
+
+int obd_zombie_impexp_kill(void *arg)
+{
+        int rc = 0;
+
+       if (atomic_inc_return(&zombie_recur) == 1) {
+                obd_zombie_impexp_cull();
+                rc = 1;
+        }
+        atomic_dec(&zombie_recur);
+        return rc;
+}
+
+#endif
+
+/**
+ * Start the zombie import/export destroy thread.
+ */
+int obd_zombie_impexp_init(void)
+{
+        int rc;
+
+        CFS_INIT_LIST_HEAD(&obd_zombie_imports);
+        CFS_INIT_LIST_HEAD(&obd_zombie_exports);
         spin_lock_init(&obd_zombie_impexp_lock);
+        init_completion(&obd_zombie_start);
+        init_completion(&obd_zombie_stop);
+        cfs_waitq_init(&obd_zombie_waitq);
+
+#ifdef __KERNEL__
+        rc = cfs_kernel_thread(obd_zombie_impexp_thread, NULL, 0);
+        if (rc < 0)
+                RETURN(rc);
+
+        wait_for_completion(&obd_zombie_start);
+#else
+
+        obd_zombie_impexp_work_cb =
+                liblustre_register_wait_callback("obd_zombi_impexp_kill",
+                                                 &obd_zombie_impexp_kill, NULL);
+
+        obd_zombie_impexp_idle_cb =
+                liblustre_register_idle_callback("obd_zombi_impexp_check",
+                                                 &obd_zombie_impexp_check, NULL);
+        rc = 0;
+
+#endif
+        RETURN(rc);
+}
+/**
+ * Stop the zombie import/export destroy thread.
+ */
+void obd_zombie_impexp_stop(void)
+{
+        set_bit(OBD_ZOMBIE_STOP, &obd_zombie_flags);
+        obd_zombie_impexp_notify();
+#ifdef __KERNEL__
+        wait_for_completion(&obd_zombie_stop);
+#else
+        liblustre_deregister_wait_callback(obd_zombie_impexp_work_cb);
+        liblustre_deregister_idle_callback(obd_zombie_impexp_idle_cb);
+#endif
 }
+