Whamcloud - gitweb
LU-7845 gss: support namespace in lgss_keyring
[fs/lustre-release.git] / lustre / obdclass / genops.c
index 109394c..d5352b0 100644 (file)
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -27,7 +23,7 @@
  * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
  *
- * Copyright (c) 2011, 2013, Intel Corporation.
+ * Copyright (c) 2011, 2015, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
  */
 
 #define DEBUG_SUBSYSTEM S_CLASS
-#ifndef __KERNEL__
-#include <liblustre.h>
-#endif
+
+#include <linux/pid_namespace.h>
+#include <linux/kthread.h>
 #include <obd_class.h>
 #include <lprocfs_status.h>
+#include <lustre_disk.h>
+#include <lustre_kernelcomm.h>
 
-extern cfs_list_t obd_types;
 spinlock_t obd_types_lock;
 
-struct kmem_cache *obd_device_cachep;
+static struct kmem_cache *obd_device_cachep;
 struct kmem_cache *obdo_cachep;
 EXPORT_SYMBOL(obdo_cachep);
-struct kmem_cache *import_cachep;
+static struct kmem_cache *import_cachep;
+
+static struct list_head obd_zombie_imports;
+static struct list_head obd_zombie_exports;
+static spinlock_t  obd_zombie_impexp_lock;
 
-cfs_list_t      obd_zombie_imports;
-cfs_list_t      obd_zombie_exports;
-spinlock_t  obd_zombie_impexp_lock;
 static void obd_zombie_impexp_notify(void);
 static void obd_zombie_export_add(struct obd_export *exp);
 static void obd_zombie_import_add(struct obd_import *imp);
 static void print_export_data(struct obd_export *exp,
-                              const char *status, int locks);
+                              const char *status, int locks, int debug_level);
+
+struct list_head obd_stale_exports;
+spinlock_t       obd_stale_export_lock;
+atomic_t         obd_stale_export_num;
 
 int (*ptlrpc_put_connection_superhack)(struct ptlrpc_connection *c);
 EXPORT_SYMBOL(ptlrpc_put_connection_superhack);
@@ -97,12 +99,12 @@ static void obd_device_free(struct obd_device *obd)
 
 struct obd_type *class_search_type(const char *name)
 {
-       cfs_list_t *tmp;
+       struct list_head *tmp;
        struct obd_type *type;
 
        spin_lock(&obd_types_lock);
-       cfs_list_for_each(tmp, &obd_types) {
-               type = cfs_list_entry(tmp, struct obd_type, typ_chain);
+       list_for_each(tmp, &obd_types) {
+               type = list_entry(tmp, struct obd_type, typ_chain);
                if (strcmp(type->typ_name, name) == 0) {
                        spin_unlock(&obd_types_lock);
                        return type;
@@ -147,7 +149,6 @@ struct obd_type *class_get_type(const char *name)
        }
        return type;
 }
-EXPORT_SYMBOL(class_get_type);
 
 void class_put_type(struct obd_type *type)
 {
@@ -157,15 +158,11 @@ void class_put_type(struct obd_type *type)
        module_put(type->typ_dt_ops->o_owner);
        spin_unlock(&type->obd_type_lock);
 }
-EXPORT_SYMBOL(class_put_type);
 
 #define CLASS_MAX_NAME 1024
 
 int class_register_type(struct obd_ops *dt_ops, struct md_ops *md_ops,
-                       bool enable_proc, struct lprocfs_seq_vars *module_vars,
-#ifndef HAVE_ONLY_PROCFS_SEQ
-                       struct lprocfs_vars *vars,
-#endif
+                       bool enable_proc, struct lprocfs_vars *vars,
                        const char *name, struct lu_device_type *ldt)
 {
         struct obd_type *type;
@@ -201,20 +198,11 @@ int class_register_type(struct obd_ops *dt_ops, struct md_ops *md_ops,
         strcpy(type->typ_name, name);
        spin_lock_init(&type->obd_type_lock);
 
-#ifdef LPROCFS
+#ifdef CONFIG_PROC_FS
        if (enable_proc) {
-#ifndef HAVE_ONLY_PROCFS_SEQ
-               if (vars) {
-                       type->typ_procroot = lprocfs_register(type->typ_name,
-                                                             proc_lustre_root,
-                                                             vars, type);
-               } else
-#endif
-               {
-                       type->typ_procroot = lprocfs_seq_register(type->typ_name,
-                                                                 proc_lustre_root,
-                                                                 module_vars, type);
-               }
+               type->typ_procroot = lprocfs_register(type->typ_name,
+                                                     proc_lustre_root,
+                                                     vars, type);
                if (IS_ERR(type->typ_procroot)) {
                        rc = PTR_ERR(type->typ_procroot);
                        type->typ_procroot = NULL;
@@ -230,23 +218,17 @@ int class_register_type(struct obd_ops *dt_ops, struct md_ops *md_ops,
         }
 
        spin_lock(&obd_types_lock);
-       cfs_list_add(&type->typ_chain, &obd_types);
+       list_add(&type->typ_chain, &obd_types);
        spin_unlock(&obd_types_lock);
 
         RETURN (0);
 
 failed:
        if (type->typ_name != NULL) {
-#ifdef LPROCFS
-               if (type->typ_procroot != NULL) {
-#ifndef HAVE_ONLY_PROCFS_SEQ
-                       lprocfs_try_remove_proc_entry(type->typ_name,
-                                                     proc_lustre_root);
-#else
+#ifdef CONFIG_PROC_FS
+               if (type->typ_procroot != NULL)
                        remove_proc_subtree(type->typ_name, proc_lustre_root);
 #endif
-               }
-#endif
                 OBD_FREE(type->typ_name, strlen(name) + 1);
        }
         if (type->typ_md_ops != NULL)
@@ -281,15 +263,9 @@ int class_unregister_type(const char *name)
         * other modules can share names (i.e. lod can use lov entry). so
         * we can't reference pointer as it can get invalided when another
         * module removes the entry */
-#ifdef LPROCFS
-       if (type->typ_procroot != NULL) {
-#ifndef HAVE_ONLY_PROCFS_SEQ
-               lprocfs_try_remove_proc_entry(type->typ_name, proc_lustre_root);
-#else
+#ifdef CONFIG_PROC_FS
+       if (type->typ_procroot != NULL)
                remove_proc_subtree(type->typ_name, proc_lustre_root);
-#endif
-       }
-
        if (type->typ_procsym != NULL)
                lprocfs_remove(&type->typ_procsym);
 #endif
@@ -297,7 +273,7 @@ int class_unregister_type(const char *name)
                 lu_device_type_fini(type->typ_lu);
 
        spin_lock(&obd_types_lock);
-       cfs_list_del(&type->typ_chain);
+       list_del(&type->typ_chain);
        spin_unlock(&obd_types_lock);
         OBD_FREE(type->typ_name, strlen(name) + 1);
         if (type->typ_dt_ops != NULL)
@@ -446,7 +422,6 @@ int class_name2dev(const char *name)
 
         return -1;
 }
-EXPORT_SYMBOL(class_name2dev);
 
 struct obd_device *class_name2obd(const char *name)
 {
@@ -476,7 +451,6 @@ int class_uuid2dev(struct obd_uuid *uuid)
 
         return -1;
 }
-EXPORT_SYMBOL(class_uuid2dev);
 
 struct obd_device *class_uuid2obd(struct obd_uuid *uuid)
 {
@@ -514,7 +488,6 @@ struct obd_device *class_num2obd(int num)
 
         return obd;
 }
-EXPORT_SYMBOL(class_num2obd);
 
 /**
  * Get obd devices count. Device in any
@@ -644,19 +617,22 @@ int class_notify_sptlrpc_conf(const char *fsname, int namelen)
         LASSERT(namelen > 0);
 
        read_lock(&obd_dev_lock);
-        for (i = 0; i < class_devno_max(); i++) {
-                obd = class_num2obd(i);
+       for (i = 0; i < class_devno_max(); i++) {
+               obd = class_num2obd(i);
 
-                if (obd == NULL || obd->obd_set_up == 0 || obd->obd_stopping)
-                        continue;
+               if (obd == NULL || obd->obd_set_up == 0 || obd->obd_stopping)
+                       continue;
 
-                /* only notify mdc, osc, mdt, ost */
-                type = obd->obd_type->typ_name;
-                if (strcmp(type, LUSTRE_MDC_NAME) != 0 &&
-                    strcmp(type, LUSTRE_OSC_NAME) != 0 &&
-                    strcmp(type, LUSTRE_MDT_NAME) != 0 &&
-                    strcmp(type, LUSTRE_OST_NAME) != 0)
-                        continue;
+               /* only notify mdc, osc, osp, lwp, mdt, ost
+                * because only these have a -sptlrpc llog */
+               type = obd->obd_type->typ_name;
+               if (strcmp(type, LUSTRE_MDC_NAME) != 0 &&
+                   strcmp(type, LUSTRE_OSC_NAME) != 0 &&
+                   strcmp(type, LUSTRE_OSP_NAME) != 0 &&
+                   strcmp(type, LUSTRE_LWP_NAME) != 0 &&
+                   strcmp(type, LUSTRE_MDT_NAME) != 0 &&
+                   strcmp(type, LUSTRE_OST_NAME) != 0)
+                       continue;
 
                 if (strncmp(obd->obd_name, fsname, namelen))
                         continue;
@@ -690,10 +666,7 @@ void obd_cleanup_caches(void)
                kmem_cache_destroy(import_cachep);
                 import_cachep = NULL;
         }
-        if (capa_cachep) {
-               kmem_cache_destroy(capa_cachep);
-                capa_cachep = NULL;
-        }
+
         EXIT;
 }
 
@@ -722,12 +695,6 @@ int obd_init_caches(void)
        if (!import_cachep)
                GOTO(out, rc = -ENOMEM);
 
-       LASSERT(capa_cachep == NULL);
-       capa_cachep = kmem_cache_create("capa_cache", sizeof(struct obd_capa),
-                                       0, 0, NULL);
-       if (!capa_cachep)
-               GOTO(out, rc = -ENOMEM);
-
        RETURN(0);
 out:
        obd_cleanup_caches();
@@ -750,7 +717,7 @@ struct obd_export *class_conn2export(struct lustre_handle *conn)
                 RETURN(NULL);
         }
 
-        CDEBUG(D_INFO, "looking for export cookie "LPX64"\n", conn->cookie);
+       CDEBUG(D_INFO, "looking for export cookie %#llx\n", conn->cookie);
        export = class_handle2object(conn->cookie, NULL);
        RETURN(export);
 }
@@ -775,7 +742,6 @@ struct obd_device *class_conn2obd(struct lustre_handle *conn)
         }
         return NULL;
 }
-EXPORT_SYMBOL(class_conn2obd);
 
 struct obd_import *class_exp2cliimp(struct obd_export *exp)
 {
@@ -793,7 +759,6 @@ struct obd_import *class_conn2cliimp(struct lustre_handle *conn)
                 return NULL;
         return obd->u.cli.cl_import;
 }
-EXPORT_SYMBOL(class_conn2cliimp);
 
 /* Export management functions */
 static void class_export_destroy(struct obd_export *exp)
@@ -811,10 +776,10 @@ static void class_export_destroy(struct obd_export *exp)
         if (exp->exp_connection)
                 ptlrpc_put_connection_superhack(exp->exp_connection);
 
-        LASSERT(cfs_list_empty(&exp->exp_outstanding_replies));
-        LASSERT(cfs_list_empty(&exp->exp_uncommitted_replies));
-        LASSERT(cfs_list_empty(&exp->exp_req_replay_queue));
-        LASSERT(cfs_list_empty(&exp->exp_hp_rpcs));
+       LASSERT(list_empty(&exp->exp_outstanding_replies));
+       LASSERT(list_empty(&exp->exp_uncommitted_replies));
+       LASSERT(list_empty(&exp->exp_req_replay_queue));
+       LASSERT(list_empty(&exp->exp_hp_rpcs));
         obd_destroy_export(exp);
         class_decref(obd, "export", exp);
 
@@ -849,15 +814,16 @@ void class_export_put(struct obd_export *exp)
               atomic_read(&exp->exp_refcount) - 1);
 
        if (atomic_dec_and_test(&exp->exp_refcount)) {
-                LASSERT(!cfs_list_empty(&exp->exp_obd_chain));
-                CDEBUG(D_IOCTL, "final put %p/%s\n",
-                       exp, exp->exp_client_uuid.uuid);
+               LASSERT(!list_empty(&exp->exp_obd_chain));
+               LASSERT(list_empty(&exp->exp_stale_list));
+               CDEBUG(D_IOCTL, "final put %p/%s\n",
+                      exp, exp->exp_client_uuid.uuid);
 
-                /* release nid stat refererence */
-                lprocfs_exp_cleanup(exp);
+               /* release nid stat reference */
+               lprocfs_exp_cleanup(exp);
 
-                obd_zombie_export_add(exp);
-        }
+               obd_zombie_export_add(exp);
+       }
 }
 EXPORT_SYMBOL(class_export_put);
 
@@ -868,7 +834,7 @@ struct obd_export *class_new_export(struct obd_device *obd,
                                     struct obd_uuid *cluuid)
 {
         struct obd_export *export;
-        cfs_hash_t *hash = NULL;
+       struct cfs_hash *hash = NULL;
         int rc = 0;
         ENTRY;
 
@@ -884,26 +850,28 @@ struct obd_export *class_new_export(struct obd_device *obd,
        atomic_set(&export->exp_cb_count, 0);
        atomic_set(&export->exp_locks_count, 0);
 #if LUSTRE_TRACKS_LOCK_EXP_REFS
-        CFS_INIT_LIST_HEAD(&export->exp_locks_list);
+       INIT_LIST_HEAD(&export->exp_locks_list);
        spin_lock_init(&export->exp_locks_list_guard);
 #endif
        atomic_set(&export->exp_replay_count, 0);
        export->exp_obd = obd;
-       CFS_INIT_LIST_HEAD(&export->exp_outstanding_replies);
+       INIT_LIST_HEAD(&export->exp_outstanding_replies);
        spin_lock_init(&export->exp_uncommitted_replies_lock);
-       CFS_INIT_LIST_HEAD(&export->exp_uncommitted_replies);
-       CFS_INIT_LIST_HEAD(&export->exp_req_replay_queue);
-       CFS_INIT_LIST_HEAD(&export->exp_handle.h_link);
-       CFS_INIT_LIST_HEAD(&export->exp_hp_rpcs);
-       CFS_INIT_LIST_HEAD(&export->exp_reg_rpcs);
+       INIT_LIST_HEAD(&export->exp_uncommitted_replies);
+       INIT_LIST_HEAD(&export->exp_req_replay_queue);
+       INIT_LIST_HEAD(&export->exp_handle.h_link);
+       INIT_LIST_HEAD(&export->exp_hp_rpcs);
+       INIT_LIST_HEAD(&export->exp_reg_rpcs);
        class_handle_hash(&export->exp_handle, &export_handle_ops);
        export->exp_last_request_time = cfs_time_current_sec();
        spin_lock_init(&export->exp_lock);
        spin_lock_init(&export->exp_rpc_lock);
-       CFS_INIT_HLIST_NODE(&export->exp_uuid_hash);
-       CFS_INIT_HLIST_NODE(&export->exp_nid_hash);
+       INIT_HLIST_NODE(&export->exp_uuid_hash);
+       INIT_HLIST_NODE(&export->exp_nid_hash);
+       INIT_HLIST_NODE(&export->exp_gen_hash);
        spin_lock_init(&export->exp_bl_list_lock);
-       CFS_INIT_LIST_HEAD(&export->exp_bl_list);
+       INIT_LIST_HEAD(&export->exp_bl_list);
+       INIT_LIST_HEAD(&export->exp_stale_list);
 
        export->exp_sp_peer = LUSTRE_SP_ANY;
        export->exp_flvr.sf_rpc = SPTLRPC_FLVR_INVALID;
@@ -929,6 +897,7 @@ struct obd_export *class_new_export(struct obd_device *obd,
                 }
         }
 
+       at_init(&export->exp_bl_lock_at, obd_timeout, 0);
        spin_lock(&obd->obd_dev_lock);
         if (obd->obd_stopping) {
                 cfs_hash_del(hash, cluuid, &export->exp_uuid_hash);
@@ -936,9 +905,9 @@ struct obd_export *class_new_export(struct obd_device *obd,
         }
 
         class_incref(obd, "export", export);
-        cfs_list_add(&export->exp_obd_chain, &export->exp_obd->obd_exports);
-        cfs_list_add_tail(&export->exp_obd_chain_timed,
-                          &export->exp_obd->obd_exports_timed);
+       list_add(&export->exp_obd_chain, &export->exp_obd->obd_exports);
+       list_add_tail(&export->exp_obd_chain_timed,
+                     &export->exp_obd->obd_exports_timed);
         export->exp_obd->obd_num_exports++;
        spin_unlock(&obd->obd_dev_lock);
        cfs_hash_putref(hash);
@@ -950,7 +919,7 @@ exit_err:
         if (hash)
                 cfs_hash_putref(hash);
         class_handle_unhash(&export->exp_handle);
-        LASSERT(cfs_hlist_unhashed(&export->exp_uuid_hash));
+       LASSERT(hlist_unhashed(&export->exp_uuid_hash));
         obd_destroy_export(export);
         OBD_FREE_PTR(export);
         return ERR_PTR(rc);
@@ -963,21 +932,37 @@ void class_unlink_export(struct obd_export *exp)
 
        spin_lock(&exp->exp_obd->obd_dev_lock);
        /* delete an uuid-export hashitem from hashtables */
-       if (!cfs_hlist_unhashed(&exp->exp_uuid_hash))
+       if (!hlist_unhashed(&exp->exp_uuid_hash))
                cfs_hash_del(exp->exp_obd->obd_uuid_hash,
                             &exp->exp_client_uuid,
                             &exp->exp_uuid_hash);
 
-       cfs_list_move(&exp->exp_obd_chain, &exp->exp_obd->obd_unlinked_exports);
-       cfs_list_del_init(&exp->exp_obd_chain_timed);
+       if (!hlist_unhashed(&exp->exp_gen_hash)) {
+               struct tg_export_data   *ted = &exp->exp_target_data;
+               struct cfs_hash         *hash;
+
+               /* obd_gen_hash is not released until
+                * class_cleanup(), so hash should never be NULL here */
+               hash = cfs_hash_getref(exp->exp_obd->obd_gen_hash);
+               LASSERT(hash != NULL);
+               cfs_hash_del(hash, &ted->ted_lcd->lcd_generation,
+                            &exp->exp_gen_hash);
+               cfs_hash_putref(hash);
+       }
+
+       list_move(&exp->exp_obd_chain, &exp->exp_obd->obd_unlinked_exports);
+       list_del_init(&exp->exp_obd_chain_timed);
        exp->exp_obd->obd_num_exports--;
        spin_unlock(&exp->exp_obd->obd_dev_lock);
-       class_export_put(exp);
+       atomic_inc(&obd_stale_export_num);
+
+       /* A reference is kept by obd_stale_exports list */
+       obd_stale_export_put(exp);
 }
 EXPORT_SYMBOL(class_unlink_export);
 
 /* Import management functions */
-void class_import_destroy(struct obd_import *imp)
+static void class_import_destroy(struct obd_import *imp)
 {
         ENTRY;
 
@@ -988,12 +973,12 @@ void class_import_destroy(struct obd_import *imp)
 
         ptlrpc_put_connection_superhack(imp->imp_connection);
 
-        while (!cfs_list_empty(&imp->imp_conn_list)) {
-                struct obd_import_conn *imp_conn;
+       while (!list_empty(&imp->imp_conn_list)) {
+               struct obd_import_conn *imp_conn;
 
-                imp_conn = cfs_list_entry(imp->imp_conn_list.next,
-                                          struct obd_import_conn, oic_item);
-                cfs_list_del_init(&imp_conn->oic_item);
+               imp_conn = list_entry(imp->imp_conn_list.next,
+                                     struct obd_import_conn, oic_item);
+               list_del_init(&imp_conn->oic_item);
                 ptlrpc_put_connection_superhack(imp_conn->oic_conn);
                 OBD_FREE(imp_conn, sizeof(*imp_conn));
         }
@@ -1026,9 +1011,9 @@ EXPORT_SYMBOL(class_import_get);
 
 void class_import_put(struct obd_import *imp)
 {
-        ENTRY;
+       ENTRY;
 
-        LASSERT(cfs_list_empty(&imp->imp_zombie_chain));
+       LASSERT(list_empty(&imp->imp_zombie_chain));
         LASSERT_ATOMIC_GT_LT(&imp->imp_refcount, 0, LI_POISON);
 
         CDEBUG(D_INFO, "import %p refcount=%d obd=%s\n", imp,
@@ -1061,17 +1046,20 @@ static void init_imp_at(struct imp_at *at) {
 struct obd_import *class_new_import(struct obd_device *obd)
 {
        struct obd_import *imp;
+       struct pid_namespace *curr_pid_ns = ll_task_pid_ns(current);
 
        OBD_ALLOC(imp, sizeof(*imp));
        if (imp == NULL)
                return NULL;
 
-       CFS_INIT_LIST_HEAD(&imp->imp_pinger_chain);
-       CFS_INIT_LIST_HEAD(&imp->imp_zombie_chain);
-       CFS_INIT_LIST_HEAD(&imp->imp_replay_list);
-       CFS_INIT_LIST_HEAD(&imp->imp_sending_list);
-       CFS_INIT_LIST_HEAD(&imp->imp_delayed_list);
-       CFS_INIT_LIST_HEAD(&imp->imp_committed_list);
+       INIT_LIST_HEAD(&imp->imp_pinger_chain);
+       INIT_LIST_HEAD(&imp->imp_zombie_chain);
+       INIT_LIST_HEAD(&imp->imp_replay_list);
+       INIT_LIST_HEAD(&imp->imp_sending_list);
+       INIT_LIST_HEAD(&imp->imp_delayed_list);
+       INIT_LIST_HEAD(&imp->imp_committed_list);
+       INIT_LIST_HEAD(&imp->imp_unreplied_list);
+       imp->imp_known_replied_xid = 0;
        imp->imp_replay_cursor = &imp->imp_committed_list;
        spin_lock_init(&imp->imp_lock);
        imp->imp_last_success_conn = 0;
@@ -1080,13 +1068,18 @@ struct obd_import *class_new_import(struct obd_device *obd)
        mutex_init(&imp->imp_sec_mutex);
        init_waitqueue_head(&imp->imp_recovery_waitq);
 
+       if (curr_pid_ns->child_reaper)
+               imp->imp_sec_refpid = curr_pid_ns->child_reaper->pid;
+       else
+               imp->imp_sec_refpid = 1;
+
        atomic_set(&imp->imp_refcount, 2);
        atomic_set(&imp->imp_unregistering, 0);
        atomic_set(&imp->imp_inflight, 0);
        atomic_set(&imp->imp_replay_inflight, 0);
        atomic_set(&imp->imp_inval_count, 0);
-       CFS_INIT_LIST_HEAD(&imp->imp_conn_list);
-       CFS_INIT_LIST_HEAD(&imp->imp_handle.h_link);
+       INIT_LIST_HEAD(&imp->imp_conn_list);
+       INIT_LIST_HEAD(&imp->imp_handle.h_link);
        class_handle_hash(&imp->imp_handle, &import_handle_ops);
        init_imp_at(&imp->imp_at);
 
@@ -1126,7 +1119,7 @@ void __class_export_add_lock_ref(struct obd_export *exp, struct ldlm_lock *lock)
                               exp, lock, lock->l_exp_refs_target);
         }
         if ((lock->l_exp_refs_nr ++) == 0) {
-                cfs_list_add(&lock->l_exp_refs_link, &exp->exp_locks_list);
+               list_add(&lock->l_exp_refs_link, &exp->exp_locks_list);
                 lock->l_exp_refs_target = exp;
         }
         CDEBUG(D_INFO, "lock = %p, export = %p, refs = %u\n",
@@ -1145,7 +1138,7 @@ void __class_export_del_lock_ref(struct obd_export *exp, struct ldlm_lock *lock)
                               lock, lock->l_exp_refs_target, exp);
         }
         if (-- lock->l_exp_refs_nr == 0) {
-                cfs_list_del_init(&lock->l_exp_refs_link);
+               list_del_init(&lock->l_exp_refs_link);
                 lock->l_exp_refs_target = NULL;
         }
         CDEBUG(D_INFO, "lock = %p, export = %p, refs = %u\n",
@@ -1175,14 +1168,14 @@ int class_connect(struct lustre_handle *conn, struct obd_device *obd,
         conn->cookie = export->exp_handle.h_cookie;
         class_export_put(export);
 
-        CDEBUG(D_IOCTL, "connect: client %s, cookie "LPX64"\n",
+       CDEBUG(D_IOCTL, "connect: client %s, cookie %#llx\n",
                cluuid->uuid, conn->cookie);
         RETURN(0);
 }
 EXPORT_SYMBOL(class_connect);
 
 /* if export is involved in recovery then clean up related things */
-void class_export_recovery_cleanup(struct obd_export *exp)
+static void class_export_recovery_cleanup(struct obd_export *exp)
 {
        struct obd_device *obd = exp->exp_obd;
 
@@ -1248,14 +1241,14 @@ int class_disconnect(struct obd_export *export)
          * all end up in here, and if any of them race we shouldn't
          * call extra class_export_puts(). */
         if (already_disconnected) {
-                LASSERT(cfs_hlist_unhashed(&export->exp_nid_hash));
+               LASSERT(hlist_unhashed(&export->exp_nid_hash));
                 GOTO(no_disconn, already_disconnected);
         }
 
-        CDEBUG(D_IOCTL, "disconnect: cookie "LPX64"\n",
+       CDEBUG(D_IOCTL, "disconnect: cookie %#llx\n",
                export->exp_handle.h_cookie);
 
-        if (!cfs_hlist_unhashed(&export->exp_nid_hash))
+       if (!hlist_unhashed(&export->exp_nid_hash))
                 cfs_hash_del(export->exp_obd->obd_nid_hash,
                              &export->exp_connection->c_peer.nid,
                              &export->exp_nid_hash);
@@ -1282,7 +1275,7 @@ int class_connected_export(struct obd_export *exp)
 }
 EXPORT_SYMBOL(class_connected_export);
 
-static void class_disconnect_export_list(cfs_list_t *list,
+static void class_disconnect_export_list(struct list_head *list,
                                          enum obd_option flags)
 {
         int rc;
@@ -1291,11 +1284,11 @@ static void class_disconnect_export_list(cfs_list_t *list,
 
         /* It's possible that an export may disconnect itself, but
          * nothing else will be added to this list. */
-        while (!cfs_list_empty(list)) {
-                exp = cfs_list_entry(list->next, struct obd_export,
-                                     exp_obd_chain);
-                /* need for safe call CDEBUG after obd_disconnect */
-                class_export_get(exp);
+       while (!list_empty(list)) {
+               exp = list_entry(list->next, struct obd_export,
+                                exp_obd_chain);
+               /* hold a reference so the CDEBUG after obd_disconnect is safe */
+               class_export_get(exp);
 
                spin_lock(&exp->exp_lock);
                exp->exp_flags = flags;
@@ -1308,7 +1301,7 @@ static void class_disconnect_export_list(cfs_list_t *list,
                                exp);
                         /* Need to delete this now so we don't end up pointing
                          * to work_list later when this export is cleaned up. */
-                        cfs_list_del_init(&exp->exp_obd_chain);
+                       list_del_init(&exp->exp_obd_chain);
                         class_export_put(exp);
                         continue;
                 }
@@ -1330,17 +1323,17 @@ static void class_disconnect_export_list(cfs_list_t *list,
 
 void class_disconnect_exports(struct obd_device *obd)
 {
-       cfs_list_t work_list;
+       struct list_head work_list;
        ENTRY;
 
        /* Move all of the exports from obd_exports to a work list, en masse. */
-       CFS_INIT_LIST_HEAD(&work_list);
+       INIT_LIST_HEAD(&work_list);
        spin_lock(&obd->obd_dev_lock);
-       cfs_list_splice_init(&obd->obd_exports, &work_list);
-       cfs_list_splice_init(&obd->obd_delayed_exports, &work_list);
+       list_splice_init(&obd->obd_exports, &work_list);
+       list_splice_init(&obd->obd_delayed_exports, &work_list);
        spin_unlock(&obd->obd_dev_lock);
 
-        if (!cfs_list_empty(&work_list)) {
+       if (!list_empty(&work_list)) {
                 CDEBUG(D_HA, "OBD device %d (%p) has exports, "
                        "disconnecting them\n", obd->obd_minor, obd);
                 class_disconnect_export_list(&work_list,
@@ -1357,15 +1350,15 @@ EXPORT_SYMBOL(class_disconnect_exports);
 void class_disconnect_stale_exports(struct obd_device *obd,
                                     int (*test_export)(struct obd_export *))
 {
-        cfs_list_t work_list;
+       struct list_head work_list;
        struct obd_export *exp, *n;
         int evicted = 0;
         ENTRY;
 
-        CFS_INIT_LIST_HEAD(&work_list);
+       INIT_LIST_HEAD(&work_list);
        spin_lock(&obd->obd_dev_lock);
-       cfs_list_for_each_entry_safe(exp, n, &obd->obd_exports,
-                                    exp_obd_chain) {
+       list_for_each_entry_safe(exp, n, &obd->obd_exports,
+                                exp_obd_chain) {
                 /* don't count self-export as client */
                 if (obd_uuid_equals(&exp->exp_client_uuid,
                                     &exp->exp_obd->obd_uuid))
@@ -1384,13 +1377,13 @@ void class_disconnect_stale_exports(struct obd_device *obd,
                exp->exp_failed = 1;
                spin_unlock(&exp->exp_lock);
 
-                cfs_list_move(&exp->exp_obd_chain, &work_list);
+               list_move(&exp->exp_obd_chain, &work_list);
                 evicted++;
                 CDEBUG(D_HA, "%s: disconnect stale client %s@%s\n",
                        obd->obd_name, exp->exp_client_uuid.uuid,
                        exp->exp_connection == NULL ? "<unknown>" :
                        libcfs_nid2str(exp->exp_connection->c_peer.nid));
-                print_export_data(exp, "EVICTING", 0);
+                print_export_data(exp, "EVICTING", 0, D_HA);
         }
        spin_unlock(&obd->obd_dev_lock);
 
@@ -1453,7 +1446,7 @@ EXPORT_SYMBOL(obd_export_nid2str);
 
 int obd_export_evict_by_nid(struct obd_device *obd, const char *nid)
 {
-       cfs_hash_t *nid_hash;
+       struct cfs_hash *nid_hash;
        struct obd_export *doomed_exp = NULL;
        int exports_evicted = 0;
 
@@ -1501,7 +1494,7 @@ EXPORT_SYMBOL(obd_export_evict_by_nid);
 
 int obd_export_evict_by_uuid(struct obd_device *obd, const char *uuid)
 {
-       cfs_hash_t *uuid_hash;
+       struct cfs_hash *uuid_hash;
        struct obd_export *doomed_exp = NULL;
        struct obd_uuid doomed_uuid;
        int exports_evicted = 0;
@@ -1538,7 +1531,6 @@ int obd_export_evict_by_uuid(struct obd_device *obd, const char *uuid)
 
         return exports_evicted;
 }
-EXPORT_SYMBOL(obd_export_evict_by_uuid);
 
 #if LUSTRE_TRACKS_LOCK_EXP_REFS
 void (*class_export_dump_hook)(struct obd_export*) = NULL;
@@ -1546,71 +1538,71 @@ EXPORT_SYMBOL(class_export_dump_hook);
 #endif
 
 static void print_export_data(struct obd_export *exp, const char *status,
-                             int locks)
+                             int locks, int debug_level)
 {
        struct ptlrpc_reply_state *rs;
        struct ptlrpc_reply_state *first_reply = NULL;
        int nreplies = 0;
 
        spin_lock(&exp->exp_lock);
-       cfs_list_for_each_entry(rs, &exp->exp_outstanding_replies,
-                               rs_exp_list) {
+       list_for_each_entry(rs, &exp->exp_outstanding_replies,
+                           rs_exp_list) {
                if (nreplies == 0)
                        first_reply = rs;
                nreplies++;
        }
        spin_unlock(&exp->exp_lock);
 
-        CDEBUG(D_HA, "%s: %s %p %s %s %d (%d %d %d) %d %d %d %d: %p %s "LPU64"\n",
-               exp->exp_obd->obd_name, status, exp, exp->exp_client_uuid.uuid,
+       CDEBUG(debug_level, "%s: %s %p %s %s %d (%d %d %d) %d %d %d %d: "
+              "%p %s %llu stale:%d\n",
+              exp->exp_obd->obd_name, status, exp, exp->exp_client_uuid.uuid,
               obd_export_nid2str(exp), atomic_read(&exp->exp_refcount),
               atomic_read(&exp->exp_rpc_count),
               atomic_read(&exp->exp_cb_count),
               atomic_read(&exp->exp_locks_count),
-               exp->exp_disconnected, exp->exp_delayed, exp->exp_failed,
-               nreplies, first_reply, nreplies > 3 ? "..." : "",
-               exp->exp_last_committed);
+              exp->exp_disconnected, exp->exp_delayed, exp->exp_failed,
+              nreplies, first_reply, nreplies > 3 ? "..." : "",
+              exp->exp_last_committed, !list_empty(&exp->exp_stale_list));
 #if LUSTRE_TRACKS_LOCK_EXP_REFS
-        if (locks && class_export_dump_hook != NULL)
-                class_export_dump_hook(exp);
+       if (locks && class_export_dump_hook != NULL)
+               class_export_dump_hook(exp);
 #endif
 }
 
-void dump_exports(struct obd_device *obd, int locks)
+void dump_exports(struct obd_device *obd, int locks, int debug_level)
 {
         struct obd_export *exp;
 
        spin_lock(&obd->obd_dev_lock);
-       cfs_list_for_each_entry(exp, &obd->obd_exports, exp_obd_chain)
-               print_export_data(exp, "ACTIVE", locks);
-       cfs_list_for_each_entry(exp, &obd->obd_unlinked_exports, exp_obd_chain)
-               print_export_data(exp, "UNLINKED", locks);
-       cfs_list_for_each_entry(exp, &obd->obd_delayed_exports, exp_obd_chain)
-               print_export_data(exp, "DELAYED", locks);
+       list_for_each_entry(exp, &obd->obd_exports, exp_obd_chain)
+               print_export_data(exp, "ACTIVE", locks, debug_level);
+       list_for_each_entry(exp, &obd->obd_unlinked_exports, exp_obd_chain)
+               print_export_data(exp, "UNLINKED", locks, debug_level);
+       list_for_each_entry(exp, &obd->obd_delayed_exports, exp_obd_chain)
+               print_export_data(exp, "DELAYED", locks, debug_level);
        spin_unlock(&obd->obd_dev_lock);
        spin_lock(&obd_zombie_impexp_lock);
-       cfs_list_for_each_entry(exp, &obd_zombie_exports, exp_obd_chain)
-               print_export_data(exp, "ZOMBIE", locks);
+       list_for_each_entry(exp, &obd_zombie_exports, exp_obd_chain)
+               print_export_data(exp, "ZOMBIE", locks, debug_level);
        spin_unlock(&obd_zombie_impexp_lock);
 }
-EXPORT_SYMBOL(dump_exports);
 
 void obd_exports_barrier(struct obd_device *obd)
 {
        int waited = 2;
-       LASSERT(cfs_list_empty(&obd->obd_exports));
+       LASSERT(list_empty(&obd->obd_exports));
        spin_lock(&obd->obd_dev_lock);
-       while (!cfs_list_empty(&obd->obd_unlinked_exports)) {
+       while (!list_empty(&obd->obd_unlinked_exports)) {
                spin_unlock(&obd->obd_dev_lock);
-               schedule_timeout_and_set_state(TASK_UNINTERRUPTIBLE,
-                                                  cfs_time_seconds(waited));
+               set_current_state(TASK_UNINTERRUPTIBLE);
+               schedule_timeout(cfs_time_seconds(waited));
                if (waited > 5 && IS_PO2(waited)) {
                        LCONSOLE_WARN("%s is waiting for obd_unlinked_exports "
                                      "more than %d seconds. "
                                      "The obd refcount = %d. Is it stuck?\n",
                                      obd->obd_name, waited,
                                      atomic_read(&obd->obd_refcount));
-                       dump_exports(obd, 1);
+                       dump_exports(obd, 1, D_CONSOLE | D_WARNING);
                }
                waited *= 2;
                spin_lock(&obd->obd_dev_lock);
@@ -1634,21 +1626,21 @@ void obd_zombie_impexp_cull(void)
        do {
                spin_lock(&obd_zombie_impexp_lock);
 
-                import = NULL;
-                if (!cfs_list_empty(&obd_zombie_imports)) {
-                        import = cfs_list_entry(obd_zombie_imports.next,
-                                                struct obd_import,
-                                                imp_zombie_chain);
-                        cfs_list_del_init(&import->imp_zombie_chain);
-                }
+               import = NULL;
+               if (!list_empty(&obd_zombie_imports)) {
+                       import = list_entry(obd_zombie_imports.next,
+                                           struct obd_import,
+                                           imp_zombie_chain);
+                       list_del_init(&import->imp_zombie_chain);
+               }
 
-                export = NULL;
-                if (!cfs_list_empty(&obd_zombie_exports)) {
-                        export = cfs_list_entry(obd_zombie_exports.next,
-                                                struct obd_export,
-                                                exp_obd_chain);
-                        cfs_list_del_init(&export->exp_obd_chain);
-                }
+               export = NULL;
+               if (!list_empty(&obd_zombie_exports)) {
+                       export = list_entry(obd_zombie_exports.next,
+                                           struct obd_export,
+                                           exp_obd_chain);
+                       list_del_init(&export->exp_obd_chain);
+               }
 
                spin_unlock(&obd_zombie_impexp_lock);
 
@@ -1700,13 +1692,14 @@ static int obd_zombie_impexp_check(void *arg)
  * Add export to the obd_zombie thread and notify it.
  */
 static void obd_zombie_export_add(struct obd_export *exp) {
+       atomic_dec(&obd_stale_export_num);
        spin_lock(&exp->exp_obd->obd_dev_lock);
-       LASSERT(!cfs_list_empty(&exp->exp_obd_chain));
-       cfs_list_del_init(&exp->exp_obd_chain);
+       LASSERT(!list_empty(&exp->exp_obd_chain));
+       list_del_init(&exp->exp_obd_chain);
        spin_unlock(&exp->exp_obd->obd_dev_lock);
        spin_lock(&obd_zombie_impexp_lock);
        zombies_count++;
-       cfs_list_add(&exp->exp_obd_chain, &obd_zombie_exports);
+       list_add(&exp->exp_obd_chain, &obd_zombie_exports);
        spin_unlock(&obd_zombie_impexp_lock);
 
        obd_zombie_impexp_notify();
@@ -1717,11 +1710,10 @@ static void obd_zombie_export_add(struct obd_export *exp) {
  */
 static void obd_zombie_import_add(struct obd_import *imp) {
        LASSERT(imp->imp_sec == NULL);
-       LASSERT(imp->imp_rq_pool == NULL);
        spin_lock(&obd_zombie_impexp_lock);
-       LASSERT(cfs_list_empty(&imp->imp_zombie_chain));
+       LASSERT(list_empty(&imp->imp_zombie_chain));
        zombies_count++;
-       cfs_list_add(&imp->imp_zombie_chain, &obd_zombie_imports);
+       list_add(&imp->imp_zombie_chain, &obd_zombie_imports);
        spin_unlock(&obd_zombie_impexp_lock);
 
        obd_zombie_impexp_notify();
@@ -1768,7 +1760,76 @@ void obd_zombie_barrier(void)
 }
 EXPORT_SYMBOL(obd_zombie_barrier);
 
-#ifdef __KERNEL__
+
+/**
+ * Detach the first export from the global stale-export list.
+ *
+ * The list is inspected and modified under obd_stale_export_lock.
+ *
+ * \retval the export unlinked from the head of obd_stale_exports
+ * \retval NULL if the list is empty
+ */
+struct obd_export *obd_stale_export_get(void)
+{
+       struct obd_export *head_exp;
+       ENTRY;
+
+       spin_lock(&obd_stale_export_lock);
+       if (list_empty(&obd_stale_exports)) {
+               /* nothing queued, so nothing to trace either */
+               spin_unlock(&obd_stale_export_lock);
+               RETURN(NULL);
+       }
+
+       head_exp = list_entry(obd_stale_exports.next,
+                             struct obd_export, exp_stale_list);
+       list_del_init(&head_exp->exp_stale_list);
+       spin_unlock(&obd_stale_export_lock);
+
+       CDEBUG(D_DLMTRACE, "Get export %p: total %d\n", head_exp,
+              atomic_read(&obd_stale_export_num));
+       RETURN(head_exp);
+}
+EXPORT_SYMBOL(obd_stale_export_get);
+
+/**
+ * Give back an export that is currently not on the stale-export list.
+ *
+ * If the export has a lock hash whose hs_count is non-zero it is queued
+ * on obd_stale_exports again: at the head when its exp_bl_list is not
+ * empty, at the tail otherwise.  In every other case the export is
+ * released through class_export_put().
+ *
+ * \param[in] exp      export to re-queue or release; must not be linked
+ *                     on the stale list (asserted)
+ */
+void obd_stale_export_put(struct obd_export *exp)
+{
+       bool has_locks;
+       ENTRY;
+
+       LASSERT(list_empty(&exp->exp_stale_list));
+
+       has_locks = exp->exp_lock_hash &&
+                   atomic_read(&exp->exp_lock_hash->hs_count);
+       if (!has_locks) {
+               class_export_put(exp);
+               EXIT;
+               return;
+       }
+
+       CDEBUG(D_DLMTRACE, "Put export %p: total %d\n", exp,
+              atomic_read(&obd_stale_export_num));
+
+       /* lock order: the export's bl-list lock first, then the
+        * global stale-list lock */
+       spin_lock_bh(&exp->exp_bl_list_lock);
+       spin_lock(&obd_stale_export_lock);
+       /* Exports with blocked locks go to the head of the list,
+        * all the others are queued at the tail. */
+       if (!list_empty(&exp->exp_bl_list))
+               list_add(&exp->exp_stale_list, &obd_stale_exports);
+       else
+               list_add_tail(&exp->exp_stale_list, &obd_stale_exports);
+       spin_unlock(&obd_stale_export_lock);
+       spin_unlock_bh(&exp->exp_bl_list_lock);
+
+       EXIT;
+}
+EXPORT_SYMBOL(obd_stale_export_put);
+
+/**
+ * Re-position an export within the stale-export list.
+ *
+ * When \a exp is linked on the stale list and its exp_bl_list is not
+ * empty, the export is moved to the head of obd_stale_exports.
+ * Otherwise the list is left untouched.
+ *
+ * \param[in] exp      export to adjust; must not be NULL (asserted)
+ **/
+void obd_stale_export_adjust(struct obd_export *exp)
+{
+       LASSERT(exp != NULL);
+
+       spin_lock_bh(&exp->exp_bl_list_lock);
+       spin_lock(&obd_stale_export_lock);
+       if (!(list_empty(&exp->exp_stale_list) ||
+             list_empty(&exp->exp_bl_list)))
+               list_move(&exp->exp_stale_list, &obd_stale_exports);
+       spin_unlock(&obd_stale_export_lock);
+       spin_unlock_bh(&exp->exp_bl_list_lock);
+}
+EXPORT_SYMBOL(obd_stale_export_adjust);
 
 /**
  * destroy zombie export/import thread.
@@ -1799,59 +1860,28 @@ static int obd_zombie_impexp_thread(void *unused)
        RETURN(0);
 }
 
-#else /* ! KERNEL */
-
-static atomic_t zombie_recur = ATOMIC_INIT(0);
-static void *obd_zombie_impexp_work_cb;
-static void *obd_zombie_impexp_idle_cb;
-
-int obd_zombie_impexp_kill(void *arg)
-{
-        int rc = 0;
-
-       if (atomic_inc_return(&zombie_recur) == 1) {
-                obd_zombie_impexp_cull();
-                rc = 1;
-        }
-       atomic_dec(&zombie_recur);
-        return rc;
-}
-
-#endif
 
 /**
  * start destroy zombie import/export thread
  */
 int obd_zombie_impexp_init(void)
 {
-#ifdef __KERNEL__
        struct task_struct *task;
-#endif
 
-       CFS_INIT_LIST_HEAD(&obd_zombie_imports);
-       CFS_INIT_LIST_HEAD(&obd_zombie_exports);
+       INIT_LIST_HEAD(&obd_zombie_imports);
+
+       INIT_LIST_HEAD(&obd_zombie_exports);
        spin_lock_init(&obd_zombie_impexp_lock);
        init_completion(&obd_zombie_start);
        init_completion(&obd_zombie_stop);
        init_waitqueue_head(&obd_zombie_waitq);
        obd_zombie_pid = 0;
 
-#ifdef __KERNEL__
        task = kthread_run(obd_zombie_impexp_thread, NULL, "obd_zombid");
        if (IS_ERR(task))
                RETURN(PTR_ERR(task));
 
        wait_for_completion(&obd_zombie_start);
-#else
-
-        obd_zombie_impexp_work_cb =
-                liblustre_register_wait_callback("obd_zombi_impexp_kill",
-                                                 &obd_zombie_impexp_kill, NULL);
-
-        obd_zombie_impexp_idle_cb =
-                liblustre_register_idle_callback("obd_zombi_impexp_check",
-                                                 &obd_zombie_impexp_check, NULL);
-#endif
        RETURN(0);
 }
 /**
@@ -1861,12 +1891,7 @@ void obd_zombie_impexp_stop(void)
 {
        set_bit(OBD_ZOMBIE_STOP, &obd_zombie_flags);
         obd_zombie_impexp_notify();
-#ifdef __KERNEL__
        wait_for_completion(&obd_zombie_stop);
-#else
-        liblustre_deregister_wait_callback(obd_zombie_impexp_work_cb);
-        liblustre_deregister_idle_callback(obd_zombie_impexp_idle_cb);
-#endif
 }
 
 /***** Kernel-userspace comm helpers *******/
@@ -1890,21 +1915,6 @@ struct kuc_hdr * kuc_ptr(void *p)
 }
 EXPORT_SYMBOL(kuc_ptr);
 
-/* Test if payload is part of kuc message
- * @param p Pointer to payload area
- * @returns boolean
- */
-int kuc_ispayload(void *p)
-{
-        struct kuc_hdr *kh = ((struct kuc_hdr *)p) - 1;
-
-        if (kh->kuc_magic == KUC_MAGIC)
-                return 1;
-        else
-                return 0;
-}
-EXPORT_SYMBOL(kuc_ispayload);
-
 /* Alloc space for a message, and fill in header
  * @return Pointer to payload area
  */
@@ -1945,9 +1955,9 @@ static bool obd_request_slot_avail(struct client_obd *cli,
 {
        bool avail;
 
-       client_obd_list_lock(&cli->cl_loi_list_lock);
+       spin_lock(&cli->cl_loi_list_lock);
        avail = !!list_empty(&orsw->orsw_entry);
-       client_obd_list_unlock(&cli->cl_loi_list_lock);
+       spin_unlock(&cli->cl_loi_list_lock);
 
        return avail;
 };
@@ -1965,17 +1975,17 @@ int obd_get_request_slot(struct client_obd *cli)
        struct l_wait_info               lwi;
        int                              rc;
 
-       client_obd_list_lock(&cli->cl_loi_list_lock);
+       spin_lock(&cli->cl_loi_list_lock);
        if (cli->cl_r_in_flight < cli->cl_max_rpcs_in_flight) {
                cli->cl_r_in_flight++;
-               client_obd_list_unlock(&cli->cl_loi_list_lock);
+               spin_unlock(&cli->cl_loi_list_lock);
                return 0;
        }
 
        init_waitqueue_head(&orsw.orsw_waitq);
        list_add_tail(&orsw.orsw_entry, &cli->cl_loi_read_list);
        orsw.orsw_signaled = false;
-       client_obd_list_unlock(&cli->cl_loi_list_lock);
+       spin_unlock(&cli->cl_loi_list_lock);
 
        lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL);
        rc = l_wait_event(orsw.orsw_waitq,
@@ -1985,7 +1995,7 @@ int obd_get_request_slot(struct client_obd *cli)
 
        /* Here, we must take the lock to avoid the on-stack 'orsw' to be
         * freed but other (such as obd_put_request_slot) is using it. */
-       client_obd_list_lock(&cli->cl_loi_list_lock);
+       spin_lock(&cli->cl_loi_list_lock);
        if (rc != 0) {
                if (!orsw.orsw_signaled) {
                        if (list_empty(&orsw.orsw_entry))
@@ -2000,7 +2010,7 @@ int obd_get_request_slot(struct client_obd *cli)
 
                rc = -EINTR;
        }
-       client_obd_list_unlock(&cli->cl_loi_list_lock);
+       spin_unlock(&cli->cl_loi_list_lock);
 
        return rc;
 }
@@ -2010,7 +2020,7 @@ void obd_put_request_slot(struct client_obd *cli)
 {
        struct obd_request_slot_waiter *orsw;
 
-       client_obd_list_lock(&cli->cl_loi_list_lock);
+       spin_lock(&cli->cl_loi_list_lock);
        cli->cl_r_in_flight--;
 
        /* If there is free slot, wakeup the first waiter. */
@@ -2022,7 +2032,7 @@ void obd_put_request_slot(struct client_obd *cli)
                cli->cl_r_in_flight++;
                wake_up(&orsw->orsw_waitq);
        }
-       client_obd_list_unlock(&cli->cl_loi_list_lock);
+       spin_unlock(&cli->cl_loi_list_lock);
 }
 EXPORT_SYMBOL(obd_put_request_slot);
 
@@ -2038,11 +2048,31 @@ int obd_set_max_rpcs_in_flight(struct client_obd *cli, __u32 max)
        __u32                           old;
        int                             diff;
        int                             i;
+       char                            *typ_name;
+       int                             rc;
 
        if (max > OBD_MAX_RIF_MAX || max < 1)
                return -ERANGE;
 
-       client_obd_list_lock(&cli->cl_loi_list_lock);
+       typ_name = cli->cl_import->imp_obd->obd_type->typ_name;
+       if (strcmp(typ_name, LUSTRE_MDC_NAME) == 0) {
+               /* adjust max_mod_rpcs_in_flight to ensure it is always
+                * strictly lower that max_rpcs_in_flight */
+               if (max < 2) {
+                       CERROR("%s: cannot set max_rpcs_in_flight to 1 "
+                              "because it must be higher than "
+                              "max_mod_rpcs_in_flight value",
+                              cli->cl_import->imp_obd->obd_name);
+                       return -ERANGE;
+               }
+               if (max <= cli->cl_max_mod_rpcs_in_flight) {
+                       rc = obd_set_max_mod_rpcs_in_flight(cli, max - 1);
+                       if (rc != 0)
+                               return rc;
+               }
+       }
+
+       spin_lock(&cli->cl_loi_list_lock);
        old = cli->cl_max_rpcs_in_flight;
        cli->cl_max_rpcs_in_flight = max;
        diff = max - old;
@@ -2058,8 +2088,224 @@ int obd_set_max_rpcs_in_flight(struct client_obd *cli, __u32 max)
                cli->cl_r_in_flight++;
                wake_up(&orsw->orsw_waitq);
        }
-       client_obd_list_unlock(&cli->cl_loi_list_lock);
+       spin_unlock(&cli->cl_loi_list_lock);
 
        return 0;
 }
 EXPORT_SYMBOL(obd_set_max_rpcs_in_flight);
+
+/**
+ * Read the current limit of modify RPCs in flight of client \a cli.
+ *
+ * The field is read without taking any lock.
+ *
+ * \retval value of cli->cl_max_mod_rpcs_in_flight
+ */
+__u16 obd_get_max_mod_rpcs_in_flight(struct client_obd *cli)
+{
+       __u16 limit = cli->cl_max_mod_rpcs_in_flight;
+
+       return limit;
+}
+EXPORT_SYMBOL(obd_get_max_mod_rpcs_in_flight);
+
+/**
+ * Set the limit of modify RPCs in flight of client \a cli to \a max.
+ *
+ * The new value is refused when
+ * - it is outside the range [1, OBD_MAX_RIF_MAX],
+ * - it is not strictly lower than cl_max_rpcs_in_flight, or
+ * - it is higher than the server limit, i.e. ocd_maxmodrpcs when
+ *   OBD_CONNECT_MULTIMODRPCS is set in the connect flags, 1 otherwise.
+ *
+ * The limit is updated under cl_mod_rpcs_lock and the threads sleeping
+ * on cl_mod_rpcs_waitq are woken up when the limit has grown.
+ *
+ * \retval 0           the limit was updated
+ * \retval -ERANGE     \a max was refused
+ */
+int obd_set_max_mod_rpcs_in_flight(struct client_obd *cli, __u16 max)
+{
+       struct obd_connect_data *conn_data;
+       __u16 srv_limit;
+       __u16 old_max;
+
+       if (max < 1 || max > OBD_MAX_RIF_MAX)
+               return -ERANGE;
+
+       /* must stay strictly below max_rpcs_in_flight */
+       if (max >= cli->cl_max_rpcs_in_flight) {
+               CERROR("%s: can't set max_mod_rpcs_in_flight to a value (%hu) "
+                      "higher or equal to max_rpcs_in_flight value (%u)\n",
+                      cli->cl_import->imp_obd->obd_name,
+                      max, cli->cl_max_rpcs_in_flight);
+               return -ERANGE;
+       }
+
+       /* must not go over what the server accepts from one client */
+       conn_data = &cli->cl_import->imp_connect_data;
+       srv_limit = (conn_data->ocd_connect_flags & OBD_CONNECT_MULTIMODRPCS) ?
+                   conn_data->ocd_maxmodrpcs : 1;
+       if (max > srv_limit) {
+               CERROR("%s: can't set max_mod_rpcs_in_flight to a value (%hu) "
+                      "higher than max_mod_rpcs_per_client value (%hu) "
+                      "returned by the server at connection\n",
+                      cli->cl_import->imp_obd->obd_name,
+                      max, srv_limit);
+               return -ERANGE;
+       }
+
+       spin_lock(&cli->cl_mod_rpcs_lock);
+       old_max = cli->cl_max_mod_rpcs_in_flight;
+       cli->cl_max_mod_rpcs_in_flight = max;
+       /* a larger limit may free slots for sleeping threads */
+       if (max > old_max)
+               wake_up(&cli->cl_mod_rpcs_waitq);
+       spin_unlock(&cli->cl_mod_rpcs_lock);
+
+       return 0;
+}
+EXPORT_SYMBOL(obd_set_max_mod_rpcs_in_flight);
+
+
+/* Integer percentage of \a a relative to \a b; evaluates to 0 when \a b
+ * is 0.  Arguments are parenthesized so that expressions can be passed. */
+#define pct(a, b) ((b) ? (a) * 100 / (b) : 0)
+/**
+ * Print the modify RPC statistics of client \a cli into \a seq.
+ *
+ * The output consists of a snapshot time, the number of modify RPCs
+ * currently in flight and one line per bucket of cl_mod_rpcs_hist with
+ * the bucket count, its percentage and the cumulative percentage.  The
+ * bucket listing stops as soon as the cumulative count has reached the
+ * total returned by lprocfs_oh_sum().  Everything is printed with
+ * cl_mod_rpcs_lock held.
+ *
+ * \param[in] cli      client whose statistics are shown
+ * \param[in] seq      seq_file receiving the text
+ *
+ * \retval 0 always
+ */
+int obd_mod_rpc_stats_seq_show(struct client_obd *cli,
+                              struct seq_file *seq)
+{
+       struct timeval now;
+       unsigned long mod_tot, mod_cum = 0;
+       int i;
+
+       do_gettimeofday(&now);
+
+       spin_lock(&cli->cl_mod_rpcs_lock);
+
+       /* pad the microseconds to six digits so that the value reads as
+        * a decimal fraction of the seconds (5 usecs is ".000005") */
+       seq_printf(seq, "snapshot_time:         %lu.%06lu (secs.usecs)\n",
+                  now.tv_sec, now.tv_usec);
+       seq_printf(seq, "modify_RPCs_in_flight:  %hu\n",
+                  cli->cl_mod_rpcs_in_flight);
+
+       seq_printf(seq, "\n\t\t\tmodify\n");
+       seq_printf(seq, "rpcs in flight        rpcs   %% cum %%\n");
+
+       mod_tot = lprocfs_oh_sum(&cli->cl_mod_rpcs_hist);
+
+       for (i = 0; i < OBD_HIST_MAX; i++) {
+               unsigned long mod = cli->cl_mod_rpcs_hist.oh_buckets[i];
+
+               mod_cum += mod;
+               seq_printf(seq, "%d:\t\t%10lu %3lu %3lu\n",
+                          i, mod, pct(mod, mod_tot),
+                          pct(mod_cum, mod_tot));
+               /* the remaining buckets are all empty */
+               if (mod_cum == mod_tot)
+                       break;
+       }
+
+       spin_unlock(&cli->cl_mod_rpcs_lock);
+
+       return 0;
+}
+EXPORT_SYMBOL(obd_mod_rpc_stats_seq_show);
+#undef pct
+
+
+/* Modify RPCs sent in parallel are limited in number because the server
+ * only has a finite number of slots per client to store request results
+ * and ensure reply reconstruction when needed.
+ * On the client, the limit is kept in cl_max_mod_rpcs_in_flight, which
+ * takes into account both the server limit and cl_max_rpcs_in_flight.
+ * On the MDC client, to avoid a potential deadlock (see Bugzilla 3462),
+ * one close request is allowed above the maximum.
+ *
+ * Returns true when a modify RPC slot can be taken, i.e. when
+ * - fewer modify RPCs than the maximum are in flight, or
+ * - the request is a close and no other close request is in flight.
+ * The callers in this file invoke it with cl_mod_rpcs_lock held.
+ */
+static inline bool obd_mod_rpc_slot_avail_locked(struct client_obd *cli,
+                                                bool close_req)
+{
+       if (cli->cl_mod_rpcs_in_flight < cli->cl_max_mod_rpcs_in_flight)
+               return true;
+
+       /* the single extra slot reserved for a close request */
+       return close_req && cli->cl_close_rpcs_in_flight == 0;
+}
+
+/* Same check as obd_mod_rpc_slot_avail_locked(), but takes and drops
+ * cl_mod_rpcs_lock around it.  The answer may already be outdated when
+ * the caller looks at it, since the lock is released before returning.
+ */
+static inline bool obd_mod_rpc_slot_avail(struct client_obd *cli,
+                                        bool close_req)
+{
+       bool slot_free;
+
+       spin_lock(&cli->cl_mod_rpcs_lock);
+       slot_free = obd_mod_rpc_slot_avail_locked(cli, close_req);
+       spin_unlock(&cli->cl_mod_rpcs_lock);
+
+       return slot_free;
+}
+
+/* Reserve a modify RPC slot on the obd client @cli for an operation of
+ * kind @opc carrying the intent @it (may be NULL).
+ * Intents IT_GETATTR, IT_LOOKUP, IT_LAYOUT and IT_READDIR do not take a
+ * slot and get tag 0, which is reserved for non-modifying requests.
+ * Otherwise the thread sleeps until a slot is available, then accounts
+ * the RPC (and the close RPC when @opc is MDS_CLOSE), tallies the
+ * histogram and allocates a free bit in cl_mod_tag_bitmap.
+ * Returns the tag to be set in the request message (bit index + 1).
+ */
+__u16 obd_get_mod_rpc_slot(struct client_obd *cli, __u32 opc,
+                          struct lookup_intent *it)
+{
+       struct l_wait_info      lwi = LWI_INTR(NULL, NULL);
+       bool                    close_req = (opc == MDS_CLOSE);
+       __u16                   i, limit;
+
+       /* read-only metadata RPCs don't consume a slot on MDT
+        * for reply reconstruction
+        */
+       if (it != NULL) {
+               switch (it->it_op) {
+               case IT_GETATTR:
+               case IT_LOOKUP:
+               case IT_LAYOUT:
+               case IT_READDIR:
+                       return 0;
+               default:
+                       break;
+               }
+       }
+
+       while (true) {
+               spin_lock(&cli->cl_mod_rpcs_lock);
+               limit = cli->cl_max_mod_rpcs_in_flight;
+               if (!obd_mod_rpc_slot_avail_locked(cli, close_req)) {
+                       /* no slot: sleep unlocked, then check again */
+                       spin_unlock(&cli->cl_mod_rpcs_lock);
+
+                       CDEBUG(D_RPCTRACE, "%s: sleeping for a modify RPC slot "
+                              "opc %u, max %hu\n",
+                              cli->cl_import->imp_obd->obd_name, opc, limit);
+
+                       l_wait_event(cli->cl_mod_rpcs_waitq,
+                                    obd_mod_rpc_slot_avail(cli, close_req),
+                                    &lwi);
+                       continue;
+               }
+
+               /* there is a slot available */
+               cli->cl_mod_rpcs_in_flight++;
+               if (close_req)
+                       cli->cl_close_rpcs_in_flight++;
+               lprocfs_oh_tally(&cli->cl_mod_rpcs_hist,
+                                cli->cl_mod_rpcs_in_flight);
+               /* find a free tag; one bit more than the limit is
+                * searched */
+               i = find_first_zero_bit(cli->cl_mod_tag_bitmap, limit + 1);
+               LASSERT(i < OBD_MAX_RIF_MAX);
+               LASSERT(!test_and_set_bit(i, cli->cl_mod_tag_bitmap));
+               spin_unlock(&cli->cl_mod_rpcs_lock);
+               /* tag 0 is reserved for non-modify RPCs */
+               return i + 1;
+       }
+}
+EXPORT_SYMBOL(obd_get_mod_rpc_slot);
+
+/* Release the modify RPC slot identified by @tag on the obd client @cli
+ * for an operation of kind @opc that carried the intent @it (may be NULL).
+ * Nothing is done for intents IT_GETATTR, IT_LOOKUP, IT_LAYOUT and
+ * IT_READDIR.  Otherwise the in-flight counters are decremented, bit
+ * (@tag - 1) of cl_mod_tag_bitmap is cleared (asserting that it was set)
+ * and the threads sleeping on cl_mod_rpcs_waitq are woken up.
+ */
+void obd_put_mod_rpc_slot(struct client_obd *cli, __u32 opc,
+                         struct lookup_intent *it, __u16 tag)
+{
+       if (it != NULL) {
+               switch (it->it_op) {
+               case IT_GETATTR:
+               case IT_LOOKUP:
+               case IT_LAYOUT:
+               case IT_READDIR:
+                       return;
+               default:
+                       break;
+               }
+       }
+
+       spin_lock(&cli->cl_mod_rpcs_lock);
+       cli->cl_mod_rpcs_in_flight--;
+       if (opc == MDS_CLOSE)
+               cli->cl_close_rpcs_in_flight--;
+       /* release the tag in the bitmap */
+       LASSERT(tag - 1 < OBD_MAX_RIF_MAX);
+       LASSERT(test_and_clear_bit(tag - 1, cli->cl_mod_tag_bitmap) != 0);
+       spin_unlock(&cli->cl_mod_rpcs_lock);
+
+       wake_up(&cli->cl_mod_rpcs_waitq);
+}
+EXPORT_SYMBOL(obd_put_mod_rpc_slot);
+