Whamcloud - gitweb
LU-1222 ldlm: Fix the race in AST sender vs multiple arriving RPCs
[fs/lustre-release.git] / lustre / mgs / mgs_handler.c
index cb0ff23..beb0b95 100644 (file)
  * GPL HEADER END
  */
 /*
- * Copyright  2008 Sun Microsystems, Inc. All rights reserved
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Whamcloud, Inc.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
@@ -42,7 +44,7 @@
 # define EXPORT_SYMTAB
 #endif
 #define DEBUG_SUBSYSTEM S_MGS
-#define D_MGS D_CONFIG/*|D_WARNING*/
+#define D_MGS D_CONFIG
 
 #ifdef __KERNEL__
 # include <linux/module.h>
@@ -59,7 +61,7 @@
 #include <lustre_fsfilt.h>
 #include <lustre_disk.h>
 #include "mgs_internal.h"
-
+#include <lustre_param.h>
 
 /* Establish a connection to the MGS.*/
 static int mgs_connect(const struct lu_env *env,
@@ -90,7 +92,7 @@ static int mgs_connect(const struct lu_env *env,
                 data->ocd_version = LUSTRE_VERSION_CODE;
         }
 
-        rc = mgs_client_add(obd, lexp, localdata);
+        rc = mgs_export_stats_init(obd, lexp, localdata);
 
         if (rc) {
                 class_disconnect(lexp);
@@ -119,7 +121,7 @@ static int mgs_reconnect(const struct lu_env *env,
                 data->ocd_version = LUSTRE_VERSION_CODE;
         }
 
-        RETURN(0);
+        RETURN(mgs_export_stats_init(obd, exp, localdata));
 }
 
 static int mgs_disconnect(struct obd_export *exp)
@@ -129,32 +131,12 @@ static int mgs_disconnect(struct obd_export *exp)
 
         LASSERT(exp);
 
+        mgs_fsc_cleanup(exp);
+
         class_export_get(exp);
         mgs_counter_incr(exp, LPROC_MGS_DISCONNECT);
 
-        /* Disconnect early so that clients can't keep using export */
-        rc = class_disconnect(exp);
-        ldlm_cancel_locks_for_export(exp);
-
-        lprocfs_exp_cleanup(exp);
-
-        /* complete all outstanding replies */
-        spin_lock(&exp->exp_lock);
-        while (!list_empty(&exp->exp_outstanding_replies)) {
-                struct ptlrpc_reply_state *rs =
-                        list_entry(exp->exp_outstanding_replies.next,
-                                   struct ptlrpc_reply_state, rs_exp_list);
-                struct ptlrpc_service *svc = rs->rs_service;
-
-                spin_lock(&svc->srv_lock);
-                list_del_init(&rs->rs_exp_list);
-                spin_lock(&rs->rs_lock);
-                ptlrpc_schedule_difficult_reply(rs);
-                spin_unlock(&rs->rs_lock);
-                spin_unlock(&svc->srv_lock);
-        }
-        spin_unlock(&exp->exp_lock);
-
+        rc = server_disconnect_export(exp);
         class_export_put(exp);
         RETURN(rc);
 }
@@ -217,9 +199,14 @@ static int mgs_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
                 GOTO(err_ops, rc = -EROFS);
         }
 
+        obd->u.obt.obt_magic = OBT_MAGIC;
+        obd->u.obt.obt_instance = 0;
+
         /* namespace for mgs llog */
-        obd->obd_namespace = ldlm_namespace_new(obd ,"MGS", LDLM_NAMESPACE_SERVER,
-                                                LDLM_NAMESPACE_MODEST);
+        obd->obd_namespace = ldlm_namespace_new(obd ,"MGS",
+                                                LDLM_NAMESPACE_SERVER,
+                                                LDLM_NAMESPACE_MODEST,
+                                                LDLM_NS_TYPE_MGT);
         if (obd->obd_namespace == NULL)
                 GOTO(err_ops, rc = -ENOMEM);
 
@@ -243,7 +230,17 @@ static int mgs_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
 
         /* Internal mgs setup */
         mgs_init_fsdb_list(obd);
-        sema_init(&mgs->mgs_sem, 1);
+        cfs_sema_init(&mgs->mgs_sem, 1);
+        mgs->mgs_start_time = cfs_time_current_sec();
+
+        /* Setup proc */
+        lprocfs_mgs_init_vars(&lvars);
+        if (lprocfs_obd_setup(obd, lvars.obd_vars) == 0) {
+                lproc_mgs_setup(obd);
+                rc = lprocfs_alloc_md_stats(obd, LPROC_MGS_LAST);
+                if (rc)
+                        GOTO(err_llog, rc);
+        }
 
         /* Start the service threads */
         mgs->mgs_service =
@@ -260,25 +257,20 @@ static int mgs_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
                 GOTO(err_llog, rc = -ENOMEM);
         }
 
-        rc = ptlrpc_start_threads(obd, mgs->mgs_service);
+        rc = ptlrpc_start_threads(mgs->mgs_service);
         if (rc)
                 GOTO(err_thread, rc);
 
-        /* Setup proc */
-        lprocfs_mgs_init_vars(&lvars);
-        if (lprocfs_obd_setup(obd, lvars.obd_vars) == 0) {
-                lproc_mgs_setup(obd);
-        }
-
         ping_evictor_start();
 
-        LCONSOLE_INFO("MGS %s started\n", obd->obd_name);
+        CDEBUG(D_INFO, "MGS %s started\n", obd->obd_name);
 
         RETURN(0);
 
 err_thread:
         ptlrpc_unregister_service(mgs->mgs_service);
 err_llog:
+        lproc_mgs_cleanup(obd);
         obd_llog_finish(obd, 0);
 err_fs:
         /* No extra cleanup needed for llog_init_commit_thread() */
@@ -296,13 +288,19 @@ err_put:
 
 static int mgs_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage)
 {
+        struct mgs_obd *mgs = &obd->u.mgs;
         int rc = 0;
         ENTRY;
 
         switch (stage) {
         case OBD_CLEANUP_EARLY:
+                break;
         case OBD_CLEANUP_EXPORTS:
+                ping_evictor_stop();
+                ptlrpc_unregister_service(mgs->mgs_service);
+                mgs_cleanup_fsdb_list(obd);
                 rc = obd_llog_finish(obd, 0);
+                lproc_mgs_cleanup(obd);
                 break;
         }
         RETURN(rc);
@@ -319,12 +317,6 @@ static int mgs_cleanup(struct obd_device *obd)
         if (mgs->mgs_sb == NULL)
                 RETURN(0);
 
-        ping_evictor_stop();
-
-        ptlrpc_unregister_service(mgs->mgs_service);
-
-        mgs_cleanup_fsdb_list(obd);
-        lproc_mgs_cleanup(obd);
         mgs_fs_cleanup(obd);
 
         server_put_mount(obd->obd_name, mgs->mgs_vfsmnt);
@@ -339,47 +331,89 @@ static int mgs_cleanup(struct obd_device *obd)
         RETURN(0);
 }
 
-/* similar to filter_prepare_destroy */
-static int mgs_get_cfg_lock(struct obd_device *obd, char *fsname,
-                            struct lustre_handle *lockh)
+static int mgs_completion_ast_config(struct ldlm_lock *lock, int flags,
+                                     void *cbdata)
 {
-        struct ldlm_res_id res_id;
-        int rc, flags = 0;
         ENTRY;
 
-        rc = mgc_fsname2resid(fsname, &res_id);
-        if (!rc)
-                rc = ldlm_cli_enqueue_local(obd->obd_namespace, &res_id,
-                                            LDLM_PLAIN, NULL, LCK_EX,
-                                            &flags, ldlm_blocking_ast,
-                                            ldlm_completion_ast, NULL,
-                                            fsname, 0, NULL, NULL, lockh);
-        if (rc)
-                CERROR("can't take cfg lock for %s (%d)\n", fsname, rc);
+        if (!(flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED |
+                       LDLM_FL_BLOCK_CONV))) { /* no BLOCK_* flag is set */
+                struct fs_db *fsdb = (struct fs_db *)lock->l_ast_data;
+                struct lustre_handle lockh;
 
-        RETURN(rc);
+                /* revoke no longer pending: clear the bit before lock put */
+                cfs_clear_bit(FSDB_REVOKING_LOCK, &fsdb->fsdb_flags);
+                /* drop the LCK_EX reference and cancel the lock */
+                ldlm_lock2handle(lock, &lockh);
+                ldlm_lock_decref_and_cancel(&lockh, LCK_EX);
+        }
+        /* whatever the flags, finish with the generic completion AST */
+        RETURN(ldlm_completion_ast(lock, flags, cbdata));
 }
 
-static int mgs_put_cfg_lock(struct lustre_handle *lockh)
+static int mgs_completion_ast_ir(struct ldlm_lock *lock, int flags,
+                                 void *cbdata)
 {
         ENTRY;
-        ldlm_lock_decref(lockh, LCK_EX);
-        RETURN(0);
+        /* completion AST installed by mgs_revoke_lock() for CONFIG_T_RECOVER */
+        if (!(flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED |
+                       LDLM_FL_BLOCK_CONV))) { /* no BLOCK_* flag is set */
+                struct fs_db *fsdb = (struct fs_db *)lock->l_ast_data;
+                struct lustre_handle lockh;
+
+                mgs_ir_notify_complete(fsdb);
+                /* drop the LCK_EX reference and cancel the lock */
+                ldlm_lock2handle(lock, &lockh);
+                ldlm_lock_decref_and_cancel(&lockh, LCK_EX);
+        }
+        /* whatever the flags, finish with the generic completion AST */
+        RETURN(ldlm_completion_ast(lock, flags, cbdata));
 }
 
-static void mgs_revoke_lock(struct obd_device *obd, char *fsname,
-                            struct lustre_handle *lockh)
+void mgs_revoke_lock(struct obd_device *obd, struct fs_db *fsdb, int type)
 {
-        int lockrc;
-
-        if (fsname[0]) {
-                lockrc = mgs_get_cfg_lock(obd, fsname, lockh);
-                if (lockrc != ELDLM_OK)
-                        CERROR("lock error %d for fs %s\n", lockrc,
-                               fsname);
-                else
-                        mgs_put_cfg_lock(lockh);
+        ldlm_completion_callback cp = NULL;
+        struct lustre_handle     lockh = { 0 };
+        struct ldlm_res_id       res_id;
+        int flags = LDLM_FL_ATOMIC_CB;
+        int rc;
+        ENTRY;
+        /* take an EX lock on the fs resource; the cp AST drops it again */
+        LASSERT(fsdb->fsdb_name[0] != '\0');
+        rc = mgc_fsname2resid(fsdb->fsdb_name, &res_id, type);
+        LASSERT(rc == 0);
+        /* choose cp; skip if a CONFIG_T_CONFIG revoke is already pending */
+        switch (type) {
+        case CONFIG_T_CONFIG:
+                cp = mgs_completion_ast_config;
+                if (cfs_test_and_set_bit(FSDB_REVOKING_LOCK, &fsdb->fsdb_flags))
+                        rc = -EALREADY; /* makes the enqueue below a no-op */
+                break;
+        case CONFIG_T_RECOVER:
+                cp = mgs_completion_ast_ir; /* fallthrough */
+        default: /* any other type leaves cp NULL and trips the LASSERT */
+                break;
         }
+
+        if (!rc) {
+                LASSERT(cp != NULL);
+                rc = ldlm_cli_enqueue_local(obd->obd_namespace, &res_id,
+                                            LDLM_PLAIN, NULL, LCK_EX, &flags,
+                                            ldlm_blocking_ast, cp, NULL,
+                                            fsdb, 0, NULL, &lockh);
+                if (rc != ELDLM_OK) {
+                        CERROR("can't take cfg lock for "LPX64"/"LPX64"(%d)\n",
+                               le64_to_cpu(res_id.name[0]),
+                               le64_to_cpu(res_id.name[1]), rc);
+                        /* cp will not clear the bit for us on this path */
+                        if (type == CONFIG_T_CONFIG)
+                                cfs_clear_bit(FSDB_REVOKING_LOCK,
+                                              &fsdb->fsdb_flags);
+                }
+                /* lock has been cancelled in completion_ast. */
+        }
+
+        RETURN_EXIT;
 }
 
 /* rc=0 means ok
@@ -413,18 +447,67 @@ static int mgs_check_target(struct obd_device *obd, struct mgs_target_info *mti)
         RETURN(rc);
 }
 
+/* Deny registration when one of mti's NIDs is also listed as a failover
+ * NID in mti_params: returns -EADDRNOTAVAIL on a match, 0 otherwise. */
+static int mgs_check_failover_reg(struct mgs_target_info *mti)
+{
+        char *cursor = mti->mti_params;
+        lnet_nid_t failnid;
+        int idx;
+
+        while (class_find_param(cursor, PARAM_FAILNODE, &cursor) == 0) {
+                while (class_parse_nid(cursor, &failnid, &cursor) == 0) {
+                        for (idx = 0; idx < mti->mti_nid_count; idx++) {
+                                if (mti->mti_nids[idx] != failnid)
+                                        continue;
+                                LCONSOLE_WARN("Denying initial registra"
+                                              "tion attempt from nid %s"
+                                              ", specified as failover"
+                                              "\n",libcfs_nid2str(failnid));
+                                return -EADDRNOTAVAIL;
+                        }
+                }
+        }
+        return 0;
+}
+
 /* Called whenever a target starts up.  Flags indicate first connect, etc. */
 static int mgs_handle_target_reg(struct ptlrpc_request *req)
 {
         struct obd_device *obd = req->rq_export->exp_obd;
-        struct lustre_handle lockh;
         struct mgs_target_info *mti, *rep_mti;
-        int rc = 0, lockrc;
+        struct fs_db *fsdb;
+        int opc;
+        int rc = 0;
         ENTRY;
 
         mgs_counter_incr(req->rq_export, LPROC_MGS_TARGET_REG);
 
         mti = req_capsule_client_get(&req->rq_pill, &RMF_MGS_TARGET_INFO);
+
+        opc = mti->mti_flags & LDD_F_OPC_MASK;
+        if (opc == LDD_F_OPC_READY) {
+                CDEBUG(D_MGS, "fs: %s index: %d is ready to reconnect.\n",
+                       mti->mti_fsname, mti->mti_stripe_index);
+                rc = mgs_ir_update(obd, mti);
+                if (rc) {
+                        LASSERT(!(mti->mti_flags & LDD_F_IR_CAPABLE));
+                        CERROR("Update IR return with %d(ignore and IR "
+                               "disabled)\n", rc);
+                }
+                GOTO(out_nolock, rc);
+        }
+
+        /* Do not support unregistering right now. */
+        if (opc != LDD_F_OPC_REG)
+                GOTO(out_nolock, rc = -EINVAL);
+
+        CDEBUG(D_MGS, "fs: %s index: %d is registered to MGS.\n",
+               mti->mti_fsname, mti->mti_stripe_index);
+
+        if (mti->mti_flags & LDD_F_NEED_INDEX)
+                mti->mti_flags |= LDD_F_WRITECONF;
+
         if (!(mti->mti_flags & (LDD_F_WRITECONF | LDD_F_UPGRADE14 |
                                 LDD_F_UPDATE))) {
                 /* We're just here as a startup ping. */
@@ -435,26 +518,14 @@ static int mgs_handle_target_reg(struct ptlrpc_request *req)
                 if (rc <= 0)
                         /* Nothing wrong, or fatal error */
                         GOTO(out_nolock, rc);
-        }
-
-        /* Revoke the config lock to make sure nobody is reading. */
-        /* Although actually I think it should be alright if
-           someone was reading while we were updating the logs - if we
-           revoke at the end they will just update from where they left off. */
-        lockrc = mgs_get_cfg_lock(obd, mti->mti_fsname, &lockh);
-        if (lockrc != ELDLM_OK) {
-                LCONSOLE_ERROR_MSG(0x13d, "%s: Can't signal other nodes to "
-                                   "update their configuration (%d). Updating "
-                                   "local logs anyhow; you might have to "
-                                   "manually restart other nodes to get the "
-                                   "latest configuration.\n",
-                                   obd->obd_name, lockrc);
+        } else {
+                if (!(mti->mti_flags & LDD_F_NO_PRIMNODE)
+                    && (rc = mgs_check_failover_reg(mti)))
+                        GOTO(out_nolock, rc);
         }
 
         OBD_FAIL_TIMEOUT(OBD_FAIL_MGS_PAUSE_TARGET_REG, 10);
 
-        /* Log writing contention is handled by the fsdb_sem */
-
         if (mti->mti_flags & LDD_F_WRITECONF) {
                 if (mti->mti_flags & LDD_F_SV_TYPE_MDT &&
                     mti->mti_stripe_index == 0) {
@@ -475,9 +546,23 @@ static int mgs_handle_target_reg(struct ptlrpc_request *req)
                 mti->mti_flags &= ~LDD_F_UPGRADE14;
         }
 
+        rc = mgs_find_or_make_fsdb(obd, mti->mti_fsname, &fsdb);
+        if (rc) {
+                CERROR("Can't get db for %s: %d\n", mti->mti_fsname, rc);
+                GOTO(out_nolock, rc);
+        }
+
+        /*
+         * Log writing contention is handled by the fsdb_sem.
+         *
+         * It should be alright if someone was reading while we were
+         * updating the logs - if we revoke at the end they will just update
+         * from where they left off.
+         */
+
         /* COMPAT_146 */
         if (mti->mti_flags & LDD_F_UPGRADE14) {
-                rc = mgs_upgrade_sv_14(obd, mti);
+                rc = mgs_upgrade_sv_14(obd, mti, fsdb);
                 if (rc) {
                         CERROR("Can't upgrade from 1.4 (%d)\n", rc);
                         GOTO(out, rc);
@@ -494,7 +579,7 @@ static int mgs_handle_target_reg(struct ptlrpc_request *req)
 
                 /* create or update the target log
                    and update the client/mdt logs */
-                rc = mgs_write_log_target(obd, mti);
+                rc = mgs_write_log_target(obd, mti, fsdb);
                 if (rc) {
                         CERROR("Failed to write %s log (%d)\n",
                                mti->mti_svname, rc);
@@ -508,12 +593,17 @@ static int mgs_handle_target_reg(struct ptlrpc_request *req)
         }
 
 out:
-        /* done with log update */
-        if (lockrc == ELDLM_OK)
-                mgs_put_cfg_lock(&lockh);
+        mgs_revoke_lock(obd, fsdb, CONFIG_T_CONFIG);
+
 out_nolock:
         CDEBUG(D_MGS, "replying with %s, index=%d, rc=%d\n", mti->mti_svname,
                mti->mti_stripe_index, rc);
+        req->rq_status = rc;
+        if (rc)
+                /* we need an error flag to tell the target what's going on,
+                 * instead of just doing it by error code only. */
+                mti->mti_flags |= LDD_F_ERROR;
+
         rc = req_capsule_server_pack(&req->rq_pill);
         if (rc)
                 RETURN(rc);
@@ -531,7 +621,6 @@ static int mgs_set_info_rpc(struct ptlrpc_request *req)
 {
         struct obd_device *obd = req->rq_export->exp_obd;
         struct mgs_send_param *msp, *rep_msp;
-        struct lustre_handle lockh;
         int rc;
         struct lustre_cfg_bufs bufs;
         struct lustre_cfg *lcfg;
@@ -552,9 +641,6 @@ static int mgs_set_info_rpc(struct ptlrpc_request *req)
                 RETURN(rc);
         }
 
-        /* request for update */
-        mgs_revoke_lock(obd, fsname, &lockh);
-
         lustre_cfg_free(lcfg);
 
         rc = req_capsule_server_pack(&req->rq_pill);
@@ -565,6 +651,33 @@ static int mgs_set_info_rpc(struct ptlrpc_request *req)
         RETURN(rc);
 }
 
+/*
+ * Handle an MGS_CONFIG_READ request by dispatching on the config type
+ * found in the request's mgs_config_body.
+ *
+ * Returns the result of mgs_get_ir_logs() for CONFIG_T_RECOVER, -ENOTSUPP
+ * for CONFIG_T_CONFIG, and -EINVAL for a missing body or any other type.
+ */
+static int mgs_config_read(struct ptlrpc_request *req)
+{
+        struct mgs_config_body *mcb;
+        int rc;
+        ENTRY;
+
+        mcb = req_capsule_client_get(&req->rq_pill, &RMF_MGS_CONFIG_BODY);
+        if (mcb == NULL)
+                RETURN(-EINVAL);
+
+        if (mcb->mcb_type == CONFIG_T_RECOVER)
+                rc = mgs_get_ir_logs(req);
+        else if (mcb->mcb_type == CONFIG_T_CONFIG)
+                rc = -ENOTSUPP;
+        else
+                rc = -EINVAL;
+
+        RETURN(rc);
+}
+
 /*
  * similar as in ost_connect_check_sptlrpc()
  */
@@ -581,7 +694,7 @@ static int mgs_connect_check_sptlrpc(struct ptlrpc_request *req)
                 if (rc)
                         return rc;
 
-                down(&fsdb->fsdb_sem);
+                cfs_down(&fsdb->fsdb_sem);
                 if (sptlrpc_rule_set_choose(&fsdb->fsdb_srpc_gen,
                                             LUSTRE_SP_MGC, LUSTRE_SP_MGS,
                                             req->rq_peer.nid,
@@ -589,9 +702,9 @@ static int mgs_connect_check_sptlrpc(struct ptlrpc_request *req)
                         /* by defualt allow any flavors */
                         flvr.sf_rpc = SPTLRPC_FLVR_ANY;
                 }
-                up(&fsdb->fsdb_sem);
+                cfs_up(&fsdb->fsdb_sem);
 
-                spin_lock(&exp->exp_lock);
+                cfs_spin_lock(&exp->exp_lock);
 
                 exp->exp_sp_peer = req->rq_sp_from;
                 exp->exp_flvr = flvr;
@@ -604,7 +717,7 @@ static int mgs_connect_check_sptlrpc(struct ptlrpc_request *req)
                         rc = -EACCES;
                 }
 
-                spin_unlock(&exp->exp_lock);
+                cfs_spin_unlock(&exp->exp_lock);
         } else {
                 if (exp->exp_sp_peer != req->rq_sp_from) {
                         CERROR("RPC source %s doesn't match %s\n",
@@ -636,6 +749,52 @@ static int mgs_handle_exception(struct ptlrpc_request *req)
         RETURN(0);
 }
 
+/*
+ * For old clients the MGS has no direct way of knowing which filesystems
+ * a client is using, yet the MGS must mark such filesystems as non-IR
+ * capable because old clients are not ready to be notified.
+ *
+ * Hence this _hack_: the filesystem name is taken from the log name (the
+ * part before the first '-') of the llog request that clients currently
+ * use to fetch configuration logs. This is fine as long as that is the
+ * ONLY llog operation between mgc and the MGS; if another llog operation
+ * is added, this function needs fixing. Once releases prior to 2.0 are no
+ * longer supported, it can be removed.
+ *
+ * Returns -EINVAL for a missing or malformed log name, otherwise the
+ * return value of mgs_fsc_attach().
+ */
+static int mgs_handle_fslog_hack(struct ptlrpc_request *req)
+{
+        char fsname[16];
+        char *logname;
+        char *ptr;
+        size_t len;
+        int rc;
+
+        /* XXX: We suppose that llog at mgs is only used for
+         * fetching file system log */
+        logname = req_capsule_client_get(&req->rq_pill, &RMF_NAME);
+        if (logname == NULL) {
+                CERROR("No logname, is llog on MGS used for something else?\n");
+                return -EINVAL;
+        }
+
+        /* test ptr before subtracting: NULL - logname is undefined */
+        ptr = strchr(logname, '-');
+        if (ptr == NULL || (size_t)(ptr - logname) >= sizeof(fsname)) {
+                CERROR("Invalid logname received: %s\n", logname);
+                return -EINVAL;
+        }
+        len = ptr - logname;
+        memcpy(fsname, logname, len);
+        fsname[len] = '\0';
+        rc = mgs_fsc_attach(req->rq_export, fsname);
+        if (rc < 0 && rc != -EEXIST)
+                CERROR("add fs client %s returns %d\n", fsname, rc);
+        return rc;
+}
+
 /* TODO: handle requests in a similar way as MDT: see mdt_handle_common() */
 int mgs_handle(struct ptlrpc_request *req)
 {
@@ -644,8 +803,8 @@ int mgs_handle(struct ptlrpc_request *req)
         ENTRY;
 
         req_capsule_init(&req->rq_pill, req, RCL_SERVER);
-        OBD_FAIL_TIMEOUT_MS(OBD_FAIL_MGS_PAUSE_REQ, obd_fail_val);
-        if (OBD_FAIL_CHECK(OBD_FAIL_MGS_ALL_REQUEST_NET))
+        CFS_FAIL_TIMEOUT_MS(OBD_FAIL_MGS_PAUSE_REQ, cfs_fail_val);
+        if (CFS_FAIL_CHECK(OBD_FAIL_MGS_ALL_REQUEST_NET))
                 RETURN(0);
 
         LASSERT(current->journal_info == NULL);
@@ -657,9 +816,8 @@ int mgs_handle(struct ptlrpc_request *req)
                 GOTO(out, rc = 0);
 
         if (opc != MGS_CONNECT) {
-                if (req->rq_export == NULL) {
-                        CERROR("lustre_mgs: operation %d on unconnected MGS\n",
-                               opc);
+                if (!class_connected_export(req->rq_export)) {
+                        DEBUG_REQ(D_MGS, req, "operation on unconnected MGS\n");
                         req->rq_status = -ENOTCONN;
                         GOTO(out, rc = -ENOTCONN);
                 }
@@ -705,7 +863,11 @@ int mgs_handle(struct ptlrpc_request *req)
                 req_capsule_set(&req->rq_pill, &RQF_MGS_SET_INFO);
                 rc = mgs_set_info_rpc(req);
                 break;
-
+        case MGS_CONFIG_READ:
+                DEBUG_REQ(D_MGS, req, "read config");
+                req_capsule_set(&req->rq_pill, &RQF_MGS_CONFIG_READ);
+                rc = mgs_config_read(req);
+                break;
         case LDLM_ENQUEUE:
                 DEBUG_REQ(D_MGS, req, "enqueue");
                 req_capsule_set(&req->rq_pill, &RQF_LDLM_ENQUEUE);
@@ -733,6 +895,8 @@ int mgs_handle(struct ptlrpc_request *req)
                 DEBUG_REQ(D_MGS, req, "llog_init");
                 req_capsule_set(&req->rq_pill, &RQF_LLOG_ORIGIN_HANDLE_CREATE);
                 rc = llog_origin_handle_create(req);
+                if (rc == 0)
+                        (void)mgs_handle_fslog_hack(req);
                 break;
         case LLOG_ORIGIN_HANDLE_NEXT_BLOCK:
                 DEBUG_REQ(D_MGS, req, "llog next block");
@@ -773,10 +937,20 @@ out:
 
 static inline int mgs_init_export(struct obd_export *exp)
 {
-        spin_lock(&exp->exp_lock);
+        struct mgs_export_data *data = &exp->u.eu_mgs_data;
+
+        /* init the fsc part of mgs_export_data: its lock and client list */
+        cfs_spin_lock_init(&data->med_lock);
+        CFS_INIT_LIST_HEAD(&data->med_clients);
+
+        cfs_spin_lock(&exp->exp_lock);
         exp->exp_connecting = 1;
-        spin_unlock(&exp->exp_lock);
+        cfs_spin_unlock(&exp->exp_lock);
 
+        /* self-export (client uuid == obd uuid): no client data or ldlm init */
+        if (unlikely(obd_uuid_equals(&exp->exp_obd->obd_uuid,
+                                     &exp->exp_client_uuid)))
+                return 0;
         return ldlm_init_export(exp);
 }
 
@@ -786,6 +960,11 @@ static inline int mgs_destroy_export(struct obd_export *exp)
 
         target_destroy_export(exp);
         mgs_client_free(exp);
+
+        if (unlikely(obd_uuid_equals(&exp->exp_obd->obd_uuid,
+                                     &exp->exp_client_uuid)))
+                RETURN(0);
+
         ldlm_destroy_export(exp);
 
         RETURN(0);
@@ -813,7 +992,6 @@ static int mgs_iocontrol_pool(struct obd_device *obd,
                               struct obd_ioctl_data *data)
 {
         int rc;
-        struct lustre_handle lockh;
         struct lustre_cfg *lcfg = NULL;
         struct llog_rec_hdr rec;
         char *fsname = NULL;
@@ -848,7 +1026,7 @@ static int mgs_iocontrol_pool(struct obd_device *obd,
         if (lcfg == NULL)
                 GOTO(out_pool, rc = -ENOMEM);
 
-        if (copy_from_user(lcfg, data->ioc_pbuf1, data->ioc_plen1))
+        if (cfs_copy_from_user(lcfg, data->ioc_pbuf1, data->ioc_plen1))
                 GOTO(out_pool, rc = -EFAULT);
 
         if (lcfg->lcfg_bufcount < 2) {
@@ -900,9 +1078,6 @@ static int mgs_iocontrol_pool(struct obd_device *obd,
                 GOTO(out_pool, rc);
         }
 
-        /* request for update */
-        mgs_revoke_lock(obd, fsname, &lockh);
-
 out_pool:
         if (lcfg != NULL)
                 OBD_FREE(lcfg, data->ioc_plen1);
@@ -931,7 +1106,6 @@ int mgs_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
         switch (cmd) {
 
         case OBD_IOC_PARAM: {
-                struct lustre_handle lockh;
                 struct lustre_cfg *lcfg;
                 struct llog_rec_hdr rec;
                 char fsname[MTI_NAME_MAXLEN];
@@ -948,7 +1122,7 @@ int mgs_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
                 OBD_ALLOC(lcfg, data->ioc_plen1);
                 if (lcfg == NULL)
                         RETURN(-ENOMEM);
-                if (copy_from_user(lcfg, data->ioc_pbuf1, data->ioc_plen1))
+                if (cfs_copy_from_user(lcfg, data->ioc_pbuf1, data->ioc_plen1))
                         GOTO(out_free, rc = -EFAULT);
 
                 if (lcfg->lcfg_bufcount < 1)
@@ -959,13 +1133,6 @@ int mgs_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
                         CERROR("setparam err %d\n", rc);
                         GOTO(out_free, rc);
                 }
-
-                /* Revoke lock so everyone updates.  Should be alright if
-                   someone was already reading while we were updating the logs,
-                   so we don't really need to hold the lock while we're
-                   writing (above). */
-                mgs_revoke_lock(obd, fsname, &lockh);
-
 out_free:
                 OBD_FREE(lcfg, data->ioc_plen1);
                 RETURN(rc);