Whamcloud - gitweb
LU-4200 mgs: fix counting of mgs exports
[fs/lustre-release.git] / lustre / mgs / mgs_llog.c
index d20b59e..820acba 100644 (file)
@@ -27,7 +27,7 @@
  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
  *
- * Copyright (c) 2011, Whamcloud, Inc.
+ * Copyright (c) 2011, 2013, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
@@ -46,7 +46,6 @@
 #define D_MGS D_CONFIG
 
 #include <obd.h>
-#include <obd_lov.h>
 #include <lustre_param.h>
 #include <lustre_sec.h>
 #include <lustre_quota.h>
@@ -67,9 +66,6 @@ int class_dentry_readdir(const struct lu_env *env,
 
        CFS_INIT_LIST_HEAD(list);
 
-       if (!dt_try_as_dir(env, dir))
-               GOTO(out, rc = -ENOTDIR);
-
        LASSERT(dir);
        LASSERT(dir->do_index_ops);
 
@@ -122,7 +118,6 @@ next:
 
 fini:
        iops->fini(env, it);
-out:
        if (rc)
                CERROR("%s: key failed when listing %s: rc = %d\n",
                       mgs->mgs_obd->obd_name, MOUNT_CONFIGS_DIR, rc);
@@ -548,11 +543,6 @@ static int mgs_set_index(const struct lu_env *env,
                 imap = fsdb->fsdb_ost_index_map;
         } else if (mti->mti_flags & LDD_F_SV_TYPE_MDT) {
                 imap = fsdb->fsdb_mdt_index_map;
-                if (fsdb->fsdb_mdt_count >= MAX_MDT_COUNT) {
-                        LCONSOLE_ERROR_MSG(0x13f, "The max mdt count"
-                                           "is %d\n", (int)MAX_MDT_COUNT);
-                       GOTO(out_up, rc = -ERANGE);
-                }
         } else {
                GOTO(out_up, rc = -EINVAL);
         }
@@ -616,9 +606,8 @@ static int mgs_modify_handler(const struct lu_env *env,
 {
        struct mgs_modify_lookup *mml = data;
         struct cfg_marker *marker;
-        struct lustre_cfg *lcfg = (struct lustre_cfg *)(rec + 1);
-        int cfg_len = rec->lrh_len - sizeof(struct llog_rec_hdr) -
-                sizeof(struct llog_rec_tail);
+       struct lustre_cfg *lcfg = REC_DATA(rec);
+       int cfg_len = REC_DATA_LEN(rec);
         int rc;
         ENTRY;
 
@@ -703,8 +692,14 @@ static int mgs_modify(const struct lu_env *env, struct mgs_device *mgs,
         OBD_ALLOC_PTR(mml);
         if (!mml)
                 GOTO(out_close, rc = -ENOMEM);
-        strcpy(mml->mml_marker.cm_comment, comment);
-        strcpy(mml->mml_marker.cm_tgtname, devname);
+       if (strlcpy(mml->mml_marker.cm_comment, comment,
+                   sizeof(mml->mml_marker.cm_comment)) >=
+           sizeof(mml->mml_marker.cm_comment))
+               GOTO(out_free, rc = -E2BIG);
+       if (strlcpy(mml->mml_marker.cm_tgtname, devname,
+                   sizeof(mml->mml_marker.cm_tgtname)) >=
+           sizeof(mml->mml_marker.cm_tgtname))
+               GOTO(out_free, rc = -E2BIG);
         /* Modify mostly means cancel */
         mml->mml_marker.cm_flags = flags;
         mml->mml_marker.cm_canceltime = flags ? cfs_time_current_sec() : 0;
@@ -713,6 +708,8 @@ static int mgs_modify(const struct lu_env *env, struct mgs_device *mgs,
                          NULL);
        if (!rc && !mml->mml_modified)
                rc = 1;
+
+out_free:
         OBD_FREE_PTR(mml);
 
 out_close:
@@ -725,7 +722,66 @@ out_pop:
         RETURN(rc);
 }
 
-/******************** config log recording functions *********************/
+/**
+ * This structure is passed to mgs_replace_handler.
+ * It carries the state kept while a config llog is copied record by record
+ * with the NIDs of one target device replaced.
+ */
+struct mgs_replace_uuid_lookup {
+       /* Nids are replaced for this target device: mti_svname is compared
+        * with marker target names, mti_params holds the new nid list */
+       struct mgs_target_info target;
+       /* Temporary modified llog; copied and rewritten records go here */
+       struct llog_handle *temp_llh;
+       /* Flag is set while inside the target block, i.e. between the
+        * CM_START and CM_END markers of the target device */
+       int in_target_device;
+       /* Nids already added. Further LCFG_ADD_UUID and LCFG_ADD_CONN
+        * records of the block are just skipped (multiple nids) */
+       int device_nids_added;
+       /* Flag is set if this block should not be copied (CM_EXCLUDE) */
+       int skip_it;
+};
+
+/**
+ * Track marker records while a config llog is being copied.
+ *
+ * Updates \a mrul so that the caller knows
+ * a) whether the current block is excluded and must be skipped
+ * b) whether the current block belongs to the target device
+ *
+ * \param[in]     lcfg  config record being examined
+ * \param[in,out] mrul  replace state, updated for marker records
+ *
+ * \retval 0 record should not be skipped
+ * \retval 1 record is a CM_EXCLUDE start/end marker and should be skipped
+ */
+static int check_markers(struct lustre_cfg *lcfg,
+                        struct mgs_replace_uuid_lookup *mrul)
+{
+       struct cfg_marker *marker;
+
+       /* Only marker records change the tracking state */
+       if (lcfg->lcfg_command != LCFG_MARKER)
+               return 0;
+
+       marker = lustre_cfg_buf(lcfg, 1);
+
+       /* Clean llog from records marked as CM_EXCLUDE.
+        * CM_SKIP records are used for "active" command
+        * and can be restored if needed */
+       if ((marker->cm_flags & (CM_EXCLUDE | CM_START)) ==
+           (CM_EXCLUDE | CM_START)) {
+               mrul->skip_it = 1;
+               return 1;
+       }
+
+       if ((marker->cm_flags & (CM_EXCLUDE | CM_END)) ==
+           (CM_EXCLUDE | CM_END)) {
+               mrul->skip_it = 0;
+               return 1;
+       }
+
+       /* Markers of other devices leave the target tracking untouched */
+       if (strcmp(mrul->target.mti_svname, marker->cm_tgtname) != 0)
+               return 0;
+
+       /* A marker must not be both a start and an end marker */
+       LASSERT(!(marker->cm_flags & CM_START) ||
+               !(marker->cm_flags & CM_END));
+
+       if (marker->cm_flags & CM_START) {
+               /* Entering the target block: nids are not replaced yet */
+               mrul->in_target_device = 1;
+               mrul->device_nids_added = 0;
+       } else if (marker->cm_flags & CM_END) {
+               mrul->in_target_device = 0;
+       }
+
+       return 0;
+}
 
 static int record_lcfg(const struct lu_env *env, struct llog_handle *llh,
                       struct lustre_cfg *lcfg)
@@ -787,13 +843,11 @@ static int record_base(const struct lu_env *env, struct llog_handle *llh,
        return rc;
 }
 
-
 static inline int record_add_uuid(const struct lu_env *env,
                                  struct llog_handle *llh,
                                  uint64_t nid, char *uuid)
 {
        return record_base(env, llh, NULL, nid, LCFG_ADD_UUID, uuid, 0, 0, 0);
-
 }
 
 static inline int record_add_conn(const struct lu_env *env,
@@ -817,6 +871,422 @@ static inline int record_setup(const struct lu_env *env,
        return record_base(env, llh, devname, 0, LCFG_SETUP, s1, s2, s3, s4);
 }
 
+/**
+ * Rewrite one record found inside the target device block.
+ *
+ * An LCFG_ADD_UUID record is replaced by one LCFG_ADD_UUID record per nid
+ * parsed from mrul->target.mti_params. Once nids were added, an LCFG_SETUP
+ * record is recorded again with mti_params in place of its second string.
+ * Any other record is left for the caller to copy.
+ *
+ * \param[in]     env   execution environment
+ * \param[in]     lcfg  config record being processed
+ * \param[in,out] mrul  replace state; device_nids_added is set once at
+ *                      least one nid was recorded
+ *
+ * \retval <0 record processing error
+ * \retval >0 record is processed. No need to copy the original one.
+ * \retval 0 record is not processed.
+ */
+static int process_command(const struct lu_env *env, struct lustre_cfg *lcfg,
+                          struct mgs_replace_uuid_lookup *mrul)
+{
+       int nids_added = 0;
+       lnet_nid_t nid;
+       char *ptr;
+       int rc;
+
+       if (lcfg->lcfg_command == LCFG_ADD_UUID) {
+               /* LCFG_ADD_UUID command found. Let's skip original command
+                  and add passed nids */
+               ptr = mrul->target.mti_params;
+               while (class_parse_nid(ptr, &nid, &ptr) == 0) {
+                       CDEBUG(D_MGS, "add nid %s with uuid %s, "
+                              "device %s\n", libcfs_nid2str(nid),
+                               mrul->target.mti_params,
+                               mrul->target.mti_svname);
+                       rc = record_add_uuid(env,
+                                            mrul->temp_llh, nid,
+                                            mrul->target.mti_params);
+                       /* Only successfully recorded nids are counted */
+                       if (!rc)
+                               nids_added++;
+               }
+
+               if (nids_added == 0) {
+                       /* nid is deliberately not printed here: it is never
+                        * assigned when the very first parse fails. The
+                        * function has no ENTRY, so plain return is used. */
+                       CERROR("No new nids were added, uuid %s, "
+                              "device %s\n",
+                              mrul->target.mti_params,
+                              mrul->target.mti_svname);
+                       return -ENXIO;
+               }
+
+               mrul->device_nids_added = 1;
+               return nids_added;
+       }
+
+       if (mrul->device_nids_added && lcfg->lcfg_command == LCFG_SETUP) {
+               /* LCFG_SETUP command found. UUID should be changed */
+               rc = record_setup(env,
+                                 mrul->temp_llh,
+                                 /* devname the same */
+                                 lustre_cfg_string(lcfg, 0),
+                                 /* s1 is not changed */
+                                 lustre_cfg_string(lcfg, 1),
+                                 /* new uuid should be
+                                 the full nidlist */
+                                 mrul->target.mti_params,
+                                 /* s3 is not changed */
+                                 lustre_cfg_string(lcfg, 3),
+                                 /* s4 is not changed */
+                                 lustre_cfg_string(lcfg, 4));
+               return rc ? rc : 1;
+       }
+
+       /* Other commands in the target device block */
+       return 0;
+}
+
+/**
+ * Handler that is called for every record in the llog.
+ * Records are processed in the order they were placed in the llog.
+ *
+ * Records outside the target device block are copied unchanged to
+ * mrul->temp_llh. Inside the block a record is either rewritten by
+ * process_command() or copied. Excluded blocks, records failing the sanity
+ * check, and LCFG_ADD_UUID/LCFG_ADD_CONN records seen after the new nids
+ * were added are dropped.
+ *
+ * \param[in] env       execution environment
+ * \param[in] llh       log to be processed
+ * \param[in] rec       current record
+ * \param[in] data      mgs_replace_uuid_lookup structure
+ *
+ * \retval 0    record copied, rewritten or skipped
+ * \retval <0   unexpected record type, or error from process_command()
+ *              or llog_write()
+ */
+static int mgs_replace_handler(const struct lu_env *env,
+                              struct llog_handle *llh,
+                              struct llog_rec_hdr *rec,
+                              void *data)
+{
+       struct mgs_replace_uuid_lookup *mrul = data;
+       struct lustre_cfg *lcfg = REC_DATA(rec);
+       int cfg_len = REC_DATA_LEN(rec);
+       int rc;
+       ENTRY;
+
+       if (rec->lrh_type != OBD_CFG_REC) {
+               /* The payload is not a validated lustre_cfg here, so only
+                * the record header is reported */
+               CERROR("unhandled lrh_type: %#x\n", rec->lrh_type);
+               RETURN(-EINVAL);
+       }
+
+       rc = lustre_cfg_sanity_check(lcfg, cfg_len);
+       if (rc) {
+               /* Do not copy any invalidated records. The lcfg buffers
+                * failed validation, so they are not parsed for logging */
+               CDEBUG(D_MGS, "Skipped invalid record idx=%d, len=%d: "
+                      "rc = %d\n", rec->lrh_index, rec->lrh_len, rc);
+               RETURN(0);
+       }
+
+       rc = check_markers(lcfg, mrul);
+       if (rc || mrul->skip_it)
+               GOTO(skip_out, rc = 0);
+
+       /* Write to new log all commands outside target device block */
+       if (!mrul->in_target_device)
+               GOTO(copy_out, rc = 0);
+
+       /* Skip all other LCFG_ADD_UUID and LCFG_ADD_CONN records
+          (failover nids) for this target, assuming that if the
+          primary is changing then so is the failover */
+       if (mrul->device_nids_added &&
+           (lcfg->lcfg_command == LCFG_ADD_UUID ||
+            lcfg->lcfg_command == LCFG_ADD_CONN))
+               GOTO(skip_out, rc = 0);
+
+       rc = process_command(env, lcfg, mrul);
+       if (rc < 0)
+               RETURN(rc);
+
+       /* A positive value means the record was already rewritten */
+       if (rc)
+               RETURN(0);
+copy_out:
+       /* Record is placed in temporary llog as is */
+       rc = llog_write(env, mrul->temp_llh, rec, NULL, 0, NULL, -1);
+
+       CDEBUG(D_MGS, "Copied idx=%d, rc=%d, len=%d, cmd %x %s %s\n",
+              rec->lrh_index, rc, rec->lrh_len, lcfg->lcfg_command,
+              lustre_cfg_string(lcfg, 0), lustre_cfg_string(lcfg, 1));
+       RETURN(rc);
+
+skip_out:
+       CDEBUG(D_MGS, "Skipped idx=%d, rc=%d, len=%d, cmd %x %s %s\n",
+              rec->lrh_index, rc, rec->lrh_len, lcfg->lcfg_command,
+              lustre_cfg_string(lcfg, 0), lustre_cfg_string(lcfg, 1));
+       RETURN(rc);
+}
+
+/**
+ * Check whether the config log \a name is empty.
+ *
+ * \param[in] env       execution environment
+ * \param[in] mgs       MGS device whose config llog context is used
+ * \param[in] name      config log name
+ *
+ * \retval result of llog_is_empty() for \a name
+ */
+static int mgs_log_is_empty(const struct lu_env *env,
+                           struct mgs_device *mgs, char *name)
+{
+       struct llog_ctxt *ctxt = llog_get_context(mgs->mgs_obd,
+                                                 LLOG_CONFIG_ORIG_CTXT);
+       int empty;
+
+       LASSERT(ctxt != NULL);
+
+       empty = llog_is_empty(env, ctxt, name);
+       /* Drop the context reference taken above */
+       llog_ctxt_put(ctxt);
+
+       return empty;
+}
+
+/**
+ * Rewrite the config log \a logname with the nids of \a devname replaced.
+ *
+ * The log is copied to "<logname>.bak" and, if that succeeded, erased. It
+ * is then created again and refilled from the backup through
+ * mgs_replace_handler(). If a later step fails, the backup is copied back
+ * over \a logname.
+ *
+ * \param[in] env       execution environment
+ * \param[in] mgs       MGS obd device
+ * \param[in] fsdb      file system database (not used by this function)
+ * \param[in] logname   config log to rewrite
+ * \param[in] devname   target device whose nids are replaced
+ * \param[in] nids      new nid list
+ *
+ * \retval 0            success, or the log is empty and nothing was done
+ * \retval -E2BIG       \a devname or \a nids does not fit the target info
+ * \retval <0           other error
+ */
+static int mgs_replace_nids_log(const struct lu_env *env,
+                               struct obd_device *mgs, struct fs_db *fsdb,
+                               char *logname, char *devname, char *nids)
+{
+       struct llog_handle *orig_llh, *backup_llh;
+       struct llog_ctxt *ctxt;
+       struct mgs_replace_uuid_lookup *mrul;
+       struct mgs_device *mgs_dev = lu2mgs_dev(mgs->obd_lu_dev);
+       static struct obd_uuid   cfg_uuid = { .uuid = "config_uuid" };
+       char *backup;
+       int rc, rc2;
+       ENTRY;
+
+       CDEBUG(D_MGS, "Replace nids for %s in %s\n", devname, logname);
+
+       ctxt = llog_get_context(mgs, LLOG_CONFIG_ORIG_CTXT);
+       LASSERT(ctxt != NULL);
+
+       if (mgs_log_is_empty(env, mgs_dev, logname)) {
+               /* Log is empty. Nothing to replace */
+               GOTO(out_put, rc = 0);
+       }
+
+       OBD_ALLOC(backup, strlen(logname) + strlen(".bak") + 1);
+       if (backup == NULL)
+               GOTO(out_put, rc = -ENOMEM);
+
+       sprintf(backup, "%s.bak", logname);
+
+       rc = llog_backup(env, mgs, ctxt, ctxt, logname, backup);
+       if (rc == 0) {
+               /* Now erase original log file. Connections are not allowed.
+                  Backup is already saved */
+               rc = llog_erase(env, ctxt, NULL, logname);
+               if (rc < 0)
+                       GOTO(out_free, rc);
+       } else if (rc != -ENOENT) {
+               CERROR("%s: can't make backup for %s: rc = %d\n",
+                      mgs->obd_name, logname, rc);
+               GOTO(out_free, rc);
+       }
+
+       /* open local log */
+       rc = llog_open_create(env, ctxt, &orig_llh, NULL, logname);
+       if (rc)
+               GOTO(out_restore, rc);
+
+       rc = llog_init_handle(env, orig_llh, LLOG_F_IS_PLAIN, &cfg_uuid);
+       if (rc)
+               GOTO(out_closel, rc);
+
+       /* open backup llog */
+       rc = llog_open(env, ctxt, &backup_llh, NULL, backup,
+                      LLOG_OPEN_EXISTS);
+       if (rc)
+               GOTO(out_closel, rc);
+
+       rc = llog_init_handle(env, backup_llh, LLOG_F_IS_PLAIN, NULL);
+       if (rc)
+               GOTO(out_close, rc);
+
+       if (llog_get_size(backup_llh) <= 1)
+               GOTO(out_close, rc = 0);
+
+       OBD_ALLOC_PTR(mrul);
+       if (!mrul)
+               GOTO(out_close, rc = -ENOMEM);
+       /* devname is only needed information to replace UUID records.
+        * strlcpy always terminates the copy; a name that does not fit is
+        * rejected instead of being truncated */
+       if (strlcpy(mrul->target.mti_svname, devname,
+                   sizeof(mrul->target.mti_svname)) >=
+           sizeof(mrul->target.mti_svname))
+               GOTO(out_mrul, rc = -E2BIG);
+       /* parse nids later */
+       if (strlcpy(mrul->target.mti_params, nids,
+                   sizeof(mrul->target.mti_params)) >=
+           sizeof(mrul->target.mti_params))
+               GOTO(out_mrul, rc = -E2BIG);
+       /* Copy records to this temporary llog */
+       mrul->temp_llh = orig_llh;
+
+       rc = llog_process(env, backup_llh, mgs_replace_handler,
+                         (void *)mrul, NULL);
+out_mrul:
+       OBD_FREE_PTR(mrul);
+out_close:
+       rc2 = llog_close(NULL, backup_llh);
+       if (!rc)
+               rc = rc2;
+out_closel:
+       rc2 = llog_close(NULL, orig_llh);
+       if (!rc)
+               rc = rc2;
+
+out_restore:
+       if (rc) {
+               /* Something failed after the backup step: copy it back */
+               CERROR("%s: llog should be restored: rc = %d\n",
+                      mgs->obd_name, rc);
+               rc2 = llog_backup(env, mgs, ctxt, ctxt, backup,
+                                 logname);
+               if (rc2 < 0)
+                       CERROR("%s: can't restore backup %s: rc = %d\n",
+                              mgs->obd_name, logname, rc2);
+       }
+
+out_free:
+       OBD_FREE(backup, strlen(backup) + 1);
+
+out_put:
+       llog_ctxt_put(ctxt);
+
+       if (rc)
+               CERROR("%s: failed to replace nids in log %s: rc = %d\n",
+                      mgs->obd_name, logname, rc);
+
+       RETURN(rc);
+}
+
+/**
+ * Parse device name and get file system name and/or device index
+ *
+ * \param[in]   devname device name (ex. lustre-MDT0000)
+ * \param[out]  fsname  file system name (optional, may be NULL)
+ * \param[out]  index   device index (optional, may be NULL)
+ *
+ * \retval 0            success
+ * \retval -EINVAL      a requested part could not be extracted
+ */
+static int mgs_parse_devname(char *devname, char *fsname, __u32 *index)
+{
+       ENTRY;
+
+       /* Extract fsname, only when the caller asked for it */
+       if (fsname != NULL &&
+           server_name2fsname(devname, fsname, NULL) < 0) {
+               CDEBUG(D_MGS, "Device name %s without fsname\n",
+                      devname);
+               RETURN(-EINVAL);
+       }
+
+       /* Extract index, only when the caller asked for it */
+       if (index != NULL &&
+           server_name2index(devname, index, NULL) < 0) {
+               CDEBUG(D_MGS, "Device name %s with wrong index\n",
+                      devname);
+               RETURN(-EINVAL);
+       }
+
+       RETURN(0);
+}
+
+/*
+ * Tell whether nothing but the MGS is running.
+ * This is only called during replace_nids.
+ *
+ * Every export of \a mgs_obd is counted and reported, except the self
+ * export and exports with OBD_CONNECT_MDS_MDS set.
+ *
+ * Returns non-zero when no such export exists and at most three devices
+ * are present (osd, MGS and MGC), zero otherwise.
+ */
+static int only_mgs_is_running(struct obd_device *mgs_obd)
+{
+       /* TBD: is there a global variable holding the device count? */
+       int num_devices = get_devices_count();
+       int num_exports = 0;
+       struct obd_export *export;
+
+       spin_lock(&mgs_obd->obd_dev_lock);
+       list_for_each_entry(export, &mgs_obd->obd_exports, exp_obd_chain) {
+               /* skip self export and OBD_CONNECT_MDS_MDS exports */
+               if (export == mgs_obd->obd_self_export ||
+                   (exp_connect_flags(export) & OBD_CONNECT_MDS_MDS))
+                       continue;
+
+               num_exports++;
+
+               CERROR("%s: node %s still connected during replace_nids "
+                      "connect_flags:%llx\n",
+                      mgs_obd->obd_name,
+                      libcfs_nid2str(export->exp_nid_stats->nid),
+                      exp_connect_flags(export));
+       }
+       spin_unlock(&mgs_obd->obd_dev_lock);
+
+       /* Any counted export means something else is still connected */
+       if (num_exports != 0)
+               return 0;
+
+       /* osd, MGS and MGC */
+       return num_devices <= 3;
+}
+
+/**
+ * Create the name for MDT index \a i by passing \a fsname and the suffix
+ * "-MDTxxxx" (index in hex, at least four digits) to name_create().
+ *
+ * \param[out] logname  receives the name produced by name_create()
+ * \param[in]  fsname   file system name
+ * \param[in]  i        MDT index
+ *
+ * \retval -E2BIG       index does not fit the four digit suffix
+ * \retval other        result of name_create()
+ */
+static int name_create_mdt(char **logname, char *fsname, int i)
+{
+       char mdt_index[9];
+
+       /* The buffer holds exactly "-MDT" plus four hex digits; bound the
+        * write and reject an index that would need more */
+       if (snprintf(mdt_index, sizeof(mdt_index), "-MDT%04x", i) >=
+           sizeof(mdt_index))
+               return -E2BIG;
+
+       return name_create(logname, fsname, mdt_index);
+}
+
+/**
+ * Replace nids for \a devname with the \a nids values
+ *
+ * New connections to the MGS are disabled (obd_no_conn) for the duration of
+ * the call and the previous setting is restored on exit. The client config
+ * log and the config log of every MDT set in fsdb_mdt_index_map are
+ * rewritten.
+ *
+ * \param env           execution environment
+ * \param mgs           MGS device
+ * \param devname       nids need to be replaced for this device
+ * (ex. lustre-OST0000)
+ * \param nids          nids list (ex. nid1,nid2,nid3)
+ *
+ * \retval 0            success
+ * \retval -EINPROGRESS something other than the MGS is still running
+ * \retval <0           other error
+ */
+int mgs_replace_nids(const struct lu_env *env,
+                    struct mgs_device *mgs,
+                    char *devname, char *nids)
+{
+       /* Assume fsname is part of device name */
+       char fsname[MTI_NAME_MAXLEN];
+       int rc;
+       __u32 index;
+       char *logname = NULL;
+       struct fs_db *fsdb;
+       unsigned int i;
+       int conn_state;
+       struct obd_device *mgs_obd = mgs->mgs_obd;
+       ENTRY;
+
+       /* We can only change NIDs if no other nodes are connected */
+       spin_lock(&mgs_obd->obd_dev_lock);
+       conn_state = mgs_obd->obd_no_conn;
+       mgs_obd->obd_no_conn = 1;
+       spin_unlock(&mgs_obd->obd_dev_lock);
+
+       /* We can not change nids if not only MGS is started */
+       if (!only_mgs_is_running(mgs_obd)) {
+               CERROR("Only MGS is allowed to be started\n");
+               GOTO(out, rc = -EINPROGRESS);
+       }
+
+       /* Get fsname and index; the index is parsed only to validate
+        * the device name */
+       rc = mgs_parse_devname(devname, fsname, &index);
+       if (rc)
+               GOTO(out, rc);
+
+       rc = mgs_find_or_make_fsdb(env, mgs, fsname, &fsdb);
+       if (rc) {
+               CERROR("%s: can't find fsdb: rc = %d\n", fsname, rc);
+               GOTO(out, rc);
+       }
+
+       /* Process client llogs */
+       rc = name_create(&logname, fsname, "-client");
+       /* NOTE(review): assumes name_create() leaves nothing to destroy
+        * when it fails - confirm against its definition */
+       if (rc)
+               GOTO(out, rc);
+       rc = mgs_replace_nids_log(env, mgs_obd, fsdb, logname, devname, nids);
+       name_destroy(&logname);
+       if (rc) {
+               CERROR("%s: error while replacing NIDs for %s: rc = %d\n",
+                      fsname, devname, rc);
+               GOTO(out, rc);
+       }
+
+       /* Process MDT llogs */
+       for (i = 0; i < INDEX_MAP_SIZE * 8; i++) {
+               if (!test_bit(i, fsdb->fsdb_mdt_index_map))
+                       continue;
+               rc = name_create_mdt(&logname, fsname, i);
+               if (rc)
+                       GOTO(out, rc);
+               rc = mgs_replace_nids_log(env, mgs_obd, fsdb, logname,
+                                         devname, nids);
+               name_destroy(&logname);
+               if (rc)
+                       GOTO(out, rc);
+       }
+
+out:
+       /* Restore the connection setting saved on entry */
+       spin_lock(&mgs_obd->obd_dev_lock);
+       mgs_obd->obd_no_conn = conn_state;
+       spin_unlock(&mgs_obd->obd_dev_lock);
+
+       RETURN(rc);
+}
+
 static int record_lov_setup(const struct lu_env *env, struct llog_handle *llh,
                            char *devname, struct lov_desc *desc)
 {
@@ -868,7 +1338,7 @@ static inline int record_lov_add(const struct lu_env *env,
                                  char *index, char *gen)
 {
        return record_base(env,llh,lov_name,0,LCFG_LOV_ADD_OBD,
-                           ost_uuid,index,gen,0);
+                          ost_uuid, index, gen, 0);
 }
 
 static inline int record_mount_opt(const struct lu_env *env,
@@ -888,16 +1358,21 @@ static int record_marker(const struct lu_env *env,
        struct mgs_thread_info *mgi = mgs_env_info(env);
        struct lustre_cfg *lcfg;
        int rc;
+       int cplen = 0;
 
        if (flags & CM_START)
                fsdb->fsdb_gen++;
        mgi->mgi_marker.cm_step = fsdb->fsdb_gen;
        mgi->mgi_marker.cm_flags = flags;
        mgi->mgi_marker.cm_vers = LUSTRE_VERSION_CODE;
-       strncpy(mgi->mgi_marker.cm_tgtname, tgtname,
-               sizeof(mgi->mgi_marker.cm_tgtname));
-       strncpy(mgi->mgi_marker.cm_comment, comment,
-               sizeof(mgi->mgi_marker.cm_comment));
+       cplen = strlcpy(mgi->mgi_marker.cm_tgtname, tgtname,
+                       sizeof(mgi->mgi_marker.cm_tgtname));
+       if (cplen >= sizeof(mgi->mgi_marker.cm_tgtname))
+               return -E2BIG;
+       cplen = strlcpy(mgi->mgi_marker.cm_comment, comment,
+                       sizeof(mgi->mgi_marker.cm_comment));
+       if (cplen >= sizeof(mgi->mgi_marker.cm_comment))
+               return -E2BIG;
        mgi->mgi_marker.cm_createtime = cfs_time_current_sec();
        mgi->mgi_marker.cm_canceltime = 0;
        lustre_cfg_bufs_reset(&mgi->mgi_bufs, NULL);
@@ -954,35 +1429,6 @@ static int record_end_log(const struct lu_env *env, struct llog_handle **llh)
        return rc;
 }
 
-static int mgs_log_is_empty(const struct lu_env *env,
-                           struct mgs_device *mgs, char *name)
-{
-        struct llog_handle *llh;
-        struct llog_ctxt *ctxt;
-        int rc = 0;
-
-       ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
-        LASSERT(ctxt != NULL);
-       rc = llog_open(env, ctxt, &llh, NULL, name, LLOG_OPEN_EXISTS);
-       if (rc < 0) {
-               if (rc == -ENOENT)
-                       rc = 0;
-               GOTO(out_ctxt, rc);
-       }
-
-       llog_init_handle(env, llh, LLOG_F_IS_PLAIN, NULL);
-       if (rc)
-               GOTO(out_close, rc);
-       rc = llog_get_size(llh);
-
-out_close:
-       llog_close(env, llh);
-out_ctxt:
-       llog_ctxt_put(ctxt);
-       /* header is record 1 */
-       return (rc <= 1);
-}
-
 /******************** config "macros" *********************/
 
 /* write an lcfg directly into a log (with markers) */
@@ -1043,7 +1489,8 @@ int mgs_write_log_direct_all(const struct lu_env *env,
        if (mgs_log_is_empty(env, mgs, logname)) {
                struct llog_handle *llh = NULL;
                rc = record_start_log(env, mgs, &llh, logname);
-               record_end_log(env, &llh);
+               if (rc == 0)
+                       record_end_log(env, &llh);
         }
         name_destroy(&logname);
         if (rc)
@@ -1092,23 +1539,45 @@ next:
         RETURN(rc);
 }
 
-static int mgs_write_log_mdc_to_mdt(const struct lu_env *env,
+static int mgs_write_log_osp_to_mdt(const struct lu_env *env,
                                    struct mgs_device *mgs,
                                    struct fs_db *fsdb,
                                    struct mgs_target_info *mti,
-                                   char *logname);
+                                   int index, char *logname);
 static int mgs_write_log_osc_to_lov(const struct lu_env *env,
                                    struct mgs_device *mgs,
                                    struct fs_db *fsdb,
                                     struct mgs_target_info *mti,
-                                    char *logname, char *suffix, char *lovname,
+                                   char *logname, char *suffix, char *lovname,
                                     enum lustre_sec_part sec_part, int flags);
 static int name_create_mdt_and_lov(char **logname, char **lovname,
                                   struct fs_db *fsdb, int i);
 
-static int mgs_steal_llog_handler(const struct lu_env *env,
-                                 struct llog_handle *llh,
-                                 struct llog_rec_hdr *rec, void *data)
+/**
+ * Append " <key><val>" to the parameter string \a params.
+ *
+ * The available space is taken to be the size of the mti_params field of
+ * struct mgs_target_info, whatever buffer \a params actually points to.
+ *
+ * \param[in,out] params        NUL-terminated parameter string to extend
+ * \param[in]     key           optional prefix for the value, may be NULL
+ * \param[in]     val           value to append
+ *
+ * \retval 0            parameter appended
+ * \retval -EINVAL      the result would not fit
+ */
+static int add_param(char *params, char *key, char *val)
+{
+       size_t size = sizeof(((struct mgs_target_info *)0)->mti_params);
+       size_t used = strlen(params);
+       size_t keylen = key != NULL ? strlen(key) : 0;
+
+       /* Separator, key, value and the trailing NUL must fit in what is
+        * left. Lengths are compared instead of pointers so that no
+        * pointer beyond the end of the buffer is ever formed */
+       if (used + 1 + keylen + strlen(val) >= size) {
+               CERROR("params are too long: %s %s%s\n",
+                      params, key != NULL ? key : "", val);
+               return -EINVAL;
+       }
+
+       sprintf(params + used, " %s%s", key != NULL ? key : "", val);
+       return 0;
+}
+
+/**
+ * Walk through client config log record and convert the related records
+ * into the target.
+ **/
+static int mgs_steal_client_llog_handler(const struct lu_env *env,
+                                        struct llog_handle *llh,
+                                        struct llog_rec_hdr *rec, void *data)
 {
        struct mgs_device *mgs;
        struct obd_device *obd;
@@ -1125,6 +1594,7 @@ static int mgs_steal_llog_handler(const struct lu_env *env,
            2: found mdc;
         */
         static int last_step = -1;
+       int cplen = 0;
 
         ENTRY;
 
@@ -1148,68 +1618,84 @@ static int mgs_steal_llog_handler(const struct lu_env *env,
 
         lcfg = (struct lustre_cfg *)cfg_buf;
 
-        if (lcfg->lcfg_command == LCFG_MARKER) {
-                struct cfg_marker *marker;
-                marker = lustre_cfg_buf(lcfg, 1);
-                if (!strncmp(marker->cm_comment,"add osc",7) &&
-                    (marker->cm_flags & CM_START)){
-                        got_an_osc_or_mdc = 1;
-                        strncpy(tmti->mti_svname, marker->cm_tgtname,
-                                sizeof(tmti->mti_svname));
+       if (lcfg->lcfg_command == LCFG_MARKER) {
+               struct cfg_marker *marker;
+               marker = lustre_cfg_buf(lcfg, 1);
+               if (!strncmp(marker->cm_comment, "add osc", 7) &&
+                   (marker->cm_flags & CM_START) &&
+                    !(marker->cm_flags & CM_SKIP)) {
+                       got_an_osc_or_mdc = 1;
+                       cplen = strlcpy(tmti->mti_svname, marker->cm_tgtname,
+                                       sizeof(tmti->mti_svname));
+                       if (cplen >= sizeof(tmti->mti_svname))
+                               RETURN(-E2BIG);
                        rc = record_start_log(env, mgs, &mdt_llh,
                                              mti->mti_svname);
                        if (rc)
                                RETURN(rc);
                        rc = record_marker(env, mdt_llh, fsdb, CM_START,
-                                           mti->mti_svname,"add osc(copied)");
+                                          mti->mti_svname, "add osc(copied)");
                        record_end_log(env, &mdt_llh);
-                        last_step = marker->cm_step;
-                        RETURN(rc);
-                }
-                if (!strncmp(marker->cm_comment,"add osc",7) &&
-                    (marker->cm_flags & CM_END)){
-                        LASSERT(last_step == marker->cm_step);
-                        last_step = -1;
-                        got_an_osc_or_mdc = 0;
+                       last_step = marker->cm_step;
+                       RETURN(rc);
+               }
+               if (!strncmp(marker->cm_comment, "add osc", 7) &&
+                   (marker->cm_flags & CM_END) &&
+                    !(marker->cm_flags & CM_SKIP)) {
+                       LASSERT(last_step == marker->cm_step);
+                       last_step = -1;
+                       got_an_osc_or_mdc = 0;
+                       memset(tmti, 0, sizeof(*tmti));
                        rc = record_start_log(env, mgs, &mdt_llh,
                                              mti->mti_svname);
                        if (rc)
                                RETURN(rc);
                        rc = record_marker(env, mdt_llh, fsdb, CM_END,
-                                           mti->mti_svname,"add osc(copied)");
+                                          mti->mti_svname, "add osc(copied)");
                        record_end_log(env, &mdt_llh);
-                        RETURN(rc);
-                }
-                if (!strncmp(marker->cm_comment,"add mdc",7) &&
-                    (marker->cm_flags & CM_START)){
-                        got_an_osc_or_mdc = 2;
-                        last_step = marker->cm_step;
-                        memcpy(tmti->mti_svname, marker->cm_tgtname,
-                               strlen(marker->cm_tgtname));
-
-                        RETURN(rc);
-                }
-                if (!strncmp(marker->cm_comment,"add mdc",7) &&
-                    (marker->cm_flags & CM_END)){
-                        LASSERT(last_step == marker->cm_step);
-                        last_step = -1;
-                        got_an_osc_or_mdc = 0;
-                        RETURN(rc);
-                }
-        }
+                       RETURN(rc);
+               }
+               if (!strncmp(marker->cm_comment, "add mdc", 7) &&
+                   (marker->cm_flags & CM_START) &&
+                    !(marker->cm_flags & CM_SKIP)) {
+                       got_an_osc_or_mdc = 2;
+                       last_step = marker->cm_step;
+                       memcpy(tmti->mti_svname, marker->cm_tgtname,
+                              strlen(marker->cm_tgtname));
+
+                       RETURN(rc);
+               }
+               if (!strncmp(marker->cm_comment, "add mdc", 7) &&
+                   (marker->cm_flags & CM_END) &&
+                    !(marker->cm_flags & CM_SKIP)) {
+                       LASSERT(last_step == marker->cm_step);
+                       last_step = -1;
+                       got_an_osc_or_mdc = 0;
+                       memset(tmti, 0, sizeof(*tmti));
+                       RETURN(rc);
+               }
+       }
 
         if (got_an_osc_or_mdc == 0 || last_step < 0)
                 RETURN(rc);
 
-        if (lcfg->lcfg_command == LCFG_ADD_UUID) {
-                uint64_t nodenid;
-                nodenid = lcfg->lcfg_nid;
+       if (lcfg->lcfg_command == LCFG_ADD_UUID) {
+               uint64_t nodenid = lcfg->lcfg_nid;
 
-                tmti->mti_nids[tmti->mti_nid_count] = nodenid;
-                tmti->mti_nid_count++;
+               if (strlen(tmti->mti_uuid) == 0) {
+                       /* target uuid not set, this config record is before
+                        * LCFG_SETUP, this nid is one of target node nid.
+                        */
+                       tmti->mti_nids[tmti->mti_nid_count] = nodenid;
+                       tmti->mti_nid_count++;
+               } else {
+                       /* failover node nid */
+                       rc = add_param(tmti->mti_params, PARAM_FAILNODE,
+                                      libcfs_nid2str(nodenid));
+               }
 
-                RETURN(rc);
-        }
+               RETURN(rc);
+       }
 
         if (lcfg->lcfg_command == LCFG_SETUP) {
                 char *target;
@@ -1233,7 +1719,8 @@ static int mgs_steal_llog_handler(const struct lu_env *env,
                        strlen(mti->mti_fsname));
                 tmti->mti_stripe_index = index;
 
-               rc = mgs_write_log_mdc_to_mdt(env, mgs, fsdb, tmti,
+               rc = mgs_write_log_osp_to_mdt(env, mgs, fsdb, tmti,
+                                             mti->mti_stripe_index,
                                              mti->mti_svname);
                 memset(tmti, 0, sizeof(*tmti));
                 RETURN(rc);
@@ -1303,7 +1790,7 @@ static int mgs_steal_llog_for_mdt_from_client(const struct lu_env *env,
        if (rc)
                GOTO(out_close, rc);
 
-       rc = llog_process_or_fork(env, loghandle, mgs_steal_llog_handler,
+       rc = llog_process_or_fork(env, loghandle, mgs_steal_client_llog_handler,
                                  (void *)comp, NULL, false);
        CDEBUG(D_MGS, "steal llog re = %d\n", rc);
 out_close:
@@ -1389,9 +1876,9 @@ static int mgs_write_log_lov(const struct lu_env *env, struct mgs_device *mgs,
         /* Defaults.  Can be changed later by lcfg config_param */
         lovdesc->ld_default_stripe_count = 1;
         lovdesc->ld_pattern = LOV_PATTERN_RAID0;
-        lovdesc->ld_default_stripe_size = 1024 * 1024;
+       lovdesc->ld_default_stripe_size = LOV_DESC_STRIPE_SIZE_DEFAULT;
         lovdesc->ld_default_stripe_offset = -1;
-        lovdesc->ld_qos_maxage = QOS_DEFAULT_MAXAGE;
+       lovdesc->ld_qos_maxage = LOV_DESC_QOS_MAXAGE_DEFAULT;
         sprintf((char*)lovdesc->ld_uuid.uuid, "%s_UUID", lovname);
         /* can these be the same? */
         uuid = (char *)lovdesc->ld_uuid.uuid;
@@ -1458,11 +1945,13 @@ static int mgs_write_log_failnids(const struct lu_env *env,
                                failnodeuuid, cliname);
                        rc = record_add_uuid(env, llh, nid, failnodeuuid);
                 }
-               if (failnodeuuid)
+               if (failnodeuuid) {
                        rc = record_add_conn(env, llh, cliname, failnodeuuid);
+                       name_destroy(&failnodeuuid);
+                       failnodeuuid = NULL;
+               }
         }
 
-       name_destroy(&failnodeuuid);
         return rc;
 }
 
@@ -1546,83 +2035,158 @@ out_free:
         RETURN(rc);
 }
 
+/*
+ * Build the name of the LOV device used by an MDT: "<base>-mdtlov".
+ *
+ * For MDT index 0 of a filesystem flagged FSDB_OSCNAME18 (COMPAT_180) the
+ * base is the filesystem name; in every other case it is \a mdtname.
+ * The result is stored in *lovname by name_create(), whose return code is
+ * passed back unchanged.
+ */
+static inline int name_create_lov(char **lovname, char *mdtname,
+                                 struct fs_db *fsdb, int index)
+{
+       bool    compat_180;
+
+       compat_180 = index == 0 && test_bit(FSDB_OSCNAME18, &fsdb->fsdb_flags);
+
+       return name_create(lovname, compat_180 ? fsdb->fsdb_name : mdtname,
+                          "-mdtlov");
+}
+
+/*
+ * Create both the MDT log name for index \a i (via name_create_mdt()) and
+ * the matching "-mdtlov" name (same COMPAT_180 rule as name_create_lov()).
+ *
+ * On success both *logname and *lovname are set and 0 is returned.  If the
+ * LOV name cannot be created, the already created log name is destroyed,
+ * *logname is reset to NULL and the error is returned.
+ */
+static int name_create_mdt_and_lov(char **logname, char **lovname,
+                                  struct fs_db *fsdb, int i)
+{
+       int rc = name_create_mdt(logname, fsdb->fsdb_name, i);
+
+       if (rc != 0)
+               return rc;
+
+       rc = name_create_lov(lovname, *logname, fsdb, i);
+       if (rc == 0)
+               return 0;
+
+       /* second allocation failed: do not hand back a half-built pair */
+       name_destroy(logname);
+       *logname = NULL;
+       return rc;
+}
+
+/*
+ * Build the name of the OSC device an MDT uses to reach \a ostname.
+ *
+ * MDT index 0 of a filesystem flagged FSDB_OSCNAME18 keeps the plain
+ * "-osc" suffix; otherwise the suffix is "-osc-MDT<i as 4+ hex digits>".
+ * The result is stored in *oscname by name_create(), whose return code is
+ * passed back unchanged.
+ *
+ * "%04x" is only a minimum width, so the formatted suffix can need 17
+ * bytes for an index above 0xfffffff (or a negative one).  The writes are
+ * therefore bounded by the size of suffix[] instead of using sprintf().
+ */
+static inline int name_create_mdt_osc(char **oscname, char *ostname,
+                                     struct fs_db *fsdb, int i)
+{
+       char suffix[16];
+
+       if (i == 0 && test_bit(FSDB_OSCNAME18, &fsdb->fsdb_flags))
+               snprintf(suffix, sizeof(suffix), "-osc");
+       else
+               snprintf(suffix, sizeof(suffix), "-osc-MDT%04x", i);
+       return name_create(oscname, ostname, suffix);
+}
+
 /* add new mdc to already existent MDS */
-static int mgs_write_log_mdc_to_mdt(const struct lu_env *env,
+static int mgs_write_log_osp_to_mdt(const struct lu_env *env,
                                    struct mgs_device *mgs,
                                    struct fs_db *fsdb,
                                    struct mgs_target_info *mti,
-                                   char *logname)
+                                   int mdt_index, char *logname)
 {
-        struct llog_handle *llh = NULL;
-       char *nodeuuid = NULL;
-       char *mdcname = NULL;
-       char *mdcuuid = NULL;
-       char *mdtuuid = NULL;
-        int idx = mti->mti_stripe_index;
-        char index[9];
-        int i, rc;
+       struct llog_handle      *llh = NULL;
+       char    *nodeuuid = NULL;
+       char    *ospname = NULL;
+       char    *lovuuid = NULL;
+       char    *mdtuuid = NULL;
+       char    *svname = NULL;
+       char    *mdtname = NULL;
+       char    *lovname = NULL;
+       char    index_str[16];
+       int     i, rc;
 
-        ENTRY;
-       if (mgs_log_is_empty(env, mgs, logname)) {
+       ENTRY;
+       if (mgs_log_is_empty(env, mgs, mti->mti_svname)) {
                 CERROR("log is empty! Logical error\n");
                 RETURN (-EINVAL);
         }
 
-        CDEBUG(D_MGS, "adding mdc index %d to %s\n", idx, logname);
+       CDEBUG(D_MGS, "adding osp index %d to %s\n", mti->mti_stripe_index,
+              logname);
 
-       rc = name_create(&nodeuuid, libcfs_nid2str(mti->mti_nids[0]), "");
+       rc = name_create_mdt(&mdtname, fsdb->fsdb_name, mti->mti_stripe_index);
        if (rc)
                RETURN(rc);
-       snprintf(index, sizeof(index), "-mdc%04x", idx);
-       rc = name_create(&mdcname, logname, index);
+
+       rc = name_create(&nodeuuid, libcfs_nid2str(mti->mti_nids[0]), "");
        if (rc)
-               GOTO(out_free, rc);
-       rc = name_create(&mdcuuid, mdcname, "_UUID");
+               GOTO(out_destory, rc);
+
+       rc = name_create(&svname, mdtname, "-osp");
        if (rc)
-               GOTO(out_free, rc);
-       rc = name_create(&mdtuuid, logname, "_UUID");
+               GOTO(out_destory, rc);
+
+       sprintf(index_str, "-MDT%04x", mdt_index);
+       rc = name_create(&ospname, svname, index_str);
        if (rc)
-               GOTO(out_free, rc);
+               GOTO(out_destory, rc);
+
+       rc = name_create_lov(&lovname, logname, fsdb, mdt_index);
+       if (rc)
+               GOTO(out_destory, rc);
+
+       rc = name_create(&lovuuid, lovname, "_UUID");
+       if (rc)
+               GOTO(out_destory, rc);
+
+       rc = name_create(&mdtuuid, mdtname, "_UUID");
+       if (rc)
+               GOTO(out_destory, rc);
 
        rc = record_start_log(env, mgs, &llh, logname);
        if (rc)
-               GOTO(out_free, rc);
-       rc = record_marker(env, llh, fsdb, CM_START, mti->mti_svname, "add mdc");
+               GOTO(out_destory, rc);
+
+       rc = record_marker(env, llh, fsdb, CM_START, mti->mti_svname,
+                          "add osp");
        if (rc)
-               GOTO(out_end, rc);
-        for (i = 0; i < mti->mti_nid_count; i++) {
-                CDEBUG(D_MGS, "add nid %s for mdt\n",
-                       libcfs_nid2str(mti->mti_nids[i]));
+               GOTO(out_destory, rc);
+
+       for (i = 0; i < mti->mti_nid_count; i++) {
+               CDEBUG(D_MGS, "add nid %s for mdt\n",
+                      libcfs_nid2str(mti->mti_nids[i]));
                rc = record_add_uuid(env, llh, mti->mti_nids[i], nodeuuid);
                if (rc)
                        GOTO(out_end, rc);
         }
-       rc = record_attach(env, llh, mdcname, LUSTRE_MDC_NAME, mdcuuid);
+
+       rc = record_attach(env, llh, ospname, LUSTRE_OSP_NAME, lovuuid);
        if (rc)
                GOTO(out_end, rc);
-       rc = record_setup(env, llh, mdcname, mti->mti_uuid, nodeuuid, 0, 0);
+
+       rc = record_setup(env, llh, ospname, mti->mti_uuid, nodeuuid,
+                         NULL, NULL);
        if (rc)
                GOTO(out_end, rc);
-       rc = mgs_write_log_failnids(env, mti, llh, mdcname);
+
+       rc = mgs_write_log_failnids(env, mti, llh, ospname);
        if (rc)
                GOTO(out_end, rc);
-        snprintf(index, sizeof(index), "%d", idx);
 
-       rc = record_mdc_add(env, llh, logname, mdcuuid, mti->mti_uuid,
-                            index, "1");
+       /* Add mdc(osp) to lod */
+       snprintf(index_str, sizeof(mti->mti_stripe_index), "%d",
+                mti->mti_stripe_index);
+       rc = record_base(env, llh, lovname, 0, LCFG_ADD_MDC, mti->mti_uuid,
+                        index_str, "1", NULL);
        if (rc)
                GOTO(out_end, rc);
-       rc = record_marker(env, llh, fsdb, CM_END, mti->mti_svname, "add mdc");
+
+       rc = record_marker(env, llh, fsdb, CM_END, mti->mti_svname, "add osp");
        if (rc)
                GOTO(out_end, rc);
+
 out_end:
        record_end_log(env, &llh);
-out_free:
+
+out_destory:
        name_destroy(&mdtuuid);
-        name_destroy(&mdcuuid);
-        name_destroy(&mdcname);
+       name_destroy(&lovuuid);
+       name_destroy(&lovname);
+       name_destroy(&ospname);
+       name_destroy(&svname);
         name_destroy(&nodeuuid);
-        RETURN(rc);
+       name_destroy(&mdtname);
+       RETURN(rc);
 }
 
 static int mgs_write_log_mdt0(const struct lu_env *env,
@@ -1689,46 +2253,6 @@ out_free:
         RETURN(rc);
 }
 
-static inline int name_create_mdt(char **logname, char *fsname, int i)
-{
-        char mdt_index[9];
-
-        sprintf(mdt_index, "-MDT%04x", i);
-       return name_create(logname, fsname, mdt_index);
-}
-
-static int name_create_mdt_and_lov(char **logname, char **lovname,
-                                    struct fs_db *fsdb, int i)
-{
-       int rc;
-
-       rc = name_create_mdt(logname, fsdb->fsdb_name, i);
-       if (rc)
-               return rc;
-        /* COMPAT_180 */
-       if (i == 0 && test_bit(FSDB_OSCNAME18, &fsdb->fsdb_flags))
-               rc = name_create(lovname, fsdb->fsdb_name, "-mdtlov");
-        else
-               rc = name_create(lovname, *logname, "-mdtlov");
-       if (rc) {
-               name_destroy(logname);
-               *logname = NULL;
-       }
-       return rc;
-}
-
-static inline int name_create_mdt_osc(char **oscname, char *ostname,
-                                       struct fs_db *fsdb, int i)
-{
-        char suffix[16];
-
-       if (i == 0 && test_bit(FSDB_OSCNAME18, &fsdb->fsdb_flags))
-                sprintf(suffix, "-osc");
-        else
-                sprintf(suffix, "-osc-MDT%04x", i);
-       return name_create(oscname, ostname, suffix);
-}
-
 /* envelope method for all layers log */
 static int mgs_write_log_mdt(const struct lu_env *env,
                             struct mgs_device *mgs,
@@ -1810,20 +2334,24 @@ static int mgs_write_log_mdt(const struct lu_env *env,
 
        if (rc)
                GOTO(out_end, rc);
+
        /* for_all_existing_mdt except current one */
-        for (i = 0; i < INDEX_MAP_SIZE * 8; i++){
-                char *mdtname;
-                if (i !=  mti->mti_stripe_index &&
-                   test_bit(i,  fsdb->fsdb_mdt_index_map)) {
-                       rc = name_create_mdt(&mdtname, mti->mti_fsname, i);
+       for (i = 0; i < INDEX_MAP_SIZE * 8; i++) {
+               if (i !=  mti->mti_stripe_index &&
+                   test_bit(i, fsdb->fsdb_mdt_index_map)) {
+                       char *logname;
+
+                       rc = name_create_mdt(&logname, fsdb->fsdb_name, i);
                        if (rc)
                                GOTO(out_end, rc);
-                       rc = mgs_write_log_mdc_to_mdt(env, mgs, fsdb, mti, mdtname);
-                        name_destroy(&mdtname);
+
+                       rc = mgs_write_log_osp_to_mdt(env, mgs, fsdb, mti,
+                                                     i, logname);
+                       name_destroy(&logname);
                        if (rc)
                                GOTO(out_end, rc);
-                }
-        }
+               }
+       }
 out_end:
        record_end_log(env, &llh);
 out_free:
@@ -1862,6 +2390,7 @@ static int mgs_write_log_osc_to_lov(const struct lu_env *env,
        rc = name_create(&svname, mti->mti_svname, "-osc");
        if (rc)
                GOTO(out_free, rc);
+
        /* for the system upgraded from old 1.8, keep using the old osc naming
         * style for mdt, see name_create_mdt_osc(). LU-1257 */
        if (test_bit(FSDB_OSCNAME18, &fsdb->fsdb_flags))
@@ -1870,6 +2399,7 @@ static int mgs_write_log_osc_to_lov(const struct lu_env *env,
                rc = name_create(&oscname, svname, suffix);
        if (rc)
                GOTO(out_free, rc);
+
        rc = name_create(&oscuuid, oscname, "_UUID");
        if (rc)
                GOTO(out_free, rc);
@@ -1877,6 +2407,7 @@ static int mgs_write_log_osc_to_lov(const struct lu_env *env,
        if (rc)
                GOTO(out_free, rc);
 
+
         /*
         #03 L add_uuid nid=uml1@tcp(0x20000c0a80201) 0:  1:uml1_UUID
         multihomed (#4)
@@ -1892,11 +2423,18 @@ static int mgs_write_log_osc_to_lov(const struct lu_env *env,
        rc = record_start_log(env, mgs, &llh, logname);
         if (rc)
                GOTO(out_free, rc);
+
         /* FIXME these should be a single journal transaction */
        rc = record_marker(env, llh, fsdb, CM_START | flags, mti->mti_svname,
                            "add osc");
        if (rc)
                GOTO(out_end, rc);
+
+       /* NB: don't change record order, because upon MDT steal OSC config
+        * from client, it treats all nids before LCFG_SETUP as target nids
+        * (multiple interfaces), while nids after as failover node nids.
+        * See mgs_steal_client_llog_handler() LCFG_ADD_UUID.
+        */
         for (i = 0; i < mti->mti_nid_count; i++) {
                 CDEBUG(D_MGS, "add nid %s\n", libcfs_nid2str(mti->mti_nids[i]));
                rc = record_add_uuid(env, llh, mti->mti_nids[i], nodeuuid);
@@ -1912,7 +2450,9 @@ static int mgs_write_log_osc_to_lov(const struct lu_env *env,
        rc = mgs_write_log_failnids(env, mti, llh, oscname);
        if (rc)
                GOTO(out_end, rc);
-        snprintf(index, sizeof(index), "%d", mti->mti_stripe_index);
+
+       snprintf(index, sizeof(index), "%d", mti->mti_stripe_index);
+
        rc = record_lov_add(env, llh, lovname, mti->mti_uuid, index, "1");
        if (rc)
                GOTO(out_end, rc);
@@ -2200,8 +2740,13 @@ static int mgs_wlp_lcfg(const struct lu_env *env,
                 return rc;
 
         lustre_cfg_bufs_reset(bufs, tgtname);
-        lustre_cfg_bufs_set_string(bufs, 1, ptr);
-        lcfg = lustre_cfg_new(LCFG_PARAM, bufs);
+       lustre_cfg_bufs_set_string(bufs, 1, ptr);
+       if (mti->mti_flags & LDD_F_PARAM2)
+               lustre_cfg_bufs_set_string(bufs, 2, LCTL_UPCALL);
+
+       lcfg = lustre_cfg_new((mti->mti_flags & LDD_F_PARAM2) ?
+                             LCFG_SET_PARAM : LCFG_PARAM, bufs);
+
         if (!lcfg)
                 return -ENOMEM;
        rc = mgs_write_log_direct(env, mgs, fsdb, logname,lcfg,tgtname,comment);
@@ -2209,6 +2754,22 @@ static int mgs_wlp_lcfg(const struct lu_env *env,
         return rc;
 }
 
+/*
+ * Write one parameter string \a ptr into the PARAMS_FILENAME log on behalf
+ * of target \a mti.
+ *
+ * This is a thin wrapper around mgs_wlp_lcfg() using a local
+ * lustre_cfg_bufs and mti->mti_svname as both target name and comment.
+ * Returns the result of mgs_wlp_lcfg().
+ */
+static int mgs_write_log_param2(const struct lu_env *env,
+                               struct mgs_device *mgs,
+                               struct fs_db *fsdb,
+                               struct mgs_target_info *mti, char *ptr)
+{
+       struct lustre_cfg_bufs  cfg_bufs;
+       int                     result;
+       ENTRY;
+
+       CDEBUG(D_MGS, "next param '%s'\n", ptr);
+
+       result = mgs_wlp_lcfg(env, mgs, fsdb, mti, PARAMS_FILENAME,
+                             &cfg_bufs, mti->mti_svname, ptr);
+       RETURN(result);
+}
+
 /* write global variable settings into log */
 static int mgs_write_log_sys(const struct lu_env *env,
                             struct mgs_device *mgs, struct fs_db *fsdb,
@@ -2323,7 +2884,7 @@ static int mgs_write_log_quota(const struct lu_env *env, struct mgs_device *mgs,
                                      mti->mti_fsname, quota, 1);
        *ptr = sep;
        lustre_cfg_free(lcfg);
-       return rc;
+       return rc < 0 ? rc : 0;
 }
 
 static int mgs_srpc_set_param_disk(const struct lu_env *env,
@@ -2564,7 +3125,7 @@ static int mgs_srpc_read_handler(const struct lu_env *env,
 {
        struct mgs_srpc_read_data *msrd = data;
         struct cfg_marker         *marker;
-        struct lustre_cfg         *lcfg = (struct lustre_cfg *)(rec + 1);
+       struct lustre_cfg         *lcfg = REC_DATA(rec);
         char                      *svname, *param;
         int                        cfg_len, rc;
         ENTRY;
@@ -2952,17 +3513,18 @@ static int mgs_write_log_param(const struct lu_env *env,
                 GOTO(end, rc);
         }
 
-        /* All mdd., ost. params in proc */
-        if ((class_match_param(ptr, PARAM_MDD, NULL) == 0) ||
-            (class_match_param(ptr, PARAM_OST, NULL) == 0)) {
-                CDEBUG(D_MGS, "%.3s param %s\n", ptr, ptr + 4);
+       /* All mdd., ost. and osd. params in proc */
+       if ((class_match_param(ptr, PARAM_MDD, NULL) == 0) ||
+           (class_match_param(ptr, PARAM_OST, NULL) == 0) ||
+           (class_match_param(ptr, PARAM_OSD, NULL) == 0)) {
+               CDEBUG(D_MGS, "%.3s param %s\n", ptr, ptr + 4);
                if (mgs_log_is_empty(env, mgs, mti->mti_svname))
-                        GOTO(end, rc = -ENODEV);
+                       GOTO(end, rc = -ENODEV);
 
                rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, mti->mti_svname,
                                  &mgi->mgi_bufs, mti->mti_svname, ptr);
-                GOTO(end, rc);
-        }
+               GOTO(end, rc);
+       }
 
         LCONSOLE_WARN("Ignoring unrecognized param '%s'\n", ptr);
         rc2 = -ENOSYS;
@@ -3031,6 +3593,7 @@ int mgs_write_log_target(const struct lu_env *env,
                   get repeat setup instructions for already running
                   osc's. So don't update the client/mdt logs. */
                mti->mti_flags &= ~LDD_F_UPDATE;
+               rc = 0;
        }
 
        mutex_lock(&fsdb->fsdb_mutex);
@@ -3152,6 +3715,43 @@ int mgs_erase_logs(const struct lu_env *env, struct mgs_device *mgs, char *fsnam
         RETURN(rc);
 }
 
+/*
+ * List the config logs found in the CONFIGS directory.
+ *
+ * Every directory entry whose name contains a '-' is written to the
+ * data->ioc_bulk buffer (data->ioc_inllen1 bytes) as one
+ * "config log: $<name>" line.  Formatting stops once the buffer has been
+ * used up, but all entries returned by class_dentry_readdir() are freed.
+ *
+ * Returns 0 on success or the error from class_dentry_readdir().
+ */
+int mgs_list_logs(const struct lu_env *env, struct mgs_device *mgs,
+                 struct obd_ioctl_data *data)
+{
+       cfs_list_t               list;
+       struct mgs_direntry     *dirent, *n;
+       char                    *out, *suffix;
+       int                      l, remains, rc;
+
+       ENTRY;
+
+       /* Find all the logs in the CONFIGS directory */
+       rc = class_dentry_readdir(env, mgs, &list);
+       if (rc) {
+               CERROR("%s: can't read %s dir = %d\n",
+                      mgs->mgs_obd->obd_name, MOUNT_CONFIGS_DIR, rc);
+               RETURN(rc);
+       }
+
+       out = data->ioc_bulk;
+       remains = data->ioc_inllen1;
+       cfs_list_for_each_entry_safe(dirent, n, &list, list) {
+               cfs_list_del(&dirent->list);
+               suffix = strrchr(dirent->name, '-');
+               /* Once the buffer is exhausted stop formatting, but keep
+                * walking: the list head is local, so any entry left on it
+                * would be leaked. */
+               if (suffix != NULL && remains > 0) {
+                       l = snprintf(out, remains, "config log: $%s\n",
+                                    dirent->name);
+                       out += l;
+                       remains -= l;
+               }
+               mgs_direntry_free(dirent);
+       }
+       RETURN(rc);
+}
+
 /* from llog_swab */
 static void print_lustre_cfg(struct lustre_cfg *lcfg)
 {
@@ -3183,13 +3783,14 @@ static void print_lustre_cfg(struct lustre_cfg *lcfg)
 int mgs_setparam(const struct lu_env *env, struct mgs_device *mgs,
                 struct lustre_cfg *lcfg, char *fsname)
 {
-        struct fs_db *fsdb;
-        struct mgs_target_info *mti;
+       struct fs_db *fsdb;
+       struct mgs_target_info *mti;
         char *devname, *param;
-        char *ptr, *tmp;
-        __u32 index;
-        int rc = 0;
-        ENTRY;
+       char *ptr;
+       const char *tmp;
+       __u32 index;
+       int rc = 0;
+       ENTRY;
 
         print_lustre_cfg(lcfg);
 
@@ -3211,26 +3812,28 @@ int mgs_setparam(const struct lu_env *env, struct mgs_device *mgs,
                 RETURN(-ENOSYS);
         }
 
-        /* Extract fsname */
-        ptr = strrchr(devname, '-');
-        memset(fsname, 0, MTI_NAME_MAXLEN);
-        if (ptr && (server_name2index(ptr, &index, NULL) >= 0)) {
+       rc = mgs_parse_devname(devname, fsname, NULL);
+       if (rc == 0 && !mgs_parse_devname(devname, NULL, &index)) {
                 /* param related to llite isn't allowed to set by OST or MDT */
-                if (strncmp(param, PARAM_LLITE, sizeof(PARAM_LLITE)) == 0)
+               if (rc == 0 && strncmp(param, PARAM_LLITE,
+                                      sizeof(PARAM_LLITE) - 1) == 0)
                         RETURN(-EINVAL);
-
-                strncpy(fsname, devname, ptr - devname);
         } else {
                 /* assume devname is the fsname */
+               memset(fsname, 0, MTI_NAME_MAXLEN);
                 strncpy(fsname, devname, MTI_NAME_MAXLEN);
+               fsname[MTI_NAME_MAXLEN - 1] = 0;
         }
-        fsname[MTI_NAME_MAXLEN - 1] = 0;
         CDEBUG(D_MGS, "setparam fs='%s' device='%s'\n", fsname, devname);
 
-       rc = mgs_find_or_make_fsdb(env, mgs, fsname, &fsdb);
-        if (rc)
-                RETURN(rc);
-       if (!test_bit(FSDB_MGS_SELF, &fsdb->fsdb_flags) &&
+       rc = mgs_find_or_make_fsdb(env, mgs,
+                                  lcfg->lcfg_command == LCFG_SET_PARAM ?
+                                  PARAMS_FILENAME : fsname, &fsdb);
+       if (rc)
+               RETURN(rc);
+
+       if (lcfg->lcfg_command != LCFG_SET_PARAM &&
+           !test_bit(FSDB_MGS_SELF, &fsdb->fsdb_flags) &&
            test_bit(FSDB_LOG_EMPTY, &fsdb->fsdb_flags)) {
                 CERROR("No filesystem targets for %s.  cfg_device from lctl "
                        "is '%s'\n", fsname, devname);
@@ -3242,9 +3845,15 @@ int mgs_setparam(const struct lu_env *env, struct mgs_device *mgs,
         OBD_ALLOC_PTR(mti);
         if (!mti)
                 GOTO(out, rc = -ENOMEM);
-        strncpy(mti->mti_fsname, fsname, MTI_NAME_MAXLEN);
-        strncpy(mti->mti_svname, devname, MTI_NAME_MAXLEN);
-        strncpy(mti->mti_params, param, sizeof(mti->mti_params));
+       if (strlcpy(mti->mti_fsname, fsname, sizeof(mti->mti_fsname))
+           >= sizeof(mti->mti_fsname))
+               GOTO(out, rc = -E2BIG);
+       if (strlcpy(mti->mti_svname, devname, sizeof(mti->mti_svname))
+           >= sizeof(mti->mti_svname))
+               GOTO(out, rc = -E2BIG);
+       if (strlcpy(mti->mti_params, param, sizeof(mti->mti_params))
+           >= sizeof(mti->mti_params))
+               GOTO(out, rc = -E2BIG);
         rc = server_name2index(mti->mti_svname, &mti->mti_stripe_index, &tmp);
         if (rc < 0)
                 /* Not a valid server; may be only fsname */
@@ -3254,20 +3863,26 @@ int mgs_setparam(const struct lu_env *env, struct mgs_device *mgs,
                 if (server_make_name(rc, mti->mti_stripe_index, mti->mti_fsname,
                                      mti->mti_svname))
                         GOTO(out, rc = -EINVAL);
+       /*
+        * Revoke lock so everyone updates.  Should be alright if
+        * someone was already reading while we were updating the logs,
+        * so we don't really need to hold the lock while we're
+        * writing (above).
+        */
+       if (lcfg->lcfg_command == LCFG_SET_PARAM) {
+               mti->mti_flags = rc | LDD_F_PARAM2;
+               mutex_lock(&fsdb->fsdb_mutex);
+               rc = mgs_write_log_param2(env, mgs, fsdb, mti, mti->mti_params);
+               mutex_unlock(&fsdb->fsdb_mutex);
+               mgs_revoke_lock(mgs, fsdb, CONFIG_T_PARAMS);
+       } else {
+               mti->mti_flags = rc | LDD_F_PARAM;
+               mutex_lock(&fsdb->fsdb_mutex);
+               rc = mgs_write_log_param(env, mgs, fsdb, mti, mti->mti_params);
+               mutex_unlock(&fsdb->fsdb_mutex);
+               mgs_revoke_lock(mgs, fsdb, CONFIG_T_CONFIG);
+       }
 
-        mti->mti_flags = rc | LDD_F_PARAM;
-
-       mutex_lock(&fsdb->fsdb_mutex);
-       rc = mgs_write_log_param(env, mgs, fsdb, mti, mti->mti_params);
-       mutex_unlock(&fsdb->fsdb_mutex);
-
-        /*
-         * Revoke lock so everyone updates.  Should be alright if
-         * someone was already reading while we were updating the logs,
-         * so we don't really need to hold the lock while we're
-         * writing (above).
-         */
-       mgs_revoke_lock(mgs, fsdb, CONFIG_T_CONFIG);
 out:
         OBD_FREE_PTR(mti);
         RETURN(rc);
@@ -3275,9 +3890,9 @@ out:
 
 static int mgs_write_log_pool(const struct lu_env *env,
                              struct mgs_device *mgs, char *logname,
-                              struct fs_db *fsdb, char *lovname,
+                             struct fs_db *fsdb, char *tgtname,
                               enum lcfg_command_type cmd,
-                              char *poolname, char *fsname,
+                             char *fsname, char *poolname,
                               char *ostname, char *comment)
 {
         struct llog_handle *llh = NULL;
@@ -3286,18 +3901,96 @@ static int mgs_write_log_pool(const struct lu_env *env,
        rc = record_start_log(env, mgs, &llh, logname);
        if (rc)
                return rc;
-       rc = record_marker(env, llh, fsdb, CM_START, lovname, comment);
+       rc = record_marker(env, llh, fsdb, CM_START, tgtname, comment);
        if (rc)
                goto out;
-       rc = record_base(env, llh, lovname, 0, cmd, poolname, fsname, ostname, 0);
+       rc = record_base(env, llh, tgtname, 0, cmd,
+                        fsname, poolname, ostname, 0);
        if (rc)
                goto out;
-       rc = record_marker(env, llh, fsdb, CM_END, lovname, comment);
+       rc = record_marker(env, llh, fsdb, CM_END, tgtname, comment);
 out:
        record_end_log(env, &llh);
         return rc;
 }
 
+/*
+ * Dispatch a nodemap configuration command to the nodemap_*() helpers.
+ *
+ * \a cmd selects the operation on the nodemap called \a nodemap_name.
+ * \a param is the textual argument of the command where one is used:
+ *  - ADD_RANGE / DEL_RANGE: parsed by nodemap_parse_range()
+ *  - ADMIN / TRUSTED, SQUASH_UID / SQUASH_GID: a base-10 number
+ *  - ADD_xIDMAP / DEL_xIDMAP: parsed by nodemap_parse_idmap()
+ *
+ * Returns the result of the helper that was called, the parse error if
+ * \a param could not be parsed, or -EINVAL for an unknown command.
+ */
+int mgs_nodemap_cmd(const struct lu_env *env, struct mgs_device *mgs,
+                   enum lcfg_command_type cmd, const char *nodemap_name,
+                   const char *param)
+{
+       lnet_nid_t      range[2];
+       __u32           map[2];
+       bool            enable;
+       __u32           squash_id;
+       int             rc = 0;
+       ENTRY;
+
+       switch (cmd) {
+       case LCFG_NODEMAP_ADD:
+               rc = nodemap_add(nodemap_name);
+               break;
+       case LCFG_NODEMAP_DEL:
+               rc = nodemap_del(nodemap_name);
+               break;
+       case LCFG_NODEMAP_ADD_RANGE:
+       case LCFG_NODEMAP_DEL_RANGE:
+               /* both directions share the same range parser */
+               rc = nodemap_parse_range(param, range);
+               if (rc != 0)
+                       break;
+               rc = (cmd == LCFG_NODEMAP_ADD_RANGE) ?
+                       nodemap_add_range(nodemap_name, range) :
+                       nodemap_del_range(nodemap_name, range);
+               break;
+       case LCFG_NODEMAP_ADMIN:
+       case LCFG_NODEMAP_TRUSTED:
+               /* any non-zero number switches the flag on */
+               enable = simple_strtoul(param, NULL, 10);
+               rc = (cmd == LCFG_NODEMAP_ADMIN) ?
+                       nodemap_set_allow_root(nodemap_name, enable) :
+                       nodemap_set_trust_client_ids(nodemap_name, enable);
+               break;
+       case LCFG_NODEMAP_SQUASH_UID:
+       case LCFG_NODEMAP_SQUASH_GID:
+               squash_id = simple_strtoul(param, NULL, 10);
+               rc = (cmd == LCFG_NODEMAP_SQUASH_UID) ?
+                       nodemap_set_squash_uid(nodemap_name, squash_id) :
+                       nodemap_set_squash_gid(nodemap_name, squash_id);
+               break;
+       case LCFG_NODEMAP_ADD_UIDMAP:
+       case LCFG_NODEMAP_ADD_GIDMAP:
+               rc = nodemap_parse_idmap(param, map);
+               if (rc != 0)
+                       break;
+               rc = nodemap_add_idmap(nodemap_name,
+                                      cmd == LCFG_NODEMAP_ADD_UIDMAP ?
+                                      NODEMAP_UID : NODEMAP_GID, map);
+               break;
+       case LCFG_NODEMAP_DEL_UIDMAP:
+       case LCFG_NODEMAP_DEL_GIDMAP:
+               rc = nodemap_parse_idmap(param, map);
+               if (rc != 0)
+                       break;
+               rc = nodemap_del_idmap(nodemap_name,
+                                      cmd == LCFG_NODEMAP_DEL_UIDMAP ?
+                                      NODEMAP_UID : NODEMAP_GID, map);
+               break;
+       default:
+               rc = -EINVAL;
+               break;
+       }
+
+       RETURN(rc);
+}
+
 int mgs_pool_cmd(const struct lu_env *env, struct mgs_device *mgs,
                 enum lcfg_command_type cmd, char *fsname,
                 char *poolname, char *ostname)
@@ -3370,14 +4063,13 @@ int mgs_pool_cmd(const struct lu_env *env, struct mgs_device *mgs,
                 break;
         }
 
-       mutex_lock(&fsdb->fsdb_mutex);
-
         if (canceled_label != NULL) {
                 OBD_ALLOC_PTR(mti);
                 if (mti == NULL)
                        GOTO(out_cancel, rc = -ENOMEM);
         }
 
+       mutex_lock(&fsdb->fsdb_mutex);
         /* write pool def to all MDT logs */
         for (i = 0; i < INDEX_MAP_SIZE * 8; i++) {
                if (test_bit(i,  fsdb->fsdb_mdt_index_map)) {
@@ -3441,64 +4133,3 @@ out_label:
        OBD_FREE(label, label_sz);
         return rc;
 }
-
-#if 0
-/******************** unused *********************/
-static int mgs_backup_llog(struct obd_device *obd, char* fsname)
-{
-        struct file *filp, *bak_filp;
-        struct lvfs_run_ctxt saved;
-        char *logname, *buf;
-        loff_t soff = 0 , doff = 0;
-        int count = 4096, len;
-        int rc = 0;
-
-        OBD_ALLOC(logname, PATH_MAX);
-        if (logname == NULL)
-                return -ENOMEM;
-
-        OBD_ALLOC(buf, count);
-        if (!buf)
-                GOTO(out , rc = -ENOMEM);
-
-        len = snprintf(logname, PATH_MAX, "%s/%s.bak",
-                       MOUNT_CONFIGS_DIR, fsname);
-
-        if (len >= PATH_MAX - 1) {
-                GOTO(out, -ENAMETOOLONG);
-        }
-
-        push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
-
-        bak_filp = l_filp_open(logname, O_RDWR|O_CREAT|O_TRUNC, 0660);
-        if (IS_ERR(bak_filp)) {
-                rc = PTR_ERR(bak_filp);
-                CERROR("backup logfile open %s: %d\n", logname, rc);
-                GOTO(pop, rc);
-        }
-        sprintf(logname, "%s/%s", MOUNT_CONFIGS_DIR, fsname);
-        filp = l_filp_open(logname, O_RDONLY, 0);
-        if (IS_ERR(filp)) {
-                rc = PTR_ERR(filp);
-                CERROR("logfile open %s: %d\n", logname, rc);
-                GOTO(close1f, rc);
-        }
-
-        while ((rc = lustre_fread(filp, buf, count, &soff)) > 0) {
-                rc = lustre_fwrite(bak_filp, buf, count, &doff);
-                break;
-        }
-
-        filp_close(filp, 0);
-close1f:
-        filp_close(bak_filp, 0);
-pop:
-        pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
-out:
-        if (buf)
-                OBD_FREE(buf, count);
-        OBD_FREE(logname, PATH_MAX);
-        return rc;
-}
-
-#endif