Whamcloud - gitweb
Revert "LU-8922 lod: check master stripes properly"
[fs/lustre-release.git] / lustre / mgs / mgs_llog.c
index aca72e3..5d9c535 100644 (file)
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -27,7 +23,7 @@
  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
  *
- * Copyright (c) 2011, 2013, Intel Corporation.
+ * Copyright (c) 2011, 2016, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
@@ -72,7 +68,7 @@ int class_dentry_readdir(const struct lu_env *env,
        LASSERT(dir->do_index_ops);
 
        iops = &dir->do_index_ops->dio_it;
-       it = iops->init(env, dir, LUDA_64BITHASH, BYPASS_CAPA);
+       it = iops->init(env, dir, LUDA_64BITHASH);
        if (IS_ERR(it))
                RETURN(PTR_ERR(it));
 
@@ -100,6 +96,15 @@ int class_dentry_readdir(const struct lu_env *env,
                                goto next;
                }
 
+               /* filter out ".bak" files */
+               /* FIXME: sizeof(".bak") - 1 is 4, not 3; this matches ".ba" only */
+               if (key_sz >= 3 &&
+                   !memcmp(".bak", key + key_sz - 3, 3)) {
+                       CDEBUG(D_MGS, "Skipping backup file %.*s\n",
+                              key_sz, key);
+                       goto next;
+               }
+
                de = mgs_direntry_alloc(key_sz + 1);
                if (de == NULL) {
                        rc = -ENOMEM;
@@ -114,7 +119,8 @@ int class_dentry_readdir(const struct lu_env *env,
 next:
                rc = iops->next(env, it);
        } while (rc == 0);
-       rc = 0;
+       if (rc > 0)
+               rc = 0;
 
        iops->put(env, it);
 
@@ -255,10 +261,12 @@ static int mgs_get_fsdb_from_llog(const struct lu_env *env,
                                  struct mgs_device *mgs,
                                  struct fs_db *fsdb)
 {
-       char                            *logname;
-       struct llog_handle              *loghandle;
-       struct llog_ctxt                *ctxt;
-       struct mgs_fsdb_handler_data     d = { fsdb, 0 };
+       char *logname;
+       struct llog_handle *loghandle;
+       struct llog_ctxt *ctxt;
+       struct mgs_fsdb_handler_data d = {
+               .fsdb = fsdb,
+       };
        int rc;
 
        ENTRY;
@@ -334,12 +342,12 @@ static struct fs_db *mgs_new_fsdb(const struct lu_env *env,
 
         if (strlen(fsname) >= sizeof(fsdb->fsdb_name)) {
                 CERROR("fsname %s is too long\n", fsname);
-                RETURN(NULL);
+               RETURN(ERR_PTR(-EINVAL));
         }
 
         OBD_ALLOC_PTR(fsdb);
         if (!fsdb)
-                RETURN(NULL);
+               RETURN(ERR_PTR(-ENOMEM));
 
         strcpy(fsdb->fsdb_name, fsname);
        mutex_init(&fsdb->fsdb_mutex);
@@ -348,6 +356,7 @@ static struct fs_db *mgs_new_fsdb(const struct lu_env *env,
 
         if (strcmp(fsname, MGSSELF_NAME) == 0) {
                set_bit(FSDB_MGS_SELF, &fsdb->fsdb_flags);
+               fsdb->fsdb_mgs = mgs;
         } else {
                 OBD_ALLOC(fsdb->fsdb_ost_index_map, INDEX_MAP_SIZE);
                 OBD_ALLOC(fsdb->fsdb_mdt_index_map, INDEX_MAP_SIZE);
@@ -380,7 +389,7 @@ err:
         name_destroy(&fsdb->fsdb_clilov);
         name_destroy(&fsdb->fsdb_clilmv);
         OBD_FREE_PTR(fsdb);
-        RETURN(NULL);
+       RETURN(ERR_PTR(rc));
 }
 
 static void mgs_free_fsdb(struct mgs_device *mgs, struct fs_db *fsdb)
@@ -443,11 +452,11 @@ int mgs_find_or_make_fsdb(const struct lu_env *env,
         CDEBUG(D_MGS, "Creating new db\n");
        fsdb = mgs_new_fsdb(env, mgs, name);
        /* lock fsdb_mutex until the db is loaded from llogs */
-       if (fsdb)
+       if (!IS_ERR(fsdb))
                mutex_lock(&fsdb->fsdb_mutex);
        mutex_unlock(&mgs->mgs_mutex);
-        if (!fsdb)
-               RETURN(-ENOMEM);
+       if (IS_ERR(fsdb))
+               RETURN(PTR_ERR(fsdb));
 
        if (!test_bit(FSDB_MGS_SELF, &fsdb->fsdb_flags)) {
                 /* populate the db from the client llog */
@@ -559,13 +568,14 @@ static int mgs_set_index(const struct lu_env *env,
                         fsdb->fsdb_mdt_count ++;
         }
 
-        if (mti->mti_stripe_index >= INDEX_MAP_SIZE * 8) {
-                LCONSOLE_ERROR_MSG(0x13f, "Server %s requested index %d, "
-                                   "but the max index is %d.\n",
-                                   mti->mti_svname, mti->mti_stripe_index,
-                                   INDEX_MAP_SIZE * 8);
+       /* the last index(0xffff) is reserved for default value. */
+       if (mti->mti_stripe_index >= INDEX_MAP_SIZE * 8 - 1) {
+               LCONSOLE_ERROR_MSG(0x13f, "Server %s requested index %u, "
+                                  "but index must be less than %u.\n",
+                                  mti->mti_svname, mti->mti_stripe_index,
+                                  INDEX_MAP_SIZE * 8 - 1);
                GOTO(out_up, rc = -ERANGE);
-        }
+       }
 
        if (test_bit(mti->mti_stripe_index, imap)) {
                 if ((mti->mti_flags & LDD_F_VIRGIN) &&
@@ -586,8 +596,12 @@ static int mgs_set_index(const struct lu_env *env,
        set_bit(mti->mti_stripe_index, imap);
        clear_bit(FSDB_LOG_EMPTY, &fsdb->fsdb_flags);
        mutex_unlock(&fsdb->fsdb_mutex);
-       server_make_name(mti->mti_flags & ~(LDD_F_VIRGIN | LDD_F_WRITECONF),
-                        mti->mti_stripe_index, mti->mti_fsname, mti->mti_svname);
+       if (server_make_name(mti->mti_flags & ~(LDD_F_VIRGIN | LDD_F_WRITECONF),
+                            mti->mti_stripe_index, mti->mti_fsname,
+                            mti->mti_svname)) {
+               CERROR("unknown server type %#x\n", mti->mti_flags);
+               return -EINVAL;
+       }
 
         CDEBUG(D_MGS, "Set index for %s to %d\n", mti->mti_svname,
                mti->mti_stripe_index);
@@ -603,6 +617,116 @@ struct mgs_modify_lookup {
         int               mml_modified;
 };
 
+static int mgs_check_record_match(const struct lu_env *env,
+                               struct llog_handle *llh,
+                               struct llog_rec_hdr *rec, void *data)
+{
+       struct cfg_marker *mc_marker = data;
+       struct cfg_marker *marker;
+       struct lustre_cfg *lcfg = REC_DATA(rec);
+       int cfg_len = REC_DATA_LEN(rec);
+       int rc;
+       ENTRY;
+
+
+       if (rec->lrh_type != OBD_CFG_REC) {
+               CDEBUG(D_ERROR, "Unhandled lrh_type: %#x\n", rec->lrh_type);
+               RETURN(-EINVAL);
+       }
+
+       rc = lustre_cfg_sanity_check(lcfg, cfg_len);
+       if (rc) {
+               CDEBUG(D_ERROR, "Insane cfg\n");
+               RETURN(rc);
+       }
+
+       /* We only care about markers */
+       if (lcfg->lcfg_command != LCFG_MARKER)
+               RETURN(0);
+
+       marker = lustre_cfg_buf(lcfg, 1);
+
+       if (marker->cm_flags & CM_SKIP)
+               RETURN(0);
+
+       if ((strcmp(mc_marker->cm_comment, marker->cm_comment) == 0) &&
+               (strcmp(mc_marker->cm_tgtname, marker->cm_tgtname) == 0)) {
+               /* Found a non-skipped marker match */
+               CDEBUG(D_MGS, "Matched rec %u marker %d flag %x %s %s\n",
+                       rec->lrh_index, marker->cm_step,
+                       marker->cm_flags, marker->cm_tgtname,
+                       marker->cm_comment);
+               rc = LLOG_PROC_BREAK;
+       }
+
+       RETURN(rc);
+}
+
+/**
+ * Check an existing config log record with matching comment and device
+ * Return code:
+ * 0 - checked successfully,
+ * LLOG_PROC_BREAK - record matches
+ * negative - error
+ */
+static int mgs_check_marker(const struct lu_env *env, struct mgs_device *mgs,
+               struct fs_db *fsdb, struct mgs_target_info *mti,
+               char *logname, char *devname, char *comment)
+{
+       struct llog_handle *loghandle;
+       struct llog_ctxt *ctxt;
+       struct cfg_marker *mc_marker;
+       int rc;
+
+       ENTRY;
+
+       LASSERT(mutex_is_locked(&fsdb->fsdb_mutex));
+       CDEBUG(D_MGS, "mgs check %s/%s/%s\n", logname, devname, comment);
+
+       ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
+       LASSERT(ctxt != NULL);
+       rc = llog_open(env, ctxt, &loghandle, NULL, logname, LLOG_OPEN_EXISTS);
+       if (rc < 0) {
+               if (rc == -ENOENT)
+                       rc = 0;
+               GOTO(out_pop, rc);
+       }
+
+       rc = llog_init_handle(env, loghandle, LLOG_F_IS_PLAIN, NULL);
+       if (rc)
+               GOTO(out_close, rc);
+
+       if (llog_get_size(loghandle) <= 1)
+               GOTO(out_close, rc = 0);
+
+       OBD_ALLOC_PTR(mc_marker);
+       if (!mc_marker)
+               GOTO(out_close, rc = -ENOMEM);
+       if (strlcpy(mc_marker->cm_comment, comment,
+               sizeof(mc_marker->cm_comment)) >=
+               sizeof(mc_marker->cm_comment))
+               GOTO(out_free, rc = -E2BIG);
+       if (strlcpy(mc_marker->cm_tgtname, devname,
+               sizeof(mc_marker->cm_tgtname)) >=
+               sizeof(mc_marker->cm_tgtname))
+               GOTO(out_free, rc = -E2BIG);
+
+       rc = llog_process(env, loghandle, mgs_check_record_match,
+                       (void *)mc_marker, NULL);
+
+out_free:
+       OBD_FREE_PTR(mc_marker);
+
+out_close:
+       llog_close(env, loghandle);
+out_pop:
+       if (rc && rc != LLOG_PROC_BREAK)
+               CDEBUG(D_ERROR, "%s: mgs check %s/%s failed: rc = %d\n",
+                       mgs->mgs_obd->obd_name, mti->mti_svname, comment, rc);
+       llog_ctxt_put(ctxt);
+       RETURN(rc);
+}
+
 static int mgs_modify_handler(const struct lu_env *env,
                              struct llog_handle *llh,
                              struct llog_rec_hdr *rec, void *data)
@@ -823,21 +947,24 @@ static inline int record_add_uuid(const struct lu_env *env,
                                  struct llog_handle *llh,
                                  uint64_t nid, char *uuid)
 {
-       return record_base(env, llh, NULL, nid, LCFG_ADD_UUID, uuid, 0, 0, 0);
+       return record_base(env, llh, NULL, nid, LCFG_ADD_UUID, uuid,
+                          NULL, NULL, NULL);
 }
 
 static inline int record_add_conn(const struct lu_env *env,
                                  struct llog_handle *llh,
                                  char *devname, char *uuid)
 {
-       return record_base(env, llh, devname, 0, LCFG_ADD_CONN, uuid, 0, 0, 0);
+       return record_base(env, llh, devname, 0, LCFG_ADD_CONN, uuid,
+                          NULL, NULL, NULL);
 }
 
 static inline int record_attach(const struct lu_env *env,
                                struct llog_handle *llh, char *devname,
                                char *type, char *uuid)
 {
-       return record_base(env, llh,devname, 0, LCFG_ATTACH, type, uuid, 0, 0);
+       return record_base(env, llh, devname, 0, LCFG_ATTACH, type, uuid,
+                          NULL, NULL);
 }
 
 static inline int record_setup(const struct lu_env *env,
@@ -1068,9 +1195,10 @@ static int mgs_replace_nids_log(const struct lu_env *env,
        if (!mrul)
                GOTO(out_close, rc = -ENOMEM);
        /* devname is only needed information to replace UUID records */
-       strncpy(mrul->target.mti_svname, devname, MTI_NAME_MAXLEN);
+       strlcpy(mrul->target.mti_svname, devname,
+               sizeof(mrul->target.mti_svname));
        /* parse nids later */
-       strncpy(mrul->target.mti_params, nids, MTI_PARAM_MAXLEN);
+       strlcpy(mrul->target.mti_params, nids, sizeof(mrul->target.mti_params));
        /* Copy records to this temporary llog */
        mrul->temp_llh = orig_llh;
 
@@ -1314,8 +1442,8 @@ static inline int record_lov_add(const struct lu_env *env,
                                  char *lov_name, char *ost_uuid,
                                  char *index, char *gen)
 {
-       return record_base(env,llh,lov_name,0,LCFG_LOV_ADD_OBD,
-                          ost_uuid, index, gen, 0);
+       return record_base(env, llh, lov_name, 0, LCFG_LOV_ADD_OBD,
+                          ost_uuid, index, gen, NULL);
 }
 
 static inline int record_mount_opt(const struct lu_env *env,
@@ -1323,8 +1451,8 @@ static inline int record_mount_opt(const struct lu_env *env,
                                    char *profile, char *lov_name,
                                    char *mdc_name)
 {
-       return record_base(env,llh,NULL,0,LCFG_MOUNTOPT,
-                           profile,lov_name,mdc_name,0);
+       return record_base(env, llh, NULL, 0, LCFG_MOUNTOPT,
+                          profile, lov_name, mdc_name, NULL);
 }
 
 static int record_marker(const struct lu_env *env,
@@ -1440,10 +1568,12 @@ out_end:
 }
 
 /* write the lcfg in all logs for the given fs */
-int mgs_write_log_direct_all(const struct lu_env *env, struct mgs_device *mgs,
-                            struct fs_db *fsdb, struct mgs_target_info *mti,
-                            struct llog_cfg_rec *lcr, char *devname,
-                            char *comment, int server_only)
+static int mgs_write_log_direct_all(const struct lu_env *env,
+                                   struct mgs_device *mgs,
+                                   struct fs_db *fsdb,
+                                   struct mgs_target_info *mti,
+                                   struct llog_cfg_rec *lcr, char *devname,
+                                   char *comment, int server_only)
 {
        struct list_head         log_list;
        struct mgs_direntry     *dirent, *n;
@@ -1467,7 +1597,9 @@ int mgs_write_log_direct_all(const struct lu_env *env, struct mgs_device *mgs,
                if (server_only && strstr(dirent->mde_name, "-client") != NULL)
                        goto next;
 
-               if (strncmp(fsname, dirent->mde_name, len) != 0)
+               if (strlen(dirent->mde_name) <= len ||
+                   strncmp(fsname, dirent->mde_name, len) != 0 ||
+                   dirent->mde_name[len] != '-')
                        goto next;
 
                CDEBUG(D_MGS, "Changing log %s\n", dirent->mde_name);
@@ -1633,7 +1765,7 @@ static int mgs_steal_client_llog_handler(const struct lu_env *env,
                 RETURN(rc);
 
        if (lcfg->lcfg_command == LCFG_ADD_UUID) {
-               uint64_t nodenid = lcfg->lcfg_nid;
+               __u64 nodenid = lcfg->lcfg_nid;
 
                if (strlen(tmti->mti_uuid) == 0) {
                        /* target uuid not set, this config record is before
@@ -1642,9 +1774,12 @@ static int mgs_steal_client_llog_handler(const struct lu_env *env,
                        tmti->mti_nids[tmti->mti_nid_count] = nodenid;
                        tmti->mti_nid_count++;
                } else {
+                       char nidstr[LNET_NIDSTR_SIZE];
+
                        /* failover node nid */
+                       libcfs_nid2str_r(nodenid, nidstr, sizeof(nidstr));
                        rc = add_param(tmti->mti_params, PARAM_FAILNODE,
-                                      libcfs_nid2str(nodenid));
+                                       nidstr);
                }
 
                RETURN(rc);
@@ -1882,30 +2017,48 @@ static int mgs_write_log_failnids(const struct lu_env *env,
         #07 L add_conn 0:OSC_uml1_ost1_mdsA  1:uml2_UUID
         */
 
-        /* Pull failnid info out of params string */
+       /*
+        * Pull failnid info out of params string, which may contain something
+        * like "<nid1>,<nid2>:<nid3>,<nid4>".  class_parse_nid() does not
+        * complain about abnormal inputs like ",:<nid1>", "<nid1>:,<nid2>",
+        * etc.  However, convert_hostnames() should have caught those.
+        */
         while (class_find_param(ptr, PARAM_FAILNODE, &ptr) == 0) {
                 while (class_parse_nid(ptr, &nid, &ptr) == 0) {
-                        if (failnodeuuid == NULL) {
-                                /* We don't know the failover node name,
-                                   so just use the first nid as the uuid */
-                                rc = name_create(&failnodeuuid,
-                                                 libcfs_nid2str(nid), "");
-                                if (rc)
-                                        return rc;
-                        }
-                        CDEBUG(D_MGS, "add nid %s for failover uuid %s, "
-                               "client %s\n", libcfs_nid2str(nid),
-                               failnodeuuid, cliname);
+                       char nidstr[LNET_NIDSTR_SIZE];
+
+                       if (failnodeuuid == NULL) {
+                               /* We don't know the failover node name,
+                                * so just use the first nid as the uuid */
+                               libcfs_nid2str_r(nid, nidstr, sizeof(nidstr));
+                               rc = name_create(&failnodeuuid, nidstr, "");
+                               if (rc != 0)
+                                       return rc;
+                       }
+                       CDEBUG(D_MGS, "add nid %s for failover uuid %s, "
+                               "client %s\n",
+                               libcfs_nid2str_r(nid, nidstr, sizeof(nidstr)),
+                               failnodeuuid, cliname);
                        rc = record_add_uuid(env, llh, nid, failnodeuuid);
-                }
+                       /*
+                        * If *ptr is ':', we have added all NIDs for
+                        * failnodeuuid.
+                        */
+                       if (*ptr == ':') {
+                               rc = record_add_conn(env, llh, cliname,
+                                                    failnodeuuid);
+                               name_destroy(&failnodeuuid);
+                               failnodeuuid = NULL;
+                       }
+               }
                if (failnodeuuid) {
                        rc = record_add_conn(env, llh, cliname, failnodeuuid);
                        name_destroy(&failnodeuuid);
                        failnodeuuid = NULL;
                }
-        }
+       }
 
-        return rc;
+       return rc;
 }
 
 static int mgs_write_log_mdc_to_lmv(const struct lu_env *env,
@@ -1919,9 +2072,10 @@ static int mgs_write_log_mdc_to_lmv(const struct lu_env *env,
        char *nodeuuid = NULL;
        char *mdcuuid = NULL;
        char *lmvuuid = NULL;
-        char index[6];
-        int i, rc;
-        ENTRY;
+       char index[6];
+       char nidstr[LNET_NIDSTR_SIZE];
+       int i, rc;
+       ENTRY;
 
        if (mgs_log_is_empty(env, mgs, logname)) {
                 CERROR("log is empty! Logical error\n");
@@ -1931,7 +2085,8 @@ static int mgs_write_log_mdc_to_lmv(const struct lu_env *env,
         CDEBUG(D_MGS, "adding mdc for %s to log %s:lmv(%s)\n",
                mti->mti_svname, logname, lmvname);
 
-       rc = name_create(&nodeuuid, libcfs_nid2str(mti->mti_nids[0]), "");
+       libcfs_nid2str_r(mti->mti_nids[0], nidstr, sizeof(nidstr));
+       rc = name_create(&nodeuuid, nidstr, "");
        if (rc)
                RETURN(rc);
        rc = name_create(&mdcname, mti->mti_svname, "-mdc");
@@ -1951,19 +2106,21 @@ static int mgs_write_log_mdc_to_lmv(const struct lu_env *env,
                            "add mdc");
        if (rc)
                GOTO(out_end, rc);
-        for (i = 0; i < mti->mti_nid_count; i++) {
-                CDEBUG(D_MGS, "add nid %s for mdt\n",
-                       libcfs_nid2str(mti->mti_nids[i]));
+       for (i = 0; i < mti->mti_nid_count; i++) {
+               CDEBUG(D_MGS, "add nid %s for mdt\n",
+                       libcfs_nid2str_r(mti->mti_nids[i],
+                                        nidstr, sizeof(nidstr)));
 
                rc = record_add_uuid(env, llh, mti->mti_nids[i], nodeuuid);
                if (rc)
                        GOTO(out_end, rc);
-        }
+       }
 
        rc = record_attach(env, llh, mdcname, LUSTRE_MDC_NAME, lmvuuid);
        if (rc)
                GOTO(out_end, rc);
-       rc = record_setup(env, llh, mdcname, mti->mti_uuid, nodeuuid, 0, 0);
+       rc = record_setup(env, llh, mdcname, mti->mti_uuid, nodeuuid,
+                         NULL, NULL);
        if (rc)
                GOTO(out_end, rc);
        rc = mgs_write_log_failnids(env, mti, llh, mdcname);
@@ -2046,6 +2203,7 @@ static int mgs_write_log_osp_to_mdt(const struct lu_env *env,
        char    *mdtname = NULL;
        char    *lovname = NULL;
        char    index_str[16];
+       char    nidstr[LNET_NIDSTR_SIZE];
        int     i, rc;
 
        ENTRY;
@@ -2061,7 +2219,8 @@ static int mgs_write_log_osp_to_mdt(const struct lu_env *env,
        if (rc)
                RETURN(rc);
 
-       rc = name_create(&nodeuuid, libcfs_nid2str(mti->mti_nids[0]), "");
+       libcfs_nid2str_r(mti->mti_nids[0], nidstr, sizeof(nidstr));
+       rc = name_create(&nodeuuid, nidstr, "");
        if (rc)
                GOTO(out_destory, rc);
 
@@ -2097,11 +2256,12 @@ static int mgs_write_log_osp_to_mdt(const struct lu_env *env,
 
        for (i = 0; i < mti->mti_nid_count; i++) {
                CDEBUG(D_MGS, "add nid %s for mdt\n",
-                      libcfs_nid2str(mti->mti_nids[i]));
+                       libcfs_nid2str_r(mti->mti_nids[i],
+                                        nidstr, sizeof(nidstr)));
                rc = record_add_uuid(env, llh, mti->mti_nids[i], nodeuuid);
                if (rc)
                        GOTO(out_end, rc);
-        }
+       }
 
        rc = record_attach(env, llh, ospname, LUSTRE_OSP_NAME, lovuuid);
        if (rc)
@@ -2117,8 +2277,7 @@ static int mgs_write_log_osp_to_mdt(const struct lu_env *env,
                GOTO(out_end, rc);
 
        /* Add mdc(osp) to lod */
-       snprintf(index_str, sizeof(mti->mti_stripe_index), "%d",
-                mti->mti_stripe_index);
+       snprintf(index_str, sizeof(index_str), "%d", mti->mti_stripe_index);
        rc = record_base(env, llh, lovname, 0, LCFG_ADD_MDC, mti->mti_uuid,
                         index_str, "1", NULL);
        if (rc)
@@ -2256,34 +2415,34 @@ static int mgs_write_log_mdt(const struct lu_env *env,
         #14 L mount_option 0:  1:client  2:lov1  3:MDC_uml1_mdsA_MNT_client
         */
 
-                /* copy client info about lov/lmv */
-               mgi->mgi_comp.comp_mti = mti;
-               mgi->mgi_comp.comp_fsdb = fsdb;
+       /* copy client info about lov/lmv */
+       mgi->mgi_comp.comp_mti = mti;
+       mgi->mgi_comp.comp_fsdb = fsdb;
 
-               rc = mgs_steal_llog_for_mdt_from_client(env, mgs, cliname,
-                                                       &mgi->mgi_comp);
-               if (rc)
-                       GOTO(out_free, rc);
-               rc = mgs_write_log_mdc_to_lmv(env, mgs, fsdb, mti, cliname,
-                                              fsdb->fsdb_clilmv);
-               if (rc)
-                       GOTO(out_free, rc);
+       rc = mgs_steal_llog_for_mdt_from_client(env, mgs, cliname,
+                                               &mgi->mgi_comp);
+       if (rc)
+               GOTO(out_free, rc);
+       rc = mgs_write_log_mdc_to_lmv(env, mgs, fsdb, mti, cliname,
+                                     fsdb->fsdb_clilmv);
+       if (rc)
+               GOTO(out_free, rc);
 
-                /* add mountopts */
-               rc = record_start_log(env, mgs, &llh, cliname);
-               if (rc)
-                       GOTO(out_free, rc);
+       /* add mountopts */
+       rc = record_start_log(env, mgs, &llh, cliname);
+       if (rc)
+               GOTO(out_free, rc);
 
-               rc = record_marker(env, llh, fsdb, CM_START, cliname,
-                                   "mount opts");
-               if (rc)
-                       GOTO(out_end, rc);
-               rc = record_mount_opt(env, llh, cliname, fsdb->fsdb_clilov,
-                                      fsdb->fsdb_clilmv);
-               if (rc)
-                       GOTO(out_end, rc);
-               rc = record_marker(env, llh, fsdb, CM_END, cliname,
-                                   "mount opts");
+       rc = record_marker(env, llh, fsdb, CM_START, cliname,
+                          "mount opts");
+       if (rc)
+               GOTO(out_end, rc);
+       rc = record_mount_opt(env, llh, cliname, fsdb->fsdb_clilov,
+                             fsdb->fsdb_clilmv);
+       if (rc)
+               GOTO(out_end, rc);
+       rc = record_marker(env, llh, fsdb, CM_END, cliname,
+                          "mount opts");
 
        if (rc)
                GOTO(out_end, rc);
@@ -2298,8 +2457,34 @@ static int mgs_write_log_mdt(const struct lu_env *env,
                        if (rc)
                                GOTO(out_end, rc);
 
-                       rc = mgs_write_log_osp_to_mdt(env, mgs, fsdb, mti,
-                                                     i, logname);
+                       /* NB: If the log for the MDT is empty, it means
+                        * the MDT has only been added to the index
+                        * map and has not been processed yet, i.e. this
+                        * is an unregistered MDT, see mgs_write_log_target(),
+                        * so we should skip it. Otherwise:
+                        *
+                        * 1. MGS gets register requests for MDT1 and MDT2.
+                        *
+                        * 2. Then both MDT1 and MDT2 are added into
+                        * fsdb_mdt_index_map (see mgs_set_index()).
+                        *
+                        * 3. Then MDT1 gets the fsdb_mutex lock and
+                        * generates the config log; here, it will regard MDT2
+                        * as an existing MDT, and generate "add osp" for
+                        * lustre-MDT0001-osp-MDT0002. Note: at that moment
+                        * the MDT0002 config log is still empty, so it will
+                        * add "add osp" even before "lov setup", which
+                        * will definitely cause trouble.
+                        *
+                        * 4. MDT1 registration finishes, fsdb_mutex is
+                        * released, then MDT2 gets in; then in the above
+                        * mgs_steal_llog_for_mdt_from_client(), it will
+                        * add another osp log for lustre-MDT0001-osp-MDT0002,
+                        * which will cause further trouble. */
+                       if (!mgs_log_is_empty(env, mgs, logname))
+                               rc = mgs_write_log_osp_to_mdt(env, mgs, fsdb,
+                                                             mti, i, logname);
+
                        name_destroy(&logname);
                        if (rc)
                                GOTO(out_end, rc);
@@ -2319,25 +2504,27 @@ static int mgs_write_log_osc_to_lov(const struct lu_env *env,
                                     char *logname, char *suffix, char *lovname,
                                     enum lustre_sec_part sec_part, int flags)
 {
-        struct llog_handle *llh = NULL;
+       struct llog_handle *llh = NULL;
        char *nodeuuid = NULL;
        char *oscname = NULL;
        char *oscuuid = NULL;
        char *lovuuid = NULL;
        char *svname = NULL;
-        char index[6];
-        int i, rc;
+       char index[6];
+       char nidstr[LNET_NIDSTR_SIZE];
+       int i, rc;
+       ENTRY;
 
-        ENTRY;
-        CDEBUG(D_INFO, "adding osc for %s to log %s\n",
-               mti->mti_svname, logname);
+       CDEBUG(D_INFO, "adding osc for %s to log %s\n",
+               mti->mti_svname, logname);
 
        if (mgs_log_is_empty(env, mgs, logname)) {
-                CERROR("log is empty! Logical error\n");
-                RETURN (-EINVAL);
-        }
+               CERROR("log is empty! Logical error\n");
+               RETURN(-EINVAL);
+       }
 
-       rc = name_create(&nodeuuid, libcfs_nid2str(mti->mti_nids[0]), "");
+       libcfs_nid2str_r(mti->mti_nids[0], nidstr, sizeof(nidstr));
+       rc = name_create(&nodeuuid, nidstr, "");
        if (rc)
                RETURN(rc);
        rc = name_create(&svname, mti->mti_svname, "-osc");
@@ -2388,16 +2575,19 @@ static int mgs_write_log_osc_to_lov(const struct lu_env *env,
         * (multiple interfaces), while nids after as failover node nids.
         * See mgs_steal_client_llog_handler() LCFG_ADD_UUID.
         */
-        for (i = 0; i < mti->mti_nid_count; i++) {
-                CDEBUG(D_MGS, "add nid %s\n", libcfs_nid2str(mti->mti_nids[i]));
+       for (i = 0; i < mti->mti_nid_count; i++) {
+               CDEBUG(D_MGS, "add nid %s\n",
+                       libcfs_nid2str_r(mti->mti_nids[i],
+                                        nidstr, sizeof(nidstr)));
                rc = record_add_uuid(env, llh, mti->mti_nids[i], nodeuuid);
                if (rc)
                        GOTO(out_end, rc);
-        }
+       }
        rc = record_attach(env, llh, oscname, LUSTRE_OSC_NAME, lovuuid);
        if (rc)
                GOTO(out_end, rc);
-       rc = record_setup(env, llh, oscname, mti->mti_uuid, nodeuuid, 0, 0);
+       rc = record_setup(env, llh, oscname, mti->mti_uuid, nodeuuid,
+                         NULL, NULL);
        if (rc)
                GOTO(out_end, rc);
        rc = mgs_write_log_failnids(env, mti, llh, oscname);
@@ -2471,7 +2661,7 @@ static int mgs_write_log_ost(const struct lu_env *env,
                GOTO(out_end, rc);
        rc = record_setup(env, llh, mti->mti_svname,
                           "dev"/*ignored*/, "type"/*ignored*/,
-                          failout ? "n" : "f", 0/*options*/);
+                         failout ? "n" : "f", NULL/*options*/);
        if (rc)
                GOTO(out_end, rc);
        rc = record_marker(env, llh, fsdb, CM_END, mti->mti_svname, "add ost");
@@ -2537,7 +2727,7 @@ out_end:
                        GOTO(out_free, rc);
         }
        rc = mgs_write_log_osc_to_lov(env, mgs, fsdb, mti, logname, "",
-                                     fsdb->fsdb_clilov, LUSTRE_SP_CLI, 0);
+                                     fsdb->fsdb_clilov, LUSTRE_SP_CLI, flags);
 out_free:
         name_destroy(&logname);
         RETURN(rc);
@@ -3030,6 +3220,9 @@ static int mgs_srpc_set_param_mem(struct fs_db *fsdb,
                 }
 
                 rset = &tgtconf->mtsc_rset;
+       } else if (strcmp(svname, MGSSELF_NAME) == 0) {
+               /* put _mgs related srpc rule directly in mgs ruleset */
+               rset = &fsdb->fsdb_mgs->mgs_lut.lut_sptlrpc_rset;
         } else {
                 rset = &fsdb->fsdb_srpc_gen;
         }
@@ -3217,7 +3410,7 @@ static int mgs_write_log_param(const struct lu_env *env,
        struct mgs_thread_info *mgi = mgs_env_info(env);
         char *logname;
         char *tmp;
-        int rc = 0, rc2 = 0;
+       int rc = 0;
         ENTRY;
 
         /* For various parameter settings, we have to figure out which logs
@@ -3273,61 +3466,77 @@ static int mgs_write_log_param(const struct lu_env *env,
                GOTO(end, rc);
        }
 
-        if (class_match_param(ptr, PARAM_OSC""PARAM_ACTIVE, &tmp) == 0) {
-                /* active=0 means off, anything else means on */
-                int flag = (*tmp == '0') ? CM_EXCLUDE : 0;
-                int i;
+       if (class_match_param(ptr, PARAM_OSC PARAM_ACTIVE, &tmp) == 0 ||
+           class_match_param(ptr, PARAM_MDC PARAM_ACTIVE, &tmp) == 0) {
+               /* active=0 means off, anything else means on */
+               int flag = (*tmp == '0') ? CM_EXCLUDE : 0;
+               bool deactive_osc = memcmp(ptr, PARAM_OSC PARAM_ACTIVE,
+                                         strlen(PARAM_OSC PARAM_ACTIVE)) == 0;
+               int i;
 
-                if (!(mti->mti_flags & LDD_F_SV_TYPE_OST)) {
-                        LCONSOLE_ERROR_MSG(0x144, "%s: Only OSCs can "
-                                           "be (de)activated.\n",
-                                           mti->mti_svname);
-                        GOTO(end, rc = -EINVAL);
-                }
-                LCONSOLE_WARN("Permanently %sactivating %s\n",
-                              flag ? "de": "re", mti->mti_svname);
-                /* Modify clilov */
+               if (!deactive_osc) {
+                       __u32   index;
+
+                       rc = server_name2index(mti->mti_svname, &index, NULL);
+                       if (rc < 0)
+                               GOTO(end, rc);
+
+                       if (index == 0) {
+                               LCONSOLE_ERROR_MSG(0x144, "%s: MDC0 can not be"
+                                                  " (de)activated.\n",
+                                                  mti->mti_svname);
+                               GOTO(end, rc = -EINVAL);
+                       }
+               }
+
+               LCONSOLE_WARN("Permanently %sactivating %s\n",
+                             flag ? "de" : "re", mti->mti_svname);
+               /* Modify clilov */
                rc = name_create(&logname, mti->mti_fsname, "-client");
-               if (rc)
+               if (rc < 0)
                        GOTO(end, rc);
                rc = mgs_modify(env, mgs, fsdb, mti, logname,
-                                mti->mti_svname, "add osc", flag);
-                name_destroy(&logname);
-                if (rc)
-                        goto active_err;
-                /* Modify mdtlov */
-                /* Add to all MDT logs for CMD */
-                for (i = 0; i < INDEX_MAP_SIZE * 8; i++) {
+                               mti->mti_svname,
+                               deactive_osc ? "add osc" : "add mdc", flag);
+               name_destroy(&logname);
+               if (rc < 0)
+                       goto active_err;
+
+               /* Modify mdtlov */
+               /* Add to all MDT logs for DNE */
+               for (i = 0; i < INDEX_MAP_SIZE * 8; i++) {
                        if (!test_bit(i, fsdb->fsdb_mdt_index_map))
-                                continue;
+                               continue;
                        rc = name_create_mdt(&logname, mti->mti_fsname, i);
-                       if (rc)
+                       if (rc < 0)
                                GOTO(end, rc);
                        rc = mgs_modify(env, mgs, fsdb, mti, logname,
-                                       mti->mti_svname, "add osc", flag);
-                        name_destroy(&logname);
-                        if (rc)
-                                goto active_err;
-                }
-        active_err:
-                if (rc) {
-                        LCONSOLE_ERROR_MSG(0x145, "Couldn't find %s in"
-                                           "log (%d). No permanent "
-                                           "changes were made to the "
-                                           "config log.\n",
-                                           mti->mti_svname, rc);
+                                       mti->mti_svname,
+                                       deactive_osc ? "add osc" : "add osp",
+                                       flag);
+                       name_destroy(&logname);
+                       if (rc < 0)
+                               goto active_err;
+               }
+active_err:
+               if (rc < 0) {
+                       LCONSOLE_ERROR_MSG(0x145, "Couldn't find %s in"
+                                          "log (%d). No permanent "
+                                          "changes were made to the "
+                                          "config log.\n",
+                                          mti->mti_svname, rc);
                        if (test_bit(FSDB_OLDLOG14, &fsdb->fsdb_flags))
-                                LCONSOLE_ERROR_MSG(0x146, "This may be"
-                                                   " because the log"
-                                                   "is in the old 1.4"
-                                                   "style. Consider "
-                                                   " --writeconf to "
-                                                   "update the logs.\n");
-                        GOTO(end, rc);
-                }
-                /* Fall through to osc proc for deactivating live OSC
-                   on running MDT / clients. */
-        }
+                               LCONSOLE_ERROR_MSG(0x146, "This may be"
+                                                  " because the log"
+                                                  "is in the old 1.4"
+                                                  "style. Consider "
+                                                  " --writeconf to "
+                                                  "update the logs.\n");
+                       GOTO(end, rc);
+               }
+               /* Fall through to osc/mdc proc for deactivating live
+                  OSC/OSP on running MDT / clients. */
+       }
         /* Below here, let obd's XXX_process_config methods handle it */
 
         /* All lov. in proc */
@@ -3447,6 +3656,78 @@ static int mgs_write_log_param(const struct lu_env *env,
                                }
                        }
                }
+
+               /* For mdc activate/deactivate, it affects OSP on MDT as well */
+               if (class_match_param(ptr, PARAM_MDC PARAM_ACTIVE, &tmp) == 0 &&
+                   rc == 0) {
+                       char suffix[16];
+                       char *lodname = NULL;
+                       char *param_str = NULL;
+                       int i;
+                       int index;
+
+                       /* replace mdc with osp */
+                       memcpy(ptr, PARAM_OSP, strlen(PARAM_OSP));
+                       rc = server_name2index(mti->mti_svname, &index, NULL);
+                       if (rc < 0) {
+                               memcpy(ptr, PARAM_MDC, strlen(PARAM_MDC));
+                               GOTO(end, rc);
+                       }
+
+                       for (i = 0; i < INDEX_MAP_SIZE * 8; i++) {
+                               if (!test_bit(i, fsdb->fsdb_mdt_index_map))
+                                       continue;
+
+                               if (i == index)
+                                       continue;
+
+                               name_destroy(&logname);
+                               rc = name_create_mdt(&logname, mti->mti_fsname,
+                                                    i);
+                               if (rc < 0)
+                                       break;
+
+                               if (mgs_log_is_empty(env, mgs, logname))
+                                       continue;
+
+                               snprintf(suffix, sizeof(suffix), "-osp-MDT%04x",
+                                        i);
+                               name_destroy(&cname);
+                               rc = name_create(&cname, mti->mti_svname,
+                                                suffix);
+                               if (rc < 0)
+                                       break;
+
+                               rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, logname,
+                                                 &mgi->mgi_bufs, cname, ptr);
+                               if (rc < 0)
+                                       break;
+
+                               /* Add configuration log for notifying LOD
+                                * to activate/deactivate the OSP. */
+                               name_destroy(&param_str);
+                               rc = name_create(&param_str, cname,
+                                                (*tmp == '0') ?  ".active=0" :
+                                                ".active=1");
+                               if (rc < 0)
+                                       break;
+
+                               name_destroy(&lodname);
+                               rc = name_create(&lodname, logname, "-mdtlov");
+                               if (rc < 0)
+                                       break;
+
+                               rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, logname,
+                                                 &mgi->mgi_bufs, lodname,
+                                                 param_str);
+                               if (rc < 0)
+                                       break;
+                       }
+                       memcpy(ptr, PARAM_MDC, strlen(PARAM_MDC));
+                       name_destroy(&lodname);
+                       name_destroy(&param_str);
+               }
+
                name_destroy(&logname);
                name_destroy(&cname);
                GOTO(end, rc);
@@ -3544,47 +3825,12 @@ static int mgs_write_log_param(const struct lu_env *env,
        }
 
         LCONSOLE_WARN("Ignoring unrecognized param '%s'\n", ptr);
-        rc2 = -ENOSYS;
 
 end:
         if (rc)
                 CERROR("err %d on param '%s'\n", rc, ptr);
 
-        RETURN(rc ?: rc2);
-}
-
-/* Not implementing automatic failover nid addition at this time. */
-int mgs_check_failnid(const struct lu_env *env, struct mgs_device *mgs,
-                     struct mgs_target_info *mti)
-{
-#if 0
-        struct fs_db *fsdb;
-        int rc;
-        ENTRY;
-
-        rc = mgs_find_or_make_fsdb(obd, fsname, &fsdb);
-        if (rc)
-                RETURN(rc);
-
-        if (mgs_log_is_empty(obd, mti->mti_svname))
-                /* should never happen */
-                RETURN(-ENOENT);
-
-        CDEBUG(D_MGS, "Checking for new failnids for %s\n", mti->mti_svname);
-
-        /* FIXME We can just check mti->params to see if we're already in
-           the failover list.  Modify mti->params for rewriting back at
-           server_register_target(). */
-
-       mutex_lock(&fsdb->fsdb_mutex);
-        rc = mgs_write_log_add_failnid(obd, fsdb, mti);
-       mutex_unlock(&fsdb->fsdb_mutex);
-       char    *buf, *params;
-       int      rc = -EINVAL;
-
-        RETURN(rc);
-#endif
-        return 0;
+       RETURN(rc);
 }
 
 int mgs_write_log_target(const struct lu_env *env, struct mgs_device *mgs,
@@ -3612,6 +3858,9 @@ int mgs_write_log_target(const struct lu_env *env, struct mgs_device *mgs,
                rc = 0;
        }
 
+       OBD_FAIL_TIMEOUT(OBD_FAIL_MGS_WRITE_TARGET_DELAY, cfs_fail_val > 0 ?
+                        cfs_fail_val : 10);
+
        mutex_lock(&fsdb->fsdb_mutex);
 
         if (mti->mti_flags &
@@ -3789,6 +4038,19 @@ static void print_lustre_cfg(struct lustre_cfg *lcfg)
         EXIT;
 }
 
+/* Setup _mgs fsdb and log
+ *
+ * Calls mgs_find_or_make_fsdb() for the name MGSSELF_NAME and returns
+ * that call's result code unchanged.
+ *
+ * NOTE(review): fsdb is received by value and the address of that local
+ * copy is what gets handed to mgs_find_or_make_fsdb(); anything the helper
+ * writes through it lands only in this function's copy and is not visible
+ * to the caller -- confirm this is intended.
+ */
+int mgs__mgs_fsdb_setup(const struct lu_env *env, struct mgs_device *mgs,
+                         struct fs_db *fsdb)
+{
+       int                     rc;
+       ENTRY;
+
+       rc = mgs_find_or_make_fsdb(env, mgs, MGSSELF_NAME, &fsdb);
+
+       RETURN(rc);
+}
+
 /* Setup params fsdb and log
  */
 int mgs_params_fsdb_setup(const struct lu_env *env, struct mgs_device *mgs,
@@ -3861,9 +4123,7 @@ int mgs_setparam(const struct lu_env *env, struct mgs_device *mgs,
                         RETURN(-EINVAL);
         } else {
                 /* assume devname is the fsname */
-               memset(fsname, 0, MTI_NAME_MAXLEN);
-                strncpy(fsname, devname, MTI_NAME_MAXLEN);
-               fsname[MTI_NAME_MAXLEN - 1] = 0;
+               strlcpy(fsname, devname, MTI_NAME_MAXLEN);
         }
         CDEBUG(D_MGS, "setparam fs='%s' device='%s'\n", fsname, devname);
 
@@ -3946,7 +4206,7 @@ static int mgs_write_log_pool(const struct lu_env *env,
        if (rc)
                goto out;
        rc = record_base(env, llh, tgtname, 0, cmd,
-                        fsname, poolname, ostname, 0);
+                        fsname, poolname, ostname, NULL);
        if (rc)
                goto out;
        rc = record_marker(env, llh, fsdb, CM_END, tgtname, comment);
@@ -3957,7 +4217,7 @@ out:
 
 int mgs_nodemap_cmd(const struct lu_env *env, struct mgs_device *mgs,
                    enum lcfg_command_type cmd, const char *nodemap_name,
-                   const char *param)
+                   char *param)
 {
        lnet_nid_t      nid[2];
        __u32           idmap[2];
@@ -3989,6 +4249,10 @@ int mgs_nodemap_cmd(const struct lu_env *env, struct mgs_device *mgs,
                bool_switch = simple_strtoul(param, NULL, 10);
                rc = nodemap_set_allow_root(nodemap_name, bool_switch);
                break;
+       case LCFG_NODEMAP_DENY_UNKNOWN:
+               bool_switch = simple_strtoul(param, NULL, 10);
+               rc = nodemap_set_deny_unknown(nodemap_name, bool_switch);
+               break;
        case LCFG_NODEMAP_TRUSTED:
                bool_switch = simple_strtoul(param, NULL, 10);
                rc = nodemap_set_trust_client_ids(nodemap_name, bool_switch);
@@ -4025,6 +4289,9 @@ int mgs_nodemap_cmd(const struct lu_env *env, struct mgs_device *mgs,
                        rc = nodemap_del_idmap(nodemap_name, NODEMAP_GID,
                                               idmap);
                break;
+       case LCFG_NODEMAP_SET_FILESET:
+               rc = nodemap_set_fileset(nodemap_name, param);
+               break;
        default:
                rc = -EINVAL;
        }
@@ -4042,6 +4309,7 @@ int mgs_pool_cmd(const struct lu_env *env, struct mgs_device *mgs,
         char *label = NULL, *canceled_label = NULL;
         int label_sz;
         struct mgs_target_info *mti = NULL;
+       bool checked = false;
         int rc, i;
         ENTRY;
 
@@ -4104,11 +4372,10 @@ int mgs_pool_cmd(const struct lu_env *env, struct mgs_device *mgs,
                 break;
         }
 
-        if (canceled_label != NULL) {
-                OBD_ALLOC_PTR(mti);
-                if (mti == NULL)
-                       GOTO(out_cancel, rc = -ENOMEM);
-        }
+       OBD_ALLOC_PTR(mti);
+       if (mti == NULL)
+               GOTO(out_cancel, rc = -ENOMEM);
+       strncpy(mti->mti_svname, "lov pool", sizeof(mti->mti_svname));
 
        mutex_lock(&fsdb->fsdb_mutex);
         /* write pool def to all MDT logs */
@@ -4120,12 +4387,24 @@ int mgs_pool_cmd(const struct lu_env *env, struct mgs_device *mgs,
                                mutex_unlock(&fsdb->fsdb_mutex);
                                GOTO(out_mti, rc);
                        }
-                        if (canceled_label != NULL) {
-                                strcpy(mti->mti_svname, "lov pool");
+
+                       if (!checked && (canceled_label == NULL)) {
+                               rc = mgs_check_marker(env, mgs, fsdb, mti,
+                                               logname, lovname, label);
+                               if (rc) {
+                                       name_destroy(&logname);
+                                       name_destroy(&lovname);
+                                       mutex_unlock(&fsdb->fsdb_mutex);
+                                       GOTO(out_mti,
+                                               rc = (rc == LLOG_PROC_BREAK ?
+                                                       -EEXIST : rc));
+                               }
+                               checked = true;
+                       }
+                       if (canceled_label != NULL)
                                rc = mgs_modify(env, mgs, fsdb, mti, logname,
                                                lovname, canceled_label,
                                                CM_SKIP);
-                        }
 
                        if (rc >= 0)
                                rc = mgs_write_log_pool(env, mgs, logname,
@@ -4146,6 +4425,17 @@ int mgs_pool_cmd(const struct lu_env *env, struct mgs_device *mgs,
                mutex_unlock(&fsdb->fsdb_mutex);
                GOTO(out_mti, rc);
        }
+
+       if (!checked && (canceled_label == NULL)) {
+               rc = mgs_check_marker(env, mgs, fsdb, mti, logname,
+                               fsdb->fsdb_clilov, label);
+               if (rc) {
+                       name_destroy(&logname);
+                       mutex_unlock(&fsdb->fsdb_mutex);
+                       GOTO(out_mti, rc = (rc == LLOG_PROC_BREAK ?
+                               -EEXIST : rc));
+               }
+       }
        if (canceled_label != NULL) {
                rc = mgs_modify(env, mgs, fsdb, mti, logname,
                                fsdb->fsdb_clilov, canceled_label, CM_SKIP);