Whamcloud - gitweb
LU-10384 mgs: replace_nids large string and failover support 24/30624/13
author Artem Blagodarenko <artem.blagodarenko@seagate.com>
Mon, 18 Dec 2017 17:09:15 +0000 (20:09 +0300)
committer Oleg Drokin <green@whamcloud.com>
Fri, 4 Jan 2019 04:44:45 +0000 (04:44 +0000)
Replace_nids uses the nids list as the new UUID. The UUID string
length is limited to 38 symbols, so the new nids list needs
to be less than 38 symbols.

With this patch, the string representation of the first nid in
the list is used for the UUID, as is done for failover nids.

Replace_nids finds the records for the given device and regenerates
the lines that contain old nids. The add_uuid and add_conn lines for
failover used to be deleted during replace_nids, which broke the
failover configuration.

This patch adds failover support to the replace_nids command.
For example:

lctl replace_nids lustre-MDT0000 nid1,nid2:nid3,nid4:nid5,nid6

nid3,nid4 - nids from first failover node
nid5,nid6 - nids from second failover node

Signed-off-by: Artem Blagodarenko <c17828@cray.com>
Cray-bug-id: MRP-4505
Change-Id: I4e9a35e8fa8781909ecbaa74785700f4ca04cf92
Reviewed-on: https://review.whamcloud.com/30624
Tested-by: Jenkins
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Vladimir Saveliev <c17830@cray.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/doc/lctl.8
lustre/mgs/mgs_llog.c
lustre/tests/conf-sanity.sh
lustre/utils/lctl.c

index 7954b7c..5e65d70 100644 (file)
@@ -59,7 +59,7 @@ Print all Network Identifiers on the local node. LNET must be running.
 From a list of nids for a remote node, show which interface communication
 will take place on.
 .TP
-.BI replace_nids " <devicename> <nid1>[,nid2,nid3 ...]"
+.BI replace_nids " <devicename> <nid1>[,nid2,nid3:nid4,nid5:nid6 ...]"
 Replace the LNET Network Identifiers for a given device,
 as when the server's IP address has changed.
 This command must be run on the MGS node.
@@ -68,6 +68,8 @@ in another cases). To start the MGS service only:
 mount -t lustre <MDT partition> -o nosvc <mount point>
 Note the replace_nids command skips any invalidated records in the configuration log.
 The previous log is backed up with the suffix '.bak'.
+Failover nids must be passed after a ':' symbol. More than
+one failover node can be set (each node's nids follow their own ':' symbol).
 .TP
 .BI ping " <nid> timeout"
 Check LNET connectivity via an LNET ping. This will use the fabric
index 9b1877f..24ecfc5 100644 (file)
@@ -969,18 +969,23 @@ out_pop:
         RETURN(rc);
 }
 
+enum replace_state {   /* per-record state of the replace_nids log walk */
+       REPLACE_COPY = 0,       /* outside target block: copy records */
+       REPLACE_SKIP,           /* inside a CM_SKIP block: drop records */
+       REPLACE_DONE,           /* new records written: drop old ADD_UUID/ADD_CONN */
+       REPLACE_UUID,           /* target block started: waiting for LCFG_ADD_UUID */
+       REPLACE_SETUP           /* new nids added: waiting for LCFG_SETUP */
+};
+
 /** This structure is passed to mgs_replace_handler */
 struct mgs_replace_data {
        /* Nids are replaced for this target device */
        struct mgs_target_info target;
        /* Temporary modified llog */
        struct llog_handle *temp_llh;
-       /* Flag is set if in target block*/
-       int in_target_device;
-       /* Nids already added. Just skip (multiple nids) */
-       int device_nids_added;
-       /* Flag is set if this block should not be copied */
-       int skip_it;
+       enum replace_state state;       /* current position in the log walk */
+       char *failover; /* points at first ':' in target.mti_params, or NULL */
+       char *nodeuuid; /* uuid from name_create() on a nid; name_destroy() frees */
 };
 
 /**
@@ -1006,13 +1011,13 @@ static int check_markers(struct lustre_cfg *lcfg,
                   and can be restored if needed */
                if ((marker->cm_flags & (CM_SKIP | CM_START)) ==
                    (CM_SKIP | CM_START)) {
-                       mrd->skip_it = 1;
+                       mrd->state = REPLACE_SKIP;
                        return 1;
                }
 
                if ((marker->cm_flags & (CM_SKIP | CM_END)) ==
                    (CM_SKIP | CM_END)) {
-                       mrd->skip_it = 0;
+                       mrd->state = REPLACE_COPY;
                        return 1;
                }
 
@@ -1020,10 +1025,10 @@ static int check_markers(struct lustre_cfg *lcfg,
                        LASSERT(!(marker->cm_flags & CM_START) ||
                                !(marker->cm_flags & CM_END));
                        if (marker->cm_flags & CM_START) {
-                               mrd->in_target_device = 1;
-                               mrd->device_nids_added = 0;
+                               mrd->state = REPLACE_UUID;
+                               mrd->failover = NULL;
                        } else if (marker->cm_flags & CM_END)
-                               mrd->in_target_device = 0;
+                               mrd->state = REPLACE_COPY;
                }
        }
 
@@ -1109,38 +1114,56 @@ static int process_command(const struct lu_env *env, struct lustre_cfg *lcfg,
        int nids_added = 0;
        lnet_nid_t nid;
        char *ptr;
-       int rc;
+       int rc = 0;
 
-       if (lcfg->lcfg_command == LCFG_ADD_UUID) {
+       if (mrd->state == REPLACE_UUID &&
+           lcfg->lcfg_command == LCFG_ADD_UUID) {
                /* LCFG_ADD_UUID command found. Let's skip original command
                   and add passed nids */
                ptr = mrd->target.mti_params;
                while (class_parse_nid(ptr, &nid, &ptr) == 0) {
+                       if (!mrd->nodeuuid) {
+                               rc = name_create(&mrd->nodeuuid,
+                                                libcfs_nid2str(nid), "");
+                               if (rc) {
+                                       CERROR("Can't create uuid for "
+                                               "nid  %s, device %s\n",
+                                               libcfs_nid2str(nid),
+                                               mrd->target.mti_svname);
+                                       return rc;
+                               }
+                       }
                        CDEBUG(D_MGS, "add nid %s with uuid %s, "
                               "device %s\n", libcfs_nid2str(nid),
                                mrd->target.mti_params,
-                               mrd->target.mti_svname);
+                               mrd->nodeuuid);
                        rc = record_add_uuid(env,
                                             mrd->temp_llh, nid,
-                                            mrd->target.mti_params);
+                                            mrd->nodeuuid);
                        if (!rc)
                                nids_added++;
+
+                       if (*ptr == ':') {
+                               mrd->failover = ptr;
+                               break;
+                       }
                }
 
                if (nids_added == 0) {
                        CERROR("No new nids were added, nid %s with uuid %s, "
                               "device %s\n", libcfs_nid2str(nid),
-                              mrd->target.mti_params,
+                              mrd->nodeuuid ? mrd->nodeuuid : "NULL",
                               mrd->target.mti_svname);
-                       RETURN(-ENXIO);
+                       name_destroy(&mrd->nodeuuid);
+                       return -ENXIO;
                } else {
-                       mrd->device_nids_added = 1;
+                       mrd->state = REPLACE_SETUP;
                }
 
                return nids_added;
        }
 
-       if (mrd->device_nids_added && lcfg->lcfg_command == LCFG_SETUP) {
+       if (mrd->state == REPLACE_SETUP && lcfg->lcfg_command == LCFG_SETUP) {
                /* LCFG_SETUP command found. UUID should be changed */
                rc = record_setup(env,
                                  mrd->temp_llh,
@@ -1148,13 +1171,55 @@ static int process_command(const struct lu_env *env, struct lustre_cfg *lcfg,
                                  lustre_cfg_string(lcfg, 0),
                                  /* s1 is not changed */
                                  lustre_cfg_string(lcfg, 1),
-                                 /* new uuid should be
-                                 the full nidlist */
-                                 mrd->target.mti_params,
+                                 mrd->nodeuuid,
                                  /* s3 is not changed */
                                  lustre_cfg_string(lcfg, 3),
                                  /* s4 is not changed */
                                  lustre_cfg_string(lcfg, 4));
+
+               name_destroy(&mrd->nodeuuid);
+               if (rc)
+                       return rc;
+
+               if (mrd->failover) {
+                       ptr = mrd->failover;
+                       while (class_parse_nid(ptr, &nid, &ptr) == 0) {
+                               if (mrd->nodeuuid == NULL) {
+                                       rc =  name_create(&mrd->nodeuuid,
+                                                         libcfs_nid2str(nid),
+                                                         "");
+                                       if (rc)
+                                               return rc;
+                               }
+
+                               CDEBUG(D_MGS, "add nid %s for failover %s\n",
+                                      libcfs_nid2str(nid), mrd->nodeuuid);
+                               rc = record_add_uuid(env, mrd->temp_llh, nid,
+                                                    mrd->nodeuuid);
+                               if (rc) {
+                                       name_destroy(&mrd->nodeuuid);
+                                       return rc;
+                               }
+                               if (*ptr == ':') {
+                                       rc = record_add_conn(env,
+                                               mrd->temp_llh,
+                                               lustre_cfg_string(lcfg, 0),
+                                               mrd->nodeuuid);
+                                       name_destroy(&mrd->nodeuuid);
+                                       if (rc)
+                                               return rc;
+                               }
+                       }
+                       if (mrd->nodeuuid) {
+                               rc = record_add_conn(env, mrd->temp_llh,
+                                                    lustre_cfg_string(lcfg, 0),
+                                                    mrd->nodeuuid);
+                               name_destroy(&mrd->nodeuuid);
+                               if (rc)
+                                       return rc;
+                       }
+               }
+               mrd->state = REPLACE_DONE;
                return rc ? rc : 1;
        }
 
@@ -1200,20 +1265,24 @@ static int mgs_replace_nids_handler(const struct lu_env *env,
        }
 
        rc = check_markers(lcfg, mrd);
-       if (rc || mrd->skip_it)
+       if (rc || mrd->state == REPLACE_SKIP)
                GOTO(skip_out, rc = 0);
 
        /* Write to new log all commands outside target device block */
-       if (!mrd->in_target_device)
+       if (mrd->state == REPLACE_COPY)
                GOTO(copy_out, rc = 0);
 
-       /* Skip all other LCFG_ADD_UUID and LCFG_ADD_CONN records
-          (failover nids) for this target, assuming that if then
-          primary is changing then so is the failover */
-       if (mrd->device_nids_added &&
+       if (mrd->state == REPLACE_DONE &&
            (lcfg->lcfg_command == LCFG_ADD_UUID ||
-            lcfg->lcfg_command == LCFG_ADD_CONN))
+            lcfg->lcfg_command == LCFG_ADD_CONN)) {
+               if (!mrd->failover)
+                       CWARN("Previous failover is deleted, but new one is "
+                             "not set. This means you configure system "
+                             "without failover or passed wrong replace_nids "
+                             "command parameters. Device %s, passed nids %s\n",
+                             mrd->target.mti_svname, mrd->target.mti_params);
                GOTO(skip_out, rc = 0);
+       }
 
        rc = process_command(env, lcfg, mrd);
        if (rc < 0)
@@ -1597,9 +1666,9 @@ static int mgs_clear_config_handler(const struct lu_env *env,
                marker = lustre_cfg_buf(lcfg, 1);
                if (marker->cm_flags & CM_SKIP) {
                        if (marker->cm_flags & CM_START)
-                               mrd->skip_it = 1;
+                               mrd->state = REPLACE_SKIP;
                        if (marker->cm_flags & CM_END)
-                               mrd->skip_it = 0;
+                               mrd->state = REPLACE_COPY;
                        /* SKIP section started or finished */
                        CDEBUG(D_MGS, "Skip idx=%d, rc=%d, len=%d, "
                               "cmd %x %s %s\n", rec->lrh_index, rc,
@@ -1609,7 +1678,7 @@ static int mgs_clear_config_handler(const struct lu_env *env,
                        RETURN(0);
                }
        } else {
-               if (mrd->skip_it) {
+               if (mrd->state == REPLACE_SKIP) {
                        /* record enclosed between SKIP markers, skip it */
                        CDEBUG(D_MGS, "Skip idx=%d, rc=%d, len=%d, "
                               "cmd %x %s %s\n", rec->lrh_index, rc,
index 59fa294..ff6151e 100644 (file)
@@ -4758,6 +4758,8 @@ test_66() {
        echo "wrong nids list should not destroy the system"
        do_facet mgs $LCTL replace_nids $FSNAME-OST0000 "wrong nids list" &&
                error "wrong parse"
+       do_facet mgs $LCTL replace_nids $FSNAME-OST0000 "asdfasdf, asdfadf" &&
+               error "wrong parse"
 
        echo "replace OST nid"
        do_facet mgs $LCTL replace_nids $FSNAME-OST0000 $OST1_NID ||
@@ -4771,6 +4773,14 @@ test_66() {
        do_facet mgs $LCTL replace_nids $FSNAME-MDT0000 "wrong nids list" &&
                error "wrong parse"
 
+       local FAKE_NIDS="192.168.0.112@tcp1,192.168.0.112@tcp2"
+       local FAKE_FAILOVER="192.168.0.113@tcp1,192.168.0.113@tcp2"
+       local NIDS_AND_FAILOVER="$MDS_NID,$FAKE_NIDS:$FAKE_FAILOVER"
+       echo "set NIDs with failover"
+       do_facet mgs $LCTL replace_nids $FSNAME-MDT0000 $NIDS_AND_FAILOVER ||
+               error "replace nids failed"
+
+
        echo "replace MDS nid"
        do_facet mgs $LCTL replace_nids $FSNAME-MDT0000 $MDS_NID ||
                error "replace nids failed"
@@ -8266,6 +8276,36 @@ test_123() {
 }
 run_test 123 "clear and reset all parameters using set_param -F"
 
+test_124()
+{      # Run replace_nids with a failover nid on MDT0001, then exercise mds2 failover
+       [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return
+       [ -z $mds2failover_HOST ] && skip "needs MDT failover setup" && return
+
+       setup
+       cleanup
+
+       load_modules
+       if combined_mgs_mds; then       # MGS shares a device with the MDT
+               start_mdt 1 "-o nosvc" ||
+                       error "starting mds with nosvc option failed"
+       fi
+       local nid=$(do_facet mds2 $LCTL list_nids | head -1)    # first nid listed on mds2
+       local failover_nid=$(do_node $mds2failover_HOST $LCTL list_nids | head -1)
+       do_facet mgs $LCTL replace_nids $FSNAME-MDT0001 $nid:$failover_nid ||
+               error "replace_nids execution error"    # nid before ':', failover nid after
+
+       if combined_mgs_mds; then
+               stop_mdt 1
+       fi
+
+       setup
+       fail mds2       # presumably fails mds2 over to its failover host - TODO confirm
+       echo "lfs setdirstripe"
+       $LFS setdirstripe -i 1 $MOUNT/$tdir || error "setdirstirpe error"
+       echo ok
+}
+run_test 124 "check failover after replace_nids"
+
 if ! combined_mgs_mds ; then
        stop mgs
 fi
index 381a427..5e73d28 100644 (file)
@@ -94,7 +94,7 @@ command_t cmdlist[] = {
         "usage: which_nid NID [NID...]"},
        {"replace_nids", jt_replace_nids, 0,
         "replace primary NIDs for a device\n"
-        "usage: replace_nids <device> <nid1>[,nid2,nid3]"},
+        "usage: replace_nids <device> <nid1>[,nid2,nid3:nid4,nid5:nid6]"},
        {"interface_list", jt_ptl_print_interfaces, 0,
         "print network interface entries\n"
         "usage: interface_list"},