RETURN(rc);
}
+enum replace_state { /* progress of nid replacement over the config records */
+ REPLACE_COPY = 0, /* outside the target block: records are copied */
+ REPLACE_SKIP, /* between CM_SKIP markers: records are skipped */
+ REPLACE_DONE, /* new setup written: old ADD_UUID/ADD_CONN are skipped */
+ REPLACE_UUID, /* target block started: waiting for LCFG_ADD_UUID */
+ REPLACE_SETUP /* new nids written: waiting for LCFG_SETUP */
+};
+
/** This structure is passed to mgs_replace_handler */
struct mgs_replace_data {
/* Nids are replaced for this target device */
struct mgs_target_info target;
/* Temporary modified llog */
struct llog_handle *temp_llh;
- /* Flag is set if in target block*/
- int in_target_device;
- /* Nids already added. Just skip (multiple nids) */
- int device_nids_added;
- /* Flag is set if this block should not be copied */
- int skip_it;
+ enum replace_state state; /* where we are in the replace sequence */
+ char *failover; /* failover part of target.mti_params (at ':'), or NULL */
+ char *nodeuuid; /* uuid made by name_create() from a nid; name_destroy() frees it */
};
/**
and can be restored if needed */
if ((marker->cm_flags & (CM_SKIP | CM_START)) ==
(CM_SKIP | CM_START)) {
- mrd->skip_it = 1;
+ mrd->state = REPLACE_SKIP; /* a skipped block begins */
return 1;
}
if ((marker->cm_flags & (CM_SKIP | CM_END)) ==
(CM_SKIP | CM_END)) {
- mrd->skip_it = 0;
+ mrd->state = REPLACE_COPY; /* the skipped block ends */
return 1;
}
LASSERT(!(marker->cm_flags & CM_START) ||
!(marker->cm_flags & CM_END));
if (marker->cm_flags & CM_START) {
- mrd->in_target_device = 1;
- mrd->device_nids_added = 0;
+ mrd->state = REPLACE_UUID; /* target block begins: wait for ADD_UUID */
+ mrd->failover = NULL; /* no failover list found for this block yet */
} else if (marker->cm_flags & CM_END)
- mrd->in_target_device = 0;
+ mrd->state = REPLACE_COPY; /* target block ends: copy records again */
}
}
int nids_added = 0;
lnet_nid_t nid;
char *ptr;
- int rc;
+ int rc = 0;
- if (lcfg->lcfg_command == LCFG_ADD_UUID) {
+ if (mrd->state == REPLACE_UUID &&
+ lcfg->lcfg_command == LCFG_ADD_UUID) {
/* LCFG_ADD_UUID command found. Let's skip original command
and add passed nids */
ptr = mrd->target.mti_params;
while (class_parse_nid(ptr, &nid, &ptr) == 0) {
+ /* the first parsed nid names the uuid used for all primary nids */
+ if (!mrd->nodeuuid) {
+ rc = name_create(&mrd->nodeuuid,
+ libcfs_nid2str(nid), "");
+ if (rc) {
+ CERROR("Can't create uuid for "
+ "nid %s, device %s\n",
+ libcfs_nid2str(nid),
+ mrd->target.mti_svname);
+ return rc;
+ }
+ }
CDEBUG(D_MGS, "add nid %s with uuid %s, "
"device %s\n", libcfs_nid2str(nid),
- mrd->target.mti_params,
+ mrd->nodeuuid,
mrd->target.mti_svname);
rc = record_add_uuid(env,
mrd->temp_llh, nid,
- mrd->target.mti_params);
+ mrd->nodeuuid);
if (!rc)
nids_added++;
+
+ /* ':' ends the primary nids; the rest is kept as the failover list */
+ if (*ptr == ':') {
+ mrd->failover = ptr;
+ break;
+ }
}
if (nids_added == 0) {
+ /* NOTE(review): nid is printed even if nothing was parsed - confirm it is set */
CERROR("No new nids were added, nid %s with uuid %s, "
"device %s\n", libcfs_nid2str(nid),
- mrd->target.mti_params,
+ mrd->nodeuuid ? mrd->nodeuuid : "NULL",
mrd->target.mti_svname);
- RETURN(-ENXIO);
+ name_destroy(&mrd->nodeuuid);
+ return -ENXIO;
} else {
- mrd->device_nids_added = 1;
+ mrd->state = REPLACE_SETUP;
}
return nids_added;
}
- if (mrd->device_nids_added && lcfg->lcfg_command == LCFG_SETUP) {
+ if (mrd->state == REPLACE_SETUP && lcfg->lcfg_command == LCFG_SETUP) {
/* LCFG_SETUP command found. UUID should be changed */
rc = record_setup(env,
mrd->temp_llh,
lustre_cfg_string(lcfg, 0),
/* s1 is not changed */
lustre_cfg_string(lcfg, 1),
- /* new uuid should be
- the full nidlist */
- mrd->target.mti_params,
+ mrd->nodeuuid, /* uuid created while adding the new nids */
/* s3 is not changed */
lustre_cfg_string(lcfg, 3),
/* s4 is not changed */
lustre_cfg_string(lcfg, 4));
+
+ name_destroy(&mrd->nodeuuid); /* failover groups create their own uuid */
+ if (rc)
+ return rc;
+
+ if (mrd->failover) { /* a failover list followed the primary nids */
+ ptr = mrd->failover;
+ while (class_parse_nid(ptr, &nid, &ptr) == 0) {
+ if (mrd->nodeuuid == NULL) { /* first nid of a group names its uuid */
+ rc = name_create(&mrd->nodeuuid,
+ libcfs_nid2str(nid),
+ "");
+ if (rc)
+ return rc;
+ }
+
+ CDEBUG(D_MGS, "add nid %s for failover %s\n",
+ libcfs_nid2str(nid), mrd->nodeuuid);
+ rc = record_add_uuid(env, mrd->temp_llh, nid,
+ mrd->nodeuuid);
+ if (rc) {
+ name_destroy(&mrd->nodeuuid);
+ return rc;
+ }
+ if (*ptr == ':') { /* group ends: add its connection, drop its uuid */
+ rc = record_add_conn(env,
+ mrd->temp_llh,
+ lustre_cfg_string(lcfg, 0),
+ mrd->nodeuuid);
+ name_destroy(&mrd->nodeuuid);
+ if (rc)
+ return rc;
+ }
+ }
+ if (mrd->nodeuuid) { /* last group was not closed by ':' */
+ rc = record_add_conn(env, mrd->temp_llh,
+ lustre_cfg_string(lcfg, 0),
+ mrd->nodeuuid);
+ name_destroy(&mrd->nodeuuid);
+ if (rc)
+ return rc;
+ }
+ }
+ mrd->state = REPLACE_DONE; /* later ADD_UUID/ADD_CONN of this block are skipped */
return rc ? rc : 1;
}
}
rc = check_markers(lcfg, mrd);
- if (rc || mrd->skip_it)
+ if (rc || mrd->state == REPLACE_SKIP) /* nonzero rc or skipped block */
GOTO(skip_out, rc = 0);
/* Write to new log all commands outside target device block */
- if (!mrd->in_target_device)
+ if (mrd->state == REPLACE_COPY)
GOTO(copy_out, rc = 0);
- /* Skip all other LCFG_ADD_UUID and LCFG_ADD_CONN records
- (failover nids) for this target, assuming that if then
- primary is changing then so is the failover */
- if (mrd->device_nids_added &&
+ if (mrd->state == REPLACE_DONE && /* new setup record already written */
(lcfg->lcfg_command == LCFG_ADD_UUID ||
- lcfg->lcfg_command == LCFG_ADD_CONN))
+ lcfg->lcfg_command == LCFG_ADD_CONN)) { /* old failover records are dropped */
+ if (!mrd->failover) /* no failover list was found in the passed nids */
+ CWARN("Previous failover is deleted, but new one is "
+ "not set. This means you configure system "
+ "without failover or passed wrong replace_nids "
+ "command parameters. Device %s, passed nids %s\n",
+ mrd->target.mti_svname, mrd->target.mti_params);
GOTO(skip_out, rc = 0);
+ }
rc = process_command(env, lcfg, mrd);
if (rc < 0)
marker = lustre_cfg_buf(lcfg, 1);
if (marker->cm_flags & CM_SKIP) {
if (marker->cm_flags & CM_START)
- mrd->skip_it = 1;
+ mrd->state = REPLACE_SKIP; /* SKIP section starts */
if (marker->cm_flags & CM_END)
- mrd->skip_it = 0;
+ mrd->state = REPLACE_COPY; /* SKIP section ends */
/* SKIP section started or finished */
CDEBUG(D_MGS, "Skip idx=%d, rc=%d, len=%d, "
"cmd %x %s %s\n", rec->lrh_index, rc,
RETURN(0);
}
} else {
- if (mrd->skip_it) {
+ if (mrd->state == REPLACE_SKIP) { /* still inside a SKIP section */
/* record enclosed between SKIP markers, skip it */
CDEBUG(D_MGS, "Skip idx=%d, rc=%d, len=%d, "
"cmd %x %s %s\n", rec->lrh_index, rc,
echo "wrong nids list should not destroy the system"
do_facet mgs $LCTL replace_nids $FSNAME-OST0000 "wrong nids list" &&
error "wrong parse"
+ do_facet mgs $LCTL replace_nids $FSNAME-OST0000 "asdfasdf, asdfadf" &&
+ error "wrong parse" # a comma-separated list of bad nids must fail too
echo "replace OST nid"
do_facet mgs $LCTL replace_nids $FSNAME-OST0000 $OST1_NID ||
do_facet mgs $LCTL replace_nids $FSNAME-MDT0000 "wrong nids list" &&
error "wrong parse"
+ local FAKE_NIDS="192.168.0.112@tcp1,192.168.0.112@tcp2" # extra primary nids
+ local FAKE_FAILOVER="192.168.0.113@tcp1,192.168.0.113@tcp2" # failover nids
+ local NIDS_AND_FAILOVER="$MDS_NID,$FAKE_NIDS:$FAKE_FAILOVER" # primary:failover
+ echo "set NIDs with failover"
+ do_facet mgs $LCTL replace_nids $FSNAME-MDT0000 $NIDS_AND_FAILOVER ||
+ error "replace nids failed"
+
+
echo "replace MDS nid"
do_facet mgs $LCTL replace_nids $FSNAME-MDT0000 $MDS_NID ||
error "replace nids failed"
}
run_test 123 "clear and reset all parameters using set_param -F"
+# Check that an MDT whose nids were rewritten with a "primary:failover"
+# list by replace_nids is still usable after a failover of mds2.
+test_124()
+{
+ [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return
+ # quoted so that an empty or whitespace-containing value is tested properly
+ [ -z "$mds2failover_HOST" ] && skip "needs MDT failover setup" && return
+
+ setup
+ cleanup
+
+ load_modules
+ # replace_nids runs on the mgs facet; with a combined MGS/MDS,
+ # MDT 1 is started with "-o nosvc" first
+ if combined_mgs_mds; then
+ start_mdt 1 "-o nosvc" ||
+ error "starting mds with nosvc option failed"
+ fi
+ # first nid of mds2 is the primary, first nid of its failover host
+ # is the failover part after ':'
+ local nid=$(do_facet mds2 $LCTL list_nids | head -1)
+ local failover_nid=$(do_node $mds2failover_HOST $LCTL list_nids | head -1)
+ do_facet mgs $LCTL replace_nids $FSNAME-MDT0001 $nid:$failover_nid ||
+ error "replace_nids execution error"
+
+ if combined_mgs_mds; then
+ stop_mdt 1
+ fi
+
+ setup
+ fail mds2
+ echo "lfs setdirstripe"
+ $LFS setdirstripe -i 1 $MOUNT/$tdir || error "setdirstripe error"
+ echo ok
+}
+run_test 124 "check failover after replace_nids"
+
if ! combined_mgs_mds ; then
stop mgs
fi