+/**
+ * Rewrite one record that belongs to the target device block.
+ *
+ * An LCFG_ADD_UUID record is not copied; instead one add-uuid record is
+ * written to mrul->temp_llh for every nid that can be parsed out of
+ * mrul->target.mti_params.  Once at least one nid was written, a following
+ * LCFG_SETUP record is re-recorded with mrul->target.mti_params in place of
+ * its original UUID string.
+ *
+ * \param[in]     env	execution environment
+ * \param[in]     lcfg	configuration command carried by the current record
+ * \param[in,out] mrul	replace state; device_nids_added is set here
+ *
+ * \retval <0 record processing error
+ * \retval n record is processed. No need to copy the original one.
+ * \retval 0 record is not processed.
+ */
+static int process_command(const struct lu_env *env, struct lustre_cfg *lcfg,
+			   struct mgs_replace_uuid_lookup *mrul)
+{
+	int nids_added = 0;
+	lnet_nid_t nid;
+	char *ptr;
+	int rc;
+
+	if (lcfg->lcfg_command == LCFG_ADD_UUID) {
+		/* LCFG_ADD_UUID command found. Let's skip original command
+		   and add passed nids */
+		ptr = mrul->target.mti_params;
+		while (class_parse_nid(ptr, &nid, &ptr) == 0) {
+			CDEBUG(D_MGS, "add nid %s with uuid %s, "
+			       "device %s\n", libcfs_nid2str(nid),
+			       mrul->target.mti_params,
+			       mrul->target.mti_svname);
+			rc = record_add_uuid(env,
+					     mrul->temp_llh, nid,
+					     mrul->target.mti_params);
+			if (rc) {
+				/* Keep going with the remaining nids, but do
+				   not hide the failure */
+				CERROR("%s: can't add nid %s: rc = %d\n",
+				       mrul->target.mti_svname,
+				       libcfs_nid2str(nid), rc);
+				continue;
+			}
+			nids_added++;
+		}
+
+		if (nids_added == 0) {
+			/* 'nid' is not printed here: if the very first parse
+			   failed it may never have been assigned */
+			CERROR("No new nids were added, nids %s, "
+			       "device %s\n",
+			       mrul->target.mti_params,
+			       mrul->target.mti_svname);
+			/* plain return: this function has no ENTRY */
+			return -ENXIO;
+		}
+
+		mrul->device_nids_added = 1;
+		return nids_added;
+	}
+
+	if (mrul->device_nids_added && lcfg->lcfg_command == LCFG_SETUP) {
+		/* LCFG_SETUP command found. UUID should be changed */
+		rc = record_setup(env,
+				  mrul->temp_llh,
+				  /* devname the same */
+				  lustre_cfg_string(lcfg, 0),
+				  /* s1 is not changed */
+				  lustre_cfg_string(lcfg, 1),
+				  /* new uuid should be
+				     the full nidlist */
+				  mrul->target.mti_params,
+				  /* s3 is not changed */
+				  lustre_cfg_string(lcfg, 3),
+				  /* s4 is not changed */
+				  lustre_cfg_string(lcfg, 4));
+		return rc ? rc : 1;
+	}
+
+	/* Other commands in the target device block are left untouched */
+	return 0;
+}
+
+/**
+ * Handler that is called for every record in the llog.
+ * Records are processed in the order they are placed in the llog.
+ *
+ * Each record is either copied as is to mrul->temp_llh, replaced by
+ * process_command(), or dropped.
+ *
+ * \param[in] env	execution environment
+ * \param[in] llh	log to be processed
+ * \param[in] rec	current record
+ * \param[in] data	mgs_replace_uuid_lookup structure
+ *
+ * \retval 0		record was copied, replaced or skipped
+ * \retval -EINVAL	record is not an OBD_CFG_REC
+ * \retval <0		error from process_command() or llog_write()
+ */
+static int mgs_replace_handler(const struct lu_env *env,
+			       struct llog_handle *llh,
+			       struct llog_rec_hdr *rec,
+			       void *data)
+{
+	struct mgs_replace_uuid_lookup *mrul = data;
+	struct lustre_cfg *lcfg = REC_DATA(rec);
+	int cfg_len = REC_DATA_LEN(rec);
+	int rc;
+	ENTRY;
+
+	if (rec->lrh_type != OBD_CFG_REC) {
+		/* The payload is not a lustre_cfg, so only header fields
+		   are logged; lcfg must not be interpreted here */
+		CERROR("unhandled lrh_type: %#x, idx=%d, len=%d\n",
+		       rec->lrh_type, rec->lrh_index, rec->lrh_len);
+		RETURN(-EINVAL);
+	}
+
+	rc = lustre_cfg_sanity_check(lcfg, cfg_len);
+	if (rc) {
+		/* Do not copy any invalidated records.  The config buffers
+		   just failed the sanity check, so they are not printed */
+		CDEBUG(D_MGS, "Skipped invalid record idx=%d, rc=%d, len=%d\n",
+		       rec->lrh_index, rc, rec->lrh_len);
+		RETURN(0);
+	}
+
+	rc = check_markers(lcfg, mrul);
+	if (rc || mrul->skip_it)
+		GOTO(skip_out, rc = 0);
+
+	/* Write to new log all commands outside target device block */
+	if (!mrul->in_target_device)
+		GOTO(copy_out, rc = 0);
+
+	/* Skip all other LCFG_ADD_UUID and LCFG_ADD_CONN records
+	   (failover nids) for this target, assuming that if the
+	   primary is changing then so is the failover */
+	if (mrul->device_nids_added &&
+	    (lcfg->lcfg_command == LCFG_ADD_UUID ||
+	     lcfg->lcfg_command == LCFG_ADD_CONN))
+		GOTO(skip_out, rc = 0);
+
+	rc = process_command(env, lcfg, mrul);
+	if (rc < 0)
+		RETURN(rc);
+
+	/* A positive value means replacement records were already written */
+	if (rc)
+		RETURN(0);
+copy_out:
+	/* Record is placed in temporary llog as is */
+	rc = llog_write(env, mrul->temp_llh, rec, LLOG_NEXT_IDX);
+
+	CDEBUG(D_MGS, "Copied idx=%d, rc=%d, len=%d, cmd %x %s %s\n",
+	       rec->lrh_index, rc, rec->lrh_len, lcfg->lcfg_command,
+	       lustre_cfg_string(lcfg, 0), lustre_cfg_string(lcfg, 1));
+	RETURN(rc);
+
+skip_out:
+	CDEBUG(D_MGS, "Skipped idx=%d, rc=%d, len=%d, cmd %x %s %s\n",
+	       rec->lrh_index, rc, rec->lrh_len, lcfg->lcfg_command,
+	       lustre_cfg_string(lcfg, 0), lustre_cfg_string(lcfg, 1));
+	RETURN(rc);
+}
+
+/* Return the result of llog_is_empty() for config log \a name, looked up
+   through the LLOG_CONFIG_ORIG_CTXT context of the MGS obd device */
+static int mgs_log_is_empty(const struct lu_env *env,
+			    struct mgs_device *mgs, char *name)
+{
+	struct llog_ctxt *ctxt = llog_get_context(mgs->mgs_obd,
+						  LLOG_CONFIG_ORIG_CTXT);
+	int empty;
+
+	LASSERT(ctxt != NULL);
+
+	empty = llog_is_empty(env, ctxt, name);
+	/* drop the context reference taken above */
+	llog_ctxt_put(ctxt);
+
+	return empty;
+}
+
+/**
+ * Rewrite config log \a logname so that device \a devname uses \a nids.
+ *
+ * The log is first copied to "<logname>.bak" and erased.  A new log with
+ * the original name is then created and filled by running
+ * mgs_replace_handler() over the backup.  If anything fails after the
+ * erase, the backup is copied back over \a logname.
+ *
+ * \param[in] env	execution environment
+ * \param[in] mgs	MGS obd device
+ * \param[in] fsdb	file system database (not used here)
+ * \param[in] logname	name of the config log to rewrite
+ * \param[in] devname	device whose nids are replaced
+ * \param[in] nids	new nid list
+ *
+ * \retval 0		success, or the log is empty
+ * \retval -ENAMETOOLONG \a devname does not fit into mti_svname
+ * \retval -E2BIG	\a nids does not fit into mti_params
+ * \retval <0		other error
+ */
+static int mgs_replace_nids_log(const struct lu_env *env,
+				struct obd_device *mgs, struct fs_db *fsdb,
+				char *logname, char *devname, char *nids)
+{
+	struct llog_handle *orig_llh, *backup_llh;
+	struct llog_ctxt *ctxt;
+	struct mgs_replace_uuid_lookup *mrul;
+	struct mgs_device *mgs_dev = lu2mgs_dev(mgs->obd_lu_dev);
+	static struct obd_uuid cfg_uuid = { .uuid = "config_uuid" };
+	char *backup;
+	int rc, rc2;
+	ENTRY;
+
+	CDEBUG(D_MGS, "Replace nids for %s in %s\n", devname, logname);
+
+	ctxt = llog_get_context(mgs, LLOG_CONFIG_ORIG_CTXT);
+	LASSERT(ctxt != NULL);
+
+	if (mgs_log_is_empty(env, mgs_dev, logname)) {
+		/* Log is empty. Nothing to replace */
+		GOTO(out_put, rc = 0);
+	}
+
+	/* Reject input that the strlcpy() calls below would silently
+	   truncate.  This is done before the original log is touched, so
+	   nothing has to be restored.  sizeof does not evaluate mrul. */
+	if (strlen(devname) >= sizeof(mrul->target.mti_svname))
+		GOTO(out_put, rc = -ENAMETOOLONG);
+	if (strlen(nids) >= sizeof(mrul->target.mti_params))
+		GOTO(out_put, rc = -E2BIG);
+
+	OBD_ALLOC(backup, strlen(logname) + strlen(".bak") + 1);
+	if (backup == NULL)
+		GOTO(out_put, rc = -ENOMEM);
+
+	sprintf(backup, "%s.bak", logname);
+
+	rc = llog_backup(env, mgs, ctxt, ctxt, logname, backup);
+	if (rc == 0) {
+		/* Now erase original log file. Connections are not allowed.
+		   Backup is already saved */
+		rc = llog_erase(env, ctxt, NULL, logname);
+		if (rc < 0)
+			GOTO(out_free, rc);
+	} else if (rc != -ENOENT) {
+		CERROR("%s: can't make backup for %s: rc = %d\n",
+		       mgs->obd_name, logname, rc);
+		GOTO(out_free,rc);
+	}
+
+	/* open local log */
+	rc = llog_open_create(env, ctxt, &orig_llh, NULL, logname);
+	if (rc)
+		GOTO(out_restore, rc);
+
+	rc = llog_init_handle(env, orig_llh, LLOG_F_IS_PLAIN, &cfg_uuid);
+	if (rc)
+		GOTO(out_closel, rc);
+
+	/* open backup llog */
+	rc = llog_open(env, ctxt, &backup_llh, NULL, backup,
+		       LLOG_OPEN_EXISTS);
+	if (rc)
+		GOTO(out_closel, rc);
+
+	rc = llog_init_handle(env, backup_llh, LLOG_F_IS_PLAIN, NULL);
+	if (rc)
+		GOTO(out_close, rc);
+
+	/* Nothing to process in the backup */
+	if (llog_get_size(backup_llh) <= 1)
+		GOTO(out_close, rc = 0);
+
+	OBD_ALLOC_PTR(mrul);
+	if (!mrul)
+		GOTO(out_close, rc = -ENOMEM);
+	/* devname is only needed information to replace UUID records */
+	strlcpy(mrul->target.mti_svname, devname,
+		sizeof(mrul->target.mti_svname));
+	/* parse nids later */
+	strlcpy(mrul->target.mti_params, nids, sizeof(mrul->target.mti_params));
+	/* Copy records to this temporary llog */
+	mrul->temp_llh = orig_llh;
+
+	rc = llog_process(env, backup_llh, mgs_replace_handler,
+			  (void *)mrul, NULL);
+	OBD_FREE_PTR(mrul);
+out_close:
+	rc2 = llog_close(NULL, backup_llh);
+	if (!rc)
+		rc = rc2;
+out_closel:
+	rc2 = llog_close(NULL, orig_llh);
+	if (!rc)
+		rc = rc2;
+
+out_restore:
+	if (rc) {
+		CERROR("%s: llog should be restored: rc = %d\n",
+		       mgs->obd_name, rc);
+		rc2 = llog_backup(env, mgs, ctxt, ctxt, backup,
+				  logname);
+		if (rc2 < 0)
+			CERROR("%s: can't restore backup %s: rc = %d\n",
+			       mgs->obd_name, logname, rc2);
+	}
+
+out_free:
+	/* same size as the allocation: strlen(logname) + strlen(".bak") + 1 */
+	OBD_FREE(backup, strlen(backup) + 1);
+
+out_put:
+	llog_ctxt_put(ctxt);
+
+	if (rc)
+		CERROR("%s: failed to replace nids in log %s: rc = %d\n",
+		       mgs->obd_name, logname, rc);
+
+	RETURN(rc);
+}
+
+/**
+ * Split a device name into its file system name and/or device index.
+ *
+ * Either output may be NULL, in which case that part is not parsed.
+ *
+ * \param[in]  devname	device name (ex. lustre-MDT0000)
+ * \param[out] fsname	file system name (optional)
+ * \param[out] index	device index (optional)
+ *
+ * \retval 0		success
+ * \retval -EINVAL	a requested part could not be parsed from \a devname
+ */
+static int mgs_parse_devname(char *devname, char *fsname, __u32 *index)
+{
+	ENTRY;
+
+	/* File system name, only when the caller asked for it */
+	if (fsname != NULL &&
+	    server_name2fsname(devname, fsname, NULL) < 0) {
+		CDEBUG(D_MGS, "Device name %s without fsname\n",
+		       devname);
+		RETURN(-EINVAL);
+	}
+
+	/* Device index, only when the caller asked for it */
+	if (index != NULL &&
+	    server_name2index(devname, index, NULL) < 0) {
+		CDEBUG(D_MGS, "Device name %s with wrong index\n",
+		       devname);
+		RETURN(-EINVAL);
+	}
+
+	RETURN(0);
+}
+
+/* This is only called during replace_nids.
+ *
+ * Returns non-zero when at most three devices exist and no export other
+ * than the self export or an MDS-MDS export is attached to \a mgs_obd. */
+static int only_mgs_is_running(struct obd_device *mgs_obd)
+{
+	struct obd_export *exp;
+	int foreign_exports = 0;
+	/* TBD: does a global variable with the device count exist? */
+	int num_devices = get_devices_count();
+
+	spin_lock(&mgs_obd->obd_dev_lock);
+	list_for_each_entry(exp, &mgs_obd->obd_exports, exp_obd_chain) {
+		/* the self export and MDS-MDS connections are not counted */
+		if (exp == mgs_obd->obd_self_export ||
+		    (exp_connect_flags(exp) & OBD_CONNECT_MDS_MDS))
+			continue;
+
+		foreign_exports++;
+
+		CERROR("%s: node %s still connected during replace_nids "
+		       "connect_flags:%llx\n",
+		       mgs_obd->obd_name,
+		       libcfs_nid2str(exp->exp_nid_stats->nid),
+		       exp_connect_flags(exp));
+	}
+	spin_unlock(&mgs_obd->obd_dev_lock);
+
+	if (foreign_exports != 0)
+		return 0;
+
+	/* up to three devices are tolerated, presumably osd, MGS and MGC */
+	return num_devices <= 3;
+}
+
+/* Build the MDT log name "<fsname>-MDTxxxx" for MDT index \a i through
+   name_create() and return its result */
+static int name_create_mdt(char **logname, char *fsname, int i)
+{
+	/* "-MDT" plus up to 8 hex digits plus NUL fits with room to spare;
+	   the old 9-byte buffer overflowed for any index above 0xffff */
+	char mdt_index[16];
+
+	snprintf(mdt_index, sizeof(mdt_index), "-MDT%04x", i);
+	return name_create(logname, fsname, mdt_index);
+}
+
+/**
+ * Replace nids for \a devname with the \a nids values
+ *
+ * New connections to the MGS are blocked (obd_no_conn) for the duration of
+ * the call and the previous setting is put back before returning.  The
+ * client log and the log of every MDT set in fsdb_mdt_index_map are
+ * rewritten.
+ *
+ * \param env		execution environment
+ * \param mgs		MGS device
+ * \param devname	nids need to be replaced for this device
+ *			(ex. lustre-OST0000)
+ * \param nids		nids list (ex. nid1,nid2,nid3)
+ *
+ * \retval 0		success
+ * \retval -EINPROGRESS	something other than the MGS is still running
+ * \retval <0		other error
+ */
+int mgs_replace_nids(const struct lu_env *env,
+		     struct mgs_device *mgs,
+		     char *devname, char *nids)
+{
+	/* Assume fsname is part of device name */
+	char fsname[MTI_NAME_MAXLEN];
+	int rc;
+	__u32 index;
+	char *logname;
+	struct fs_db *fsdb;
+	unsigned int i;
+	int conn_state;
+	struct obd_device *mgs_obd = mgs->mgs_obd;
+	ENTRY;
+
+	/* We can only change NIDs if no other nodes are connected */
+	spin_lock(&mgs_obd->obd_dev_lock);
+	conn_state = mgs_obd->obd_no_conn;
+	mgs_obd->obd_no_conn = 1;
+	spin_unlock(&mgs_obd->obd_dev_lock);
+
+	/* We can not change nids if not only MGS is started */
+	if (!only_mgs_is_running(mgs_obd)) {
+		CERROR("Only MGS is allowed to be started\n");
+		GOTO(out, rc = -EINPROGRESS);
+	}
+
+	/* Get fsname and index; index is parsed only to validate devname */
+	rc = mgs_parse_devname(devname, fsname, &index);
+	if (rc)
+		GOTO(out, rc);
+
+	rc = mgs_find_or_make_fsdb(env, mgs, fsname, &fsdb);
+	if (rc) {
+		CERROR("%s: can't find fsdb: rc = %d\n", fsname, rc);
+		GOTO(out, rc);
+	}
+
+	/* Process client llogs */
+	rc = name_create(&logname, fsname, "-client");
+	if (rc)
+		/* logname is not usable when name_create() failed */
+		GOTO(out, rc);
+	rc = mgs_replace_nids_log(env, mgs_obd, fsdb, logname, devname, nids);
+	name_destroy(&logname);
+	if (rc) {
+		CERROR("%s: error while replacing NIDs for %s: rc = %d\n",
+		       fsname, devname, rc);
+		GOTO(out, rc);
+	}
+
+	/* Process MDT llogs */
+	for (i = 0; i < INDEX_MAP_SIZE * 8; i++) {
+		if (!test_bit(i, fsdb->fsdb_mdt_index_map))
+			continue;
+		rc = name_create_mdt(&logname, fsname, i);
+		if (rc)
+			GOTO(out, rc);
+		rc = mgs_replace_nids_log(env, mgs_obd, fsdb, logname, devname, nids);
+		name_destroy(&logname);
+		if (rc)
+			GOTO(out, rc);
+	}
+
+out:
+	/* put back whatever connection policy was in force on entry */
+	spin_lock(&mgs_obd->obd_dev_lock);
+	mgs_obd->obd_no_conn = conn_state;
+	spin_unlock(&mgs_obd->obd_dev_lock);
+
+	RETURN(rc);
+}
+