X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fmgs%2Fmgs_llog.c;h=2740b2f375a95d41ab911dfbef80b2099f68ccae;hp=945dca5d9d4075f1d619d18f92f3667146d3fdf6;hb=1f6cb3534e74f0c9462008c8088b5734b64ed41c;hpb=4642f30970c8737f31d63c75eeda2cff15c68a77 diff --git a/lustre/mgs/mgs_llog.c b/lustre/mgs/mgs_llog.c index 945dca5..2740b2f 100644 --- a/lustre/mgs/mgs_llog.c +++ b/lustre/mgs/mgs_llog.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -27,7 +23,7 @@ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2011, 2013, Intel Corporation. + * Copyright (c) 2011, 2017, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -46,32 +42,44 @@ #define D_MGS D_CONFIG #include -#include -#include +#include +#include #include #include +#include #include "mgs_internal.h" /********************** Class functions ********************/ -int class_dentry_readdir(const struct lu_env *env, - struct mgs_device *mgs, cfs_list_t *list) +/** + * Find all logs in CONFIG directory and link then into list. 
+ * + * \param[in] env pointer to the thread context + * \param[in] mgs pointer to the mgs device + * \param[out] log_list the list to hold the found llog name entry + * + * \retval 0 for success + * \retval negative error number on failure + **/ +int class_dentry_readdir(const struct lu_env *env, struct mgs_device *mgs, + struct list_head *log_list) { - struct dt_object *dir = mgs->mgs_configs_dir; + struct dt_object *dir = mgs->mgs_configs_dir; const struct dt_it_ops *iops; - struct dt_it *it; + struct dt_it *it; struct mgs_direntry *de; - char *key; - int rc, key_sz; + char *key; + int rc, key_sz; + size_t suffix_len = sizeof(".bak") - 1; - CFS_INIT_LIST_HEAD(list); + INIT_LIST_HEAD(log_list); LASSERT(dir); LASSERT(dir->do_index_ops); iops = &dir->do_index_ops->dio_it; - it = iops->init(env, dir, LUDA_64BITHASH, BYPASS_CAPA); + it = iops->init(env, dir, LUDA_64BITHASH); if (IS_ERR(it)) RETURN(PTR_ERR(it)); @@ -99,29 +107,47 @@ int class_dentry_readdir(const struct lu_env *env, goto next; } + /* filter out ".bak" files */ + if (key_sz >= suffix_len && + !memcmp(".bak", key + key_sz - suffix_len, suffix_len)) { + CDEBUG(D_MGS, "Skipping backup file %.*s\n", + key_sz, key); + goto next; + } + de = mgs_direntry_alloc(key_sz + 1); if (de == NULL) { rc = -ENOMEM; break; } - memcpy(de->name, key, key_sz); - de->name[key_sz] = 0; + memcpy(de->mde_name, key, key_sz); + de->mde_name[key_sz] = 0; - cfs_list_add(&de->list, list); + list_add(&de->mde_list, log_list); next: rc = iops->next(env, it); } while (rc == 0); - rc = 0; + if (rc > 0) + rc = 0; iops->put(env, it); fini: iops->fini(env, it); - if (rc) + if (rc) { + struct mgs_direntry *n; + CERROR("%s: key failed when listing %s: rc = %d\n", mgs->mgs_obd->obd_name, MOUNT_CONFIGS_DIR, rc); + + list_for_each_entry_safe(de, n, log_list, mde_list) { + list_del_init(&de->mde_list); + mgs_direntry_free(de); + } + } + RETURN(rc); } @@ -165,7 +191,7 @@ static int mgs_fsdb_handler(const struct lu_env *env, struct llog_handle 
*llh, int cfg_len = rec->lrh_len; char *cfg_buf = (char*) (rec + 1); struct lustre_cfg *lcfg; - __u32 index; + u32 index; int rc = 0; ENTRY; @@ -185,12 +211,14 @@ static int mgs_fsdb_handler(const struct lu_env *env, struct llog_handle *llh, CDEBUG(D_INFO, "cmd %x %s %s\n", lcfg->lcfg_command, lustre_cfg_string(lcfg, 0), lustre_cfg_string(lcfg, 1)); - /* Figure out ost indicies */ - /* lov_modify_tgts add 0:lov1 1:ost1_UUID 2(index):0 3(gen):1 */ - if (lcfg->lcfg_command == LCFG_LOV_ADD_OBD || - lcfg->lcfg_command == LCFG_LOV_DEL_OBD) { - index = simple_strtoul(lustre_cfg_string(lcfg, 2), - NULL, 10); + /* Figure out ost indicies */ + /* lov_modify_tgts add 0:lov1 1:ost1_UUID 2(index):0 3(gen):1 */ + if (lcfg->lcfg_command == LCFG_LOV_ADD_OBD || + lcfg->lcfg_command == LCFG_LOV_DEL_OBD) { + rc = kstrtouint(lustre_cfg_string(lcfg, 2), 10, &index); + if (rc) + RETURN(rc); + CDEBUG(D_MGS, "OST index for %s is %u (%s)\n", lustre_cfg_string(lcfg, 1), index, lustre_cfg_string(lcfg, 2)); @@ -210,9 +238,11 @@ static int mgs_fsdb_handler(const struct lu_env *env, struct llog_handle *llh, } rc = 0; CDEBUG(D_MGS, "MDT index is %u\n", index); - set_bit(index, fsdb->fsdb_mdt_index_map); - fsdb->fsdb_mdt_count ++; - } + if (!test_bit(index, fsdb->fsdb_mdt_index_map)) { + set_bit(index, fsdb->fsdb_mdt_index_map); + fsdb->fsdb_mdt_count++; + } + } /** * figure out the old config. 
fsdb_gen = 0 means old log @@ -254,10 +284,12 @@ static int mgs_get_fsdb_from_llog(const struct lu_env *env, struct mgs_device *mgs, struct fs_db *fsdb) { - char *logname; - struct llog_handle *loghandle; - struct llog_ctxt *ctxt; - struct mgs_fsdb_handler_data d = { fsdb, 0 }; + char *logname; + struct llog_handle *loghandle; + struct llog_ctxt *ctxt; + struct mgs_fsdb_handler_data d = { + .fsdb = fsdb, + }; int rc; ENTRY; @@ -310,168 +342,243 @@ static void mgs_free_fsdb_srpc(struct fs_db *fsdb) sptlrpc_rule_set_free(&fsdb->fsdb_srpc_gen); } -struct fs_db *mgs_find_fsdb(struct mgs_device *mgs, char *fsname) +static void mgs_unlink_fsdb(struct mgs_device *mgs, struct fs_db *fsdb) { - struct fs_db *fsdb; - cfs_list_t *tmp; + mutex_lock(&mgs->mgs_mutex); + if (likely(!list_empty(&fsdb->fsdb_list))) { + LASSERTF(atomic_read(&fsdb->fsdb_ref) >= 2, + "Invalid ref %d on %s\n", + atomic_read(&fsdb->fsdb_ref), + fsdb->fsdb_name); + + list_del_init(&fsdb->fsdb_list); + /* Drop the reference on the list.*/ + mgs_put_fsdb(mgs, fsdb); + } + mutex_unlock(&mgs->mgs_mutex); +} - cfs_list_for_each(tmp, &mgs->mgs_fs_db_list) { - fsdb = cfs_list_entry(tmp, struct fs_db, fsdb_list); - if (strcmp(fsdb->fsdb_name, fsname) == 0) - return fsdb; - } - return NULL; +/* The caller must hold mgs->mgs_mutex. */ +static inline struct fs_db * +mgs_find_fsdb_noref(struct mgs_device *mgs, const char *fsname) +{ + struct fs_db *fsdb; + struct list_head *tmp; + + list_for_each(tmp, &mgs->mgs_fs_db_list) { + fsdb = list_entry(tmp, struct fs_db, fsdb_list); + if (strcmp(fsdb->fsdb_name, fsname) == 0) + return fsdb; + } + + return NULL; +} + +/* The caller must hold mgs->mgs_mutex. */ +static void mgs_remove_fsdb_by_name(struct mgs_device *mgs, const char *name) +{ + struct fs_db *fsdb; + + fsdb = mgs_find_fsdb_noref(mgs, name); + if (fsdb) { + list_del_init(&fsdb->fsdb_list); + /* Drop the reference on the list.*/ + mgs_put_fsdb(mgs, fsdb); + } +} + +/* The caller must hold mgs->mgs_mutex. 
*/ +struct fs_db *mgs_find_fsdb(struct mgs_device *mgs, const char *fsname) +{ + struct fs_db *fsdb; + + fsdb = mgs_find_fsdb_noref(mgs, fsname); + if (fsdb) + atomic_inc(&fsdb->fsdb_ref); + + return fsdb; } -/* caller must hold the mgs->mgs_fs_db_lock */ +/* The caller must hold mgs->mgs_mutex. */ static struct fs_db *mgs_new_fsdb(const struct lu_env *env, struct mgs_device *mgs, char *fsname) { - struct fs_db *fsdb; - int rc; - ENTRY; + struct fs_db *fsdb; + int rc; + ENTRY; - if (strlen(fsname) >= sizeof(fsdb->fsdb_name)) { - CERROR("fsname %s is too long\n", fsname); - RETURN(NULL); - } + if (strlen(fsname) >= sizeof(fsdb->fsdb_name)) { + CERROR("fsname %s is too long\n", fsname); - OBD_ALLOC_PTR(fsdb); - if (!fsdb) - RETURN(NULL); + RETURN(ERR_PTR(-EINVAL)); + } + + OBD_ALLOC_PTR(fsdb); + if (!fsdb) + RETURN(ERR_PTR(-ENOMEM)); - strcpy(fsdb->fsdb_name, fsname); + strncpy(fsdb->fsdb_name, fsname, sizeof(fsdb->fsdb_name)); mutex_init(&fsdb->fsdb_mutex); + INIT_LIST_HEAD(&fsdb->fsdb_list); set_bit(FSDB_UDESC, &fsdb->fsdb_flags); fsdb->fsdb_gen = 1; + INIT_LIST_HEAD(&fsdb->fsdb_clients); + atomic_set(&fsdb->fsdb_notify_phase, 0); + init_waitqueue_head(&fsdb->fsdb_notify_waitq); + init_completion(&fsdb->fsdb_notify_comp); - if (strcmp(fsname, MGSSELF_NAME) == 0) { + if (strcmp(fsname, MGSSELF_NAME) == 0) { set_bit(FSDB_MGS_SELF, &fsdb->fsdb_flags); - } else { - OBD_ALLOC(fsdb->fsdb_ost_index_map, INDEX_MAP_SIZE); - OBD_ALLOC(fsdb->fsdb_mdt_index_map, INDEX_MAP_SIZE); - if (!fsdb->fsdb_ost_index_map || !fsdb->fsdb_mdt_index_map) { - CERROR("No memory for index maps\n"); + fsdb->fsdb_mgs = mgs; + if (logname_is_barrier(fsname)) + goto add; + } else { + OBD_ALLOC(fsdb->fsdb_mdt_index_map, INDEX_MAP_SIZE); + if (!fsdb->fsdb_mdt_index_map) { + CERROR("No memory for MDT index maps\n"); + GOTO(err, rc = -ENOMEM); - } + } - rc = name_create(&fsdb->fsdb_clilov, fsname, "-clilov"); - if (rc) - GOTO(err, rc); - rc = name_create(&fsdb->fsdb_clilmv, fsname, "-clilmv"); - if 
(rc) - GOTO(err, rc); + OBD_ALLOC(fsdb->fsdb_ost_index_map, INDEX_MAP_SIZE); + if (!fsdb->fsdb_ost_index_map) { + CERROR("No memory for OST index maps\n"); - /* initialise data for NID table */ - mgs_ir_init_fs(env, mgs, fsdb); + GOTO(err, rc = -ENOMEM); + } + + if (logname_is_barrier(fsname)) + goto add; + + rc = name_create(&fsdb->fsdb_clilov, fsname, "-clilov"); + if (rc) + GOTO(err, rc); + + rc = name_create(&fsdb->fsdb_clilmv, fsname, "-clilmv"); + if (rc) + GOTO(err, rc); + /* initialise data for NID table */ + mgs_ir_init_fs(env, mgs, fsdb); lproc_mgs_add_live(mgs, fsdb); - } + } + + if (!test_bit(FSDB_MGS_SELF, &fsdb->fsdb_flags) && + strcmp(PARAMS_FILENAME, fsname) != 0) { + /* populate the db from the client llog */ + rc = mgs_get_fsdb_from_llog(env, mgs, fsdb); + if (rc) { + CERROR("Can't get db from client log %d\n", rc); + + GOTO(err, rc); + } + } + + /* populate srpc rules from params llog */ + rc = mgs_get_fsdb_srpc_from_llog(env, mgs, fsdb); + if (rc) { + CERROR("Can't get db from params log %d\n", rc); + + GOTO(err, rc); + } + +add: + /* One ref is for the fsdb on the list. + * The other ref is for the caller. 
*/ + atomic_set(&fsdb->fsdb_ref, 2); + list_add(&fsdb->fsdb_list, &mgs->mgs_fs_db_list); - cfs_list_add(&fsdb->fsdb_list, &mgs->mgs_fs_db_list); + RETURN(fsdb); - RETURN(fsdb); err: - if (fsdb->fsdb_ost_index_map) - OBD_FREE(fsdb->fsdb_ost_index_map, INDEX_MAP_SIZE); - if (fsdb->fsdb_mdt_index_map) - OBD_FREE(fsdb->fsdb_mdt_index_map, INDEX_MAP_SIZE); - name_destroy(&fsdb->fsdb_clilov); - name_destroy(&fsdb->fsdb_clilmv); - OBD_FREE_PTR(fsdb); - RETURN(NULL); + atomic_set(&fsdb->fsdb_ref, 1); + mgs_put_fsdb(mgs, fsdb); + + RETURN(ERR_PTR(rc)); } static void mgs_free_fsdb(struct mgs_device *mgs, struct fs_db *fsdb) { - /* wait for anyone with the sem */ - mutex_lock(&fsdb->fsdb_mutex); + LASSERT(list_empty(&fsdb->fsdb_list)); + lproc_mgs_del_live(mgs, fsdb); - cfs_list_del(&fsdb->fsdb_list); - - /* deinitialize fsr */ - mgs_ir_fini_fs(mgs, fsdb); - - if (fsdb->fsdb_ost_index_map) - OBD_FREE(fsdb->fsdb_ost_index_map, INDEX_MAP_SIZE); - if (fsdb->fsdb_mdt_index_map) - OBD_FREE(fsdb->fsdb_mdt_index_map, INDEX_MAP_SIZE); - name_destroy(&fsdb->fsdb_clilov); - name_destroy(&fsdb->fsdb_clilmv); - mgs_free_fsdb_srpc(fsdb); - mutex_unlock(&fsdb->fsdb_mutex); - OBD_FREE_PTR(fsdb); + + /* deinitialize fsr */ + if (fsdb->fsdb_mgs) + mgs_ir_fini_fs(mgs, fsdb); + + if (fsdb->fsdb_ost_index_map) + OBD_FREE(fsdb->fsdb_ost_index_map, INDEX_MAP_SIZE); + if (fsdb->fsdb_mdt_index_map) + OBD_FREE(fsdb->fsdb_mdt_index_map, INDEX_MAP_SIZE); + name_destroy(&fsdb->fsdb_clilov); + name_destroy(&fsdb->fsdb_clilmv); + mgs_free_fsdb_srpc(fsdb); + OBD_FREE_PTR(fsdb); +} + +void mgs_put_fsdb(struct mgs_device *mgs, struct fs_db *fsdb) +{ + if (atomic_dec_and_test(&fsdb->fsdb_ref)) + mgs_free_fsdb(mgs, fsdb); } int mgs_init_fsdb_list(struct mgs_device *mgs) { - CFS_INIT_LIST_HEAD(&mgs->mgs_fs_db_list); + INIT_LIST_HEAD(&mgs->mgs_fs_db_list); return 0; } int mgs_cleanup_fsdb_list(struct mgs_device *mgs) { - struct fs_db *fsdb; - cfs_list_t *tmp, *tmp2; + struct fs_db *fsdb; + struct list_head *tmp, 
*tmp2; + mutex_lock(&mgs->mgs_mutex); - cfs_list_for_each_safe(tmp, tmp2, &mgs->mgs_fs_db_list) { - fsdb = cfs_list_entry(tmp, struct fs_db, fsdb_list); - mgs_free_fsdb(mgs, fsdb); - } + list_for_each_safe(tmp, tmp2, &mgs->mgs_fs_db_list) { + fsdb = list_entry(tmp, struct fs_db, fsdb_list); + list_del_init(&fsdb->fsdb_list); + mgs_put_fsdb(mgs, fsdb); + } mutex_unlock(&mgs->mgs_mutex); - return 0; + return 0; } -int mgs_find_or_make_fsdb(const struct lu_env *env, - struct mgs_device *mgs, char *name, - struct fs_db **dbh) +/* The caller must hold mgs->mgs_mutex. */ +int mgs_find_or_make_fsdb_nolock(const struct lu_env *env, + struct mgs_device *mgs, + char *name, struct fs_db **dbh) { - struct fs_db *fsdb; - int rc = 0; - + struct fs_db *fsdb; + int rc = 0; ENTRY; - mutex_lock(&mgs->mgs_mutex); + fsdb = mgs_find_fsdb(mgs, name); - if (fsdb) { - mutex_unlock(&mgs->mgs_mutex); - *dbh = fsdb; - RETURN(0); - } + if (!fsdb) { + fsdb = mgs_new_fsdb(env, mgs, name); + if (IS_ERR(fsdb)) + rc = PTR_ERR(fsdb); - CDEBUG(D_MGS, "Creating new db\n"); - fsdb = mgs_new_fsdb(env, mgs, name); - /* lock fsdb_mutex until the db is loaded from llogs */ - if (fsdb) - mutex_lock(&fsdb->fsdb_mutex); - mutex_unlock(&mgs->mgs_mutex); - if (!fsdb) - RETURN(-ENOMEM); + CDEBUG(D_MGS, "Created new db: rc = %d\n", rc); + } - if (!test_bit(FSDB_MGS_SELF, &fsdb->fsdb_flags)) { - /* populate the db from the client llog */ - rc = mgs_get_fsdb_from_llog(env, mgs, fsdb); - if (rc) { - CERROR("Can't get db from client log %d\n", rc); - GOTO(out_free, rc); - } - } + if (!rc) + *dbh = fsdb; - /* populate srpc rules from params llog */ - rc = mgs_get_fsdb_srpc_from_llog(env, mgs, fsdb); - if (rc) { - CERROR("Can't get db from params log %d\n", rc); - GOTO(out_free, rc); - } + RETURN(rc); +} - mutex_unlock(&fsdb->fsdb_mutex); - *dbh = fsdb; +int mgs_find_or_make_fsdb(const struct lu_env *env, struct mgs_device *mgs, + char *name, struct fs_db **dbh) +{ + int rc; + ENTRY; - RETURN(0); + 
mutex_lock(&mgs->mgs_mutex); + rc = mgs_find_or_make_fsdb_nolock(env, mgs, name, dbh); + mutex_unlock(&mgs->mgs_mutex); -out_free: - mutex_unlock(&fsdb->fsdb_mutex); - mgs_free_fsdb(mgs, fsdb); - return rc; + RETURN(rc); } /* 1 = index in use @@ -495,18 +602,23 @@ int mgs_check_index(const struct lu_env *env, } if (test_bit(FSDB_LOG_EMPTY, &fsdb->fsdb_flags)) - RETURN(-1); + GOTO(out, rc = -1); - if (mti->mti_flags & LDD_F_SV_TYPE_OST) - imap = fsdb->fsdb_ost_index_map; - else if (mti->mti_flags & LDD_F_SV_TYPE_MDT) - imap = fsdb->fsdb_mdt_index_map; - else - RETURN(-EINVAL); + if (mti->mti_flags & LDD_F_SV_TYPE_OST) + imap = fsdb->fsdb_ost_index_map; + else if (mti->mti_flags & LDD_F_SV_TYPE_MDT) + imap = fsdb->fsdb_mdt_index_map; + else + GOTO(out, rc = -EINVAL); if (test_bit(mti->mti_stripe_index, imap)) - RETURN(1); - RETURN(0); + GOTO(out, rc = 1); + + GOTO(out, rc = 0); + +out: + mgs_put_fsdb(mgs, fsdb); + return rc; } static __inline__ int next_index(void *index_map, int map_len) @@ -520,6 +632,28 @@ static __inline__ int next_index(void *index_map, int map_len) return -1; } +/* Make the mdt/ost server obd name based on the filesystem name */ +static bool server_make_name(u32 flags, u16 index, const char *fs, + char *name_buf, size_t name_buf_size) +{ + bool invalid_flag = false; + + if (flags & (LDD_F_SV_TYPE_MDT | LDD_F_SV_TYPE_OST)) { + if (!(flags & LDD_F_SV_ALL)) + snprintf(name_buf, name_buf_size, "%.8s%c%s%04x", fs, + (flags & LDD_F_VIRGIN) ? ':' : + ((flags & LDD_F_WRITECONF) ? '=' : '-'), + (flags & LDD_F_SV_TYPE_MDT) ? 
"MDT" : "OST", + index); + } else if (flags & LDD_F_SV_TYPE_MGS) { + snprintf(name_buf, name_buf_size, "MGS"); + } else { + CERROR("unknown server type %#x\n", flags); + invalid_flag = true; + } + return invalid_flag; +} + /* Return codes: 0 newly marked as in use <0 err @@ -553,17 +687,16 @@ static int mgs_set_index(const struct lu_env *env, if (rc == -1) GOTO(out_up, rc = -ERANGE); mti->mti_stripe_index = rc; - if (mti->mti_flags & LDD_F_SV_TYPE_MDT) - fsdb->fsdb_mdt_count ++; } - if (mti->mti_stripe_index >= INDEX_MAP_SIZE * 8) { - LCONSOLE_ERROR_MSG(0x13f, "Server %s requested index %d, " - "but the max index is %d.\n", - mti->mti_svname, mti->mti_stripe_index, - INDEX_MAP_SIZE * 8); + /* the last index(0xffff) is reserved for default value. */ + if (mti->mti_stripe_index >= INDEX_MAP_SIZE * 8 - 1) { + LCONSOLE_ERROR_MSG(0x13f, "Server %s requested index %u, " + "but index must be less than %u.\n", + mti->mti_svname, mti->mti_stripe_index, + INDEX_MAP_SIZE * 8 - 1); GOTO(out_up, rc = -ERANGE); - } + } if (test_bit(mti->mti_stripe_index, imap)) { if ((mti->mti_flags & LDD_F_VIRGIN) && @@ -578,21 +711,30 @@ static int mgs_set_index(const struct lu_env *env, CDEBUG(D_MGS, "Server %s updating index %d\n", mti->mti_svname, mti->mti_stripe_index); GOTO(out_up, rc = EALREADY); - } - } + } + } else { + set_bit(mti->mti_stripe_index, imap); + if (mti->mti_flags & LDD_F_SV_TYPE_MDT) + fsdb->fsdb_mdt_count++; + } set_bit(mti->mti_stripe_index, imap); clear_bit(FSDB_LOG_EMPTY, &fsdb->fsdb_flags); - mutex_unlock(&fsdb->fsdb_mutex); - server_make_name(mti->mti_flags & ~(LDD_F_VIRGIN | LDD_F_WRITECONF), - mti->mti_stripe_index, mti->mti_fsname, mti->mti_svname); + if (server_make_name(mti->mti_flags & ~(LDD_F_VIRGIN | LDD_F_WRITECONF), + mti->mti_stripe_index, mti->mti_fsname, + mti->mti_svname, sizeof(mti->mti_svname))) { + CERROR("unknown server type %#x\n", mti->mti_flags); + GOTO(out_up, rc = -EINVAL); + } - CDEBUG(D_MGS, "Set index for %s to %d\n", mti->mti_svname, - 
mti->mti_stripe_index); + CDEBUG(D_MGS, "Set index for %s to %d\n", mti->mti_svname, + mti->mti_stripe_index); + + GOTO(out_up, rc = 0); - RETURN(0); out_up: mutex_unlock(&fsdb->fsdb_mutex); + mgs_put_fsdb(mgs, fsdb); return rc; } @@ -601,6 +743,116 @@ struct mgs_modify_lookup { int mml_modified; }; +static int mgs_check_record_match(const struct lu_env *env, + struct llog_handle *llh, + struct llog_rec_hdr *rec, void *data) +{ + struct cfg_marker *mc_marker = data; + struct cfg_marker *marker; + struct lustre_cfg *lcfg = REC_DATA(rec); + int cfg_len = REC_DATA_LEN(rec); + int rc; + ENTRY; + + + if (rec->lrh_type != OBD_CFG_REC) { + CDEBUG(D_ERROR, "Unhandled lrh_type: %#x\n", rec->lrh_type); + RETURN(-EINVAL); + } + + rc = lustre_cfg_sanity_check(lcfg, cfg_len); + if (rc) { + CDEBUG(D_ERROR, "Insane cfg\n"); + RETURN(rc); + } + + /* We only care about markers */ + if (lcfg->lcfg_command != LCFG_MARKER) + RETURN(0); + + marker = lustre_cfg_buf(lcfg, 1); + + if (marker->cm_flags & CM_SKIP) + RETURN(0); + + if ((strcmp(mc_marker->cm_comment, marker->cm_comment) == 0) && + (strcmp(mc_marker->cm_tgtname, marker->cm_tgtname) == 0)) { + /* Found a non-skipped marker match */ + CDEBUG(D_MGS, "Matched rec %u marker %d flag %x %s %s\n", + rec->lrh_index, marker->cm_step, + marker->cm_flags, marker->cm_tgtname, + marker->cm_comment); + rc = LLOG_PROC_BREAK; + } + + RETURN(rc); +} + +/** + * Check an existing config log record with matching comment and device + * Return code: + * 0 - checked successfully, + * LLOG_PROC_BREAK - record matches + * negative - error + */ +static int mgs_check_marker(const struct lu_env *env, struct mgs_device *mgs, + struct fs_db *fsdb, struct mgs_target_info *mti, + char *logname, char *devname, char *comment) +{ + struct llog_handle *loghandle; + struct llog_ctxt *ctxt; + struct cfg_marker *mc_marker; + int rc; + + ENTRY; + + LASSERT(mutex_is_locked(&fsdb->fsdb_mutex)); + CDEBUG(D_MGS, "mgs check %s/%s/%s\n", logname, devname, comment); + + 
ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT); + LASSERT(ctxt != NULL); + rc = llog_open(env, ctxt, &loghandle, NULL, logname, LLOG_OPEN_EXISTS); + if (rc < 0) { + if (rc == -ENOENT) + rc = 0; + GOTO(out_pop, rc); + } + + rc = llog_init_handle(env, loghandle, LLOG_F_IS_PLAIN, NULL); + if (rc) + GOTO(out_close, rc); + + if (llog_get_size(loghandle) <= 1) + GOTO(out_close, rc = 0); + + OBD_ALLOC_PTR(mc_marker); + if (!mc_marker) + GOTO(out_close, rc = -ENOMEM); + if (strlcpy(mc_marker->cm_comment, comment, + sizeof(mc_marker->cm_comment)) >= + sizeof(mc_marker->cm_comment)) + GOTO(out_free, rc = -E2BIG); + if (strlcpy(mc_marker->cm_tgtname, devname, + sizeof(mc_marker->cm_tgtname)) >= + sizeof(mc_marker->cm_tgtname)) + GOTO(out_free, rc = -E2BIG); + + rc = llog_process(env, loghandle, mgs_check_record_match, + (void *)mc_marker, NULL); + +out_free: + OBD_FREE_PTR(mc_marker); + +out_close: + llog_close(env, loghandle); +out_pop: + if (rc && rc != LLOG_PROC_BREAK) + CDEBUG(D_ERROR, "%s: mgs check %s/%s failed: rc = %d\n", + mgs->mgs_obd->obd_name, mti->mti_svname, comment, rc); + llog_ctxt_put(ctxt); + RETURN(rc); +} + static int mgs_modify_handler(const struct lu_env *env, struct llog_handle *llh, struct llog_rec_hdr *rec, void *data) @@ -640,11 +892,7 @@ static int mgs_modify_handler(const struct lu_env *env, marker->cm_flags &= ~CM_EXCLUDE; /* in case we're unexcluding */ marker->cm_flags |= mml->mml_marker.cm_flags; marker->cm_canceltime = mml->mml_marker.cm_canceltime; - /* Header and tail are added back to lrh_len in - llog_lvfs_write_rec */ - rec->lrh_len = cfg_len; - rc = llog_write(env, llh, rec, NULL, 0, (void *)lcfg, - rec->lrh_index); + rc = llog_write(env, llh, rec, rec->lrh_index); if (!rc) mml->mml_modified++; } @@ -703,7 +951,7 @@ static int mgs_modify(const struct lu_env *env, struct mgs_device *mgs, GOTO(out_free, rc = -E2BIG); /* Modify mostly means cancel */ mml->mml_marker.cm_flags = flags; - mml->mml_marker.cm_canceltime = flags ? 
cfs_time_current_sec() : 0; + mml->mml_marker.cm_canceltime = flags ? ktime_get_real_seconds() : 0; mml->mml_modified = 0; rc = llog_process(env, loghandle, mgs_modify_handler, (void *)mml, NULL); @@ -723,18 +971,23 @@ out_pop: RETURN(rc); } +enum replace_state { + REPLACE_COPY = 0, + REPLACE_SKIP, + REPLACE_DONE, + REPLACE_UUID, + REPLACE_SETUP +}; + /** This structure is passed to mgs_replace_handler */ -struct mgs_replace_uuid_lookup { +struct mgs_replace_data { /* Nids are replaced for this target device */ struct mgs_target_info target; /* Temporary modified llog */ struct llog_handle *temp_llh; - /* Flag is set if in target block*/ - int in_target_device; - /* Nids already added. Just skip (multiple nids) */ - int device_nids_added; - /* Flag is set if this block should not be copied */ - int skip_it; + enum replace_state state; + char *failover; + char *nodeuuid; }; /** @@ -742,77 +995,54 @@ struct mgs_replace_uuid_lookup { * b) is it target block * * \param[in] lcfg - * \param[in] mrul + * \param[in] mrd * * \retval 0 should not to be skipped * \retval 1 should to be skipped */ static int check_markers(struct lustre_cfg *lcfg, - struct mgs_replace_uuid_lookup *mrul) + struct mgs_replace_data *mrd) { struct cfg_marker *marker; /* Track markers. Find given device */ if (lcfg->lcfg_command == LCFG_MARKER) { marker = lustre_cfg_buf(lcfg, 1); - /* Clean llog from records marked as CM_EXCLUDE. - CM_SKIP records are used for "active" command + /* Clean llog from records marked as CM_SKIP. 
+ CM_EXCLUDE records are used for "active" command and can be restored if needed */ - if ((marker->cm_flags & (CM_EXCLUDE | CM_START)) == - (CM_EXCLUDE | CM_START)) { - mrul->skip_it = 1; + if ((marker->cm_flags & (CM_SKIP | CM_START)) == + (CM_SKIP | CM_START)) { + mrd->state = REPLACE_SKIP; return 1; } - if ((marker->cm_flags & (CM_EXCLUDE | CM_END)) == - (CM_EXCLUDE | CM_END)) { - mrul->skip_it = 0; + if ((marker->cm_flags & (CM_SKIP | CM_END)) == + (CM_SKIP | CM_END)) { + mrd->state = REPLACE_COPY; return 1; } - if (strcmp(mrul->target.mti_svname, marker->cm_tgtname) == 0) { + if (strcmp(mrd->target.mti_svname, marker->cm_tgtname) == 0) { LASSERT(!(marker->cm_flags & CM_START) || !(marker->cm_flags & CM_END)); if (marker->cm_flags & CM_START) { - mrul->in_target_device = 1; - mrul->device_nids_added = 0; + mrd->state = REPLACE_UUID; + mrd->failover = NULL; } else if (marker->cm_flags & CM_END) - mrul->in_target_device = 0; + mrd->state = REPLACE_COPY; } } return 0; } -static int record_lcfg(const struct lu_env *env, struct llog_handle *llh, - struct lustre_cfg *lcfg) -{ - struct llog_rec_hdr rec; - int buflen, rc; - - if (!lcfg || !llh) - return -ENOMEM; - - LASSERT(llh->lgh_ctxt); - - buflen = lustre_cfg_len(lcfg->lcfg_bufcount, - lcfg->lcfg_buflens); - rec.lrh_len = llog_data_len(buflen); - rec.lrh_type = OBD_CFG_REC; - - /* idx = -1 means append */ - rc = llog_write(env, llh, &rec, NULL, 0, (void *)lcfg, -1); - if (rc) - CERROR("failed %d\n", rc); - return rc; -} - static int record_base(const struct lu_env *env, struct llog_handle *llh, char *cfgname, lnet_nid_t nid, int cmd, char *s1, char *s2, char *s3, char *s4) { - struct mgs_thread_info *mgi = mgs_env_info(env); - struct lustre_cfg *lcfg; + struct mgs_thread_info *mgi = mgs_env_info(env); + struct llog_cfg_rec *lcr; int rc; CDEBUG(D_MGS, "lcfg %s %#x %s %s %s %s\n", cfgname, @@ -828,19 +1058,19 @@ static int record_base(const struct lu_env *env, struct llog_handle *llh, if (s4) 
lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 4, s4); - lcfg = lustre_cfg_new(cmd, &mgi->mgi_bufs); - if (!lcfg) + lcr = lustre_cfg_rec_new(cmd, &mgi->mgi_bufs); + if (lcr == NULL) return -ENOMEM; - lcfg->lcfg_nid = nid; - rc = record_lcfg(env, llh, lcfg); + lcr->lcr_cfg.lcfg_nid = nid; + rc = llog_write(env, llh, &lcr->lcr_hdr, LLOG_NEXT_IDX); - lustre_cfg_free(lcfg); + lustre_cfg_rec_free(lcr); - if (rc) { - CERROR("error %d: lcfg %s %#x %s %s %s %s\n", rc, cfgname, - cmd, s1, s2, s3, s4); - } + if (rc < 0) + CDEBUG(D_MGS, + "failed to write lcfg %s %#x %s %s %s %s: rc = %d\n", + cfgname, cmd, s1, s2, s3, s4, rc); return rc; } @@ -848,21 +1078,24 @@ static inline int record_add_uuid(const struct lu_env *env, struct llog_handle *llh, uint64_t nid, char *uuid) { - return record_base(env, llh, NULL, nid, LCFG_ADD_UUID, uuid, 0, 0, 0); + return record_base(env, llh, NULL, nid, LCFG_ADD_UUID, uuid, + NULL, NULL, NULL); } static inline int record_add_conn(const struct lu_env *env, struct llog_handle *llh, char *devname, char *uuid) { - return record_base(env, llh, devname, 0, LCFG_ADD_CONN, uuid, 0, 0, 0); + return record_base(env, llh, devname, 0, LCFG_ADD_CONN, uuid, + NULL, NULL, NULL); } static inline int record_attach(const struct lu_env *env, struct llog_handle *llh, char *devname, char *type, char *uuid) { - return record_base(env, llh,devname, 0, LCFG_ATTACH, type, uuid, 0, 0); + return record_base(env, llh, devname, 0, LCFG_ATTACH, type, uuid, + NULL, NULL); } static inline int record_setup(const struct lu_env *env, @@ -878,57 +1111,117 @@ static inline int record_setup(const struct lu_env *env, * \retval 0 record is not processed. 
*/ static int process_command(const struct lu_env *env, struct lustre_cfg *lcfg, - struct mgs_replace_uuid_lookup *mrul) + struct mgs_replace_data *mrd) { int nids_added = 0; lnet_nid_t nid; char *ptr; - int rc; + int rc = 0; - if (lcfg->lcfg_command == LCFG_ADD_UUID) { + if (mrd->state == REPLACE_UUID && + lcfg->lcfg_command == LCFG_ADD_UUID) { /* LCFG_ADD_UUID command found. Let's skip original command and add passed nids */ - ptr = mrul->target.mti_params; + ptr = mrd->target.mti_params; while (class_parse_nid(ptr, &nid, &ptr) == 0) { + if (!mrd->nodeuuid) { + rc = name_create(&mrd->nodeuuid, + libcfs_nid2str(nid), ""); + if (rc) { + CERROR("Can't create uuid for " + "nid %s, device %s\n", + libcfs_nid2str(nid), + mrd->target.mti_svname); + return rc; + } + } CDEBUG(D_MGS, "add nid %s with uuid %s, " "device %s\n", libcfs_nid2str(nid), - mrul->target.mti_params, - mrul->target.mti_svname); + mrd->target.mti_params, + mrd->nodeuuid); rc = record_add_uuid(env, - mrul->temp_llh, nid, - mrul->target.mti_params); + mrd->temp_llh, nid, + mrd->nodeuuid); if (!rc) nids_added++; + + if (*ptr == ':') { + mrd->failover = ptr; + break; + } } if (nids_added == 0) { CERROR("No new nids were added, nid %s with uuid %s, " "device %s\n", libcfs_nid2str(nid), - mrul->target.mti_params, - mrul->target.mti_svname); - RETURN(-ENXIO); + mrd->nodeuuid ? mrd->nodeuuid : "NULL", + mrd->target.mti_svname); + name_destroy(&mrd->nodeuuid); + return -ENXIO; } else { - mrul->device_nids_added = 1; + mrd->state = REPLACE_SETUP; } return nids_added; } - if (mrul->device_nids_added && lcfg->lcfg_command == LCFG_SETUP) { + if (mrd->state == REPLACE_SETUP && lcfg->lcfg_command == LCFG_SETUP) { /* LCFG_SETUP command found. 
UUID should be changed */ rc = record_setup(env, - mrul->temp_llh, + mrd->temp_llh, /* devname the same */ lustre_cfg_string(lcfg, 0), /* s1 is not changed */ lustre_cfg_string(lcfg, 1), - /* new uuid should be - the full nidlist */ - mrul->target.mti_params, + mrd->nodeuuid, /* s3 is not changed */ lustre_cfg_string(lcfg, 3), /* s4 is not changed */ lustre_cfg_string(lcfg, 4)); + + name_destroy(&mrd->nodeuuid); + if (rc) + return rc; + + if (mrd->failover) { + ptr = mrd->failover; + while (class_parse_nid(ptr, &nid, &ptr) == 0) { + if (mrd->nodeuuid == NULL) { + rc = name_create(&mrd->nodeuuid, + libcfs_nid2str(nid), + ""); + if (rc) + return rc; + } + + CDEBUG(D_MGS, "add nid %s for failover %s\n", + libcfs_nid2str(nid), mrd->nodeuuid); + rc = record_add_uuid(env, mrd->temp_llh, nid, + mrd->nodeuuid); + if (rc) { + name_destroy(&mrd->nodeuuid); + return rc; + } + if (*ptr == ':') { + rc = record_add_conn(env, + mrd->temp_llh, + lustre_cfg_string(lcfg, 0), + mrd->nodeuuid); + name_destroy(&mrd->nodeuuid); + if (rc) + return rc; + } + } + if (mrd->nodeuuid) { + rc = record_add_conn(env, mrd->temp_llh, + lustre_cfg_string(lcfg, 0), + mrd->nodeuuid); + name_destroy(&mrd->nodeuuid); + if (rc) + return rc; + } + } + mrd->state = REPLACE_DONE; return rc ? 
rc : 1; } @@ -942,22 +1235,22 @@ static int process_command(const struct lu_env *env, struct lustre_cfg *lcfg, * * \param[in] llh log to be processed * \param[in] rec current record - * \param[in] data mgs_replace_uuid_lookup structure + * \param[in] data mgs_replace_data structure * * \retval 0 success */ -static int mgs_replace_handler(const struct lu_env *env, - struct llog_handle *llh, - struct llog_rec_hdr *rec, - void *data) +static int mgs_replace_nids_handler(const struct lu_env *env, + struct llog_handle *llh, + struct llog_rec_hdr *rec, + void *data) { - struct mgs_replace_uuid_lookup *mrul; + struct mgs_replace_data *mrd; struct lustre_cfg *lcfg = REC_DATA(rec); int cfg_len = REC_DATA_LEN(rec); int rc; ENTRY; - mrul = (struct mgs_replace_uuid_lookup *)data; + mrd = (struct mgs_replace_data *)data; if (rec->lrh_type != OBD_CFG_REC) { CERROR("unhandled lrh_type: %#x, cmd %x %s %s\n", @@ -973,23 +1266,27 @@ static int mgs_replace_handler(const struct lu_env *env, GOTO(skip_out, rc = 0); } - rc = check_markers(lcfg, mrul); - if (rc || mrul->skip_it) + rc = check_markers(lcfg, mrd); + if (rc || mrd->state == REPLACE_SKIP) GOTO(skip_out, rc = 0); /* Write to new log all commands outside target device block */ - if (!mrul->in_target_device) + if (mrd->state == REPLACE_COPY) GOTO(copy_out, rc = 0); - /* Skip all other LCFG_ADD_UUID and LCFG_ADD_CONN records - (failover nids) for this target, assuming that if then - primary is changing then so is the failover */ - if (mrul->device_nids_added && + if (mrd->state == REPLACE_DONE && (lcfg->lcfg_command == LCFG_ADD_UUID || - lcfg->lcfg_command == LCFG_ADD_CONN)) + lcfg->lcfg_command == LCFG_ADD_CONN)) { + if (!mrd->failover) + CWARN("Previous failover is deleted, but new one is " + "not set. This means you configure system " + "without failover or passed wrong replace_nids " + "command parameters. 
Device %s, passed nids %s\n", + mrd->target.mti_svname, mrd->target.mti_params); GOTO(skip_out, rc = 0); + } - rc = process_command(env, lcfg, mrul); + rc = process_command(env, lcfg, mrd); if (rc < 0) RETURN(rc); @@ -997,7 +1294,7 @@ static int mgs_replace_handler(const struct lu_env *env, RETURN(0); copy_out: /* Record is placed in temporary llog as is */ - rc = llog_write(env, mrul->temp_llh, rec, NULL, 0, NULL, -1); + rc = llog_write(env, mrd->temp_llh, rec, LLOG_NEXT_IDX); CDEBUG(D_MGS, "Copied idx=%d, rc=%d, len=%d, cmd %x %s %s\n", rec->lrh_index, rc, rec->lrh_len, lcfg->lcfg_command, @@ -1025,21 +1322,21 @@ static int mgs_log_is_empty(const struct lu_env *env, return rc; } -static int mgs_replace_nids_log(const struct lu_env *env, - struct obd_device *mgs, struct fs_db *fsdb, - char *logname, char *devname, char *nids) +static int mgs_replace_log(const struct lu_env *env, + struct obd_device *mgs, + char *logname, char *devname, + llog_cb_t replace_handler, void *data) { struct llog_handle *orig_llh, *backup_llh; struct llog_ctxt *ctxt; - struct mgs_replace_uuid_lookup *mrul; + struct mgs_replace_data *mrd; struct mgs_device *mgs_dev = lu2mgs_dev(mgs->obd_lu_dev); static struct obd_uuid cfg_uuid = { .uuid = "config_uuid" }; char *backup; - int rc, rc2; + int rc, rc2, buf_size; + time64_t now; ENTRY; - CDEBUG(D_MGS, "Replace nids for %s in %s\n", devname, logname); - ctxt = llog_get_context(mgs, LLOG_CONFIG_ORIG_CTXT); LASSERT(ctxt != NULL); @@ -1048,11 +1345,15 @@ static int mgs_replace_nids_log(const struct lu_env *env, GOTO(out_put, rc = 0); } - OBD_ALLOC(backup, strlen(logname) + strlen(".bak") + 1); + now = ktime_get_real_seconds(); + + /* max time64_t in decimal fits into 20 bytes long string */ + buf_size = strlen(logname) + 1 + 20 + 1 + strlen(".bak") + 1; + OBD_ALLOC(backup, buf_size); if (backup == NULL) GOTO(out_put, rc = -ENOMEM); - sprintf(backup, "%s.bak", logname); + snprintf(backup, buf_size, "%s.%llu.bak", logname, now); rc = 
llog_backup(env, mgs, ctxt, ctxt, logname, backup); if (rc == 0) { @@ -1089,19 +1390,23 @@ static int mgs_replace_nids_log(const struct lu_env *env, if (llog_get_size(backup_llh) <= 1) GOTO(out_close, rc = 0); - OBD_ALLOC_PTR(mrul); - if (!mrul) + OBD_ALLOC_PTR(mrd); + if (!mrd) GOTO(out_close, rc = -ENOMEM); /* devname is only needed information to replace UUID records */ - strncpy(mrul->target.mti_svname, devname, MTI_NAME_MAXLEN); - /* parse nids later */ - strncpy(mrul->target.mti_params, nids, MTI_PARAM_MAXLEN); + if (devname) + strlcpy(mrd->target.mti_svname, devname, + sizeof(mrd->target.mti_svname)); + /* data is parsed in llog callback */ + if (data) + strlcpy(mrd->target.mti_params, data, + sizeof(mrd->target.mti_params)); /* Copy records to this temporary llog */ - mrul->temp_llh = orig_llh; + mrd->temp_llh = orig_llh; - rc = llog_process(env, backup_llh, mgs_replace_handler, - (void *)mrul, NULL); - OBD_FREE_PTR(mrul); + rc = llog_process(env, backup_llh, replace_handler, + (void *)mrd, NULL); + OBD_FREE_PTR(mrd); out_close: rc2 = llog_close(NULL, backup_llh); if (!rc) @@ -1123,30 +1428,55 @@ out_restore: } out_free: - OBD_FREE(backup, strlen(backup) + 1); + OBD_FREE(backup, buf_size); out_put: llog_ctxt_put(ctxt); if (rc) - CERROR("%s: failed to replace nids in log %s: rc = %d\n", + CERROR("%s: failed to replace log %s: rc = %d\n", mgs->obd_name, logname, rc); RETURN(rc); } +static int mgs_replace_nids_log(const struct lu_env *env, + struct obd_device *obd, + char *logname, char *devname, char *nids) +{ + CDEBUG(D_MGS, "Replace NIDs for %s in %s\n", devname, logname); + return mgs_replace_log(env, obd, logname, devname, + mgs_replace_nids_handler, nids); +} + /** * Parse device name and get file system name and/or device index * - * \param[in] devname device name (ex. lustre-MDT0000) - * \param[out] fsname file system name(optional) - * \param[out] index device index(optional) + * @devname device name (ex. 
lustre-MDT0000) + * @fsname file system name extracted from @devname and returned + * to the caller (optional) + * @index device index extracted from @devname and returned to + * the caller (optional) * - * \retval 0 success + * RETURN 0 success if we are only interested in + * extracting fsname from devname. + * i.e index is NULL + * + * LDD_F_SV_TYPE_* Besides extracting the fsname the + * user also wants the index. Report to + * the user the type of obd device the + * returned index belongs too. + * + * -EINVAL The obd device name is improper so + * fsname could not be extracted. + * + * -ENXIO Failed to extract the index out of + * the obd device name. Most likely an + * invalid obd device name */ -static int mgs_parse_devname(char *devname, char *fsname, __u32 *index) +static int mgs_parse_devname(char *devname, char *fsname, u32 *index) { - int rc; + int rc = 0; ENTRY; /* Extract fsname */ @@ -1164,20 +1494,44 @@ static int mgs_parse_devname(char *devname, char *fsname, __u32 *index) if (rc < 0) { CDEBUG(D_MGS, "Device name %s with wrong index\n", devname); - RETURN(-EINVAL); + RETURN(-ENXIO); } } - RETURN(0); + /* server_name2index can return LDD_F_SV_TYPE_* so always return rc */ + RETURN(rc); } +/* This is only called during replace_nids */ static int only_mgs_is_running(struct obd_device *mgs_obd) { /* TDB: Is global variable with devices count exists? 
*/ int num_devices = get_devices_count(); + int num_exports = 0; + struct obd_export *exp; + + spin_lock(&mgs_obd->obd_dev_lock); + list_for_each_entry(exp, &mgs_obd->obd_exports, exp_obd_chain) { + /* skip self export */ + if (exp == mgs_obd->obd_self_export) + continue; + if (exp_connect_flags(exp) & OBD_CONNECT_MDS_MDS) + continue; + + ++num_exports; + + CERROR("%s: node %s still connected during replace_nids " + "connect_flags:%llx\n", + mgs_obd->obd_name, + libcfs_nid2str(exp->exp_nid_stats->nid), + exp_connect_flags(exp)); + + } + spin_unlock(&mgs_obd->obd_dev_lock); + /* osd, MGS and MGC + self_export - (wc -l /proc/fs/lustre/devices <= 2) && (num_exports <= 2) */ - return (num_devices <= 3) && (mgs_obd->obd_num_exports <= 2); + (wc -l /proc/fs/lustre/devices <= 2) && (non self exports == 0) */ + return (num_devices <= 3) && (num_exports == 0); } static int name_create_mdt(char **logname, char *fsname, int i) @@ -1207,7 +1561,7 @@ int mgs_replace_nids(const struct lu_env *env, int rc; __u32 index; char *logname; - struct fs_db *fsdb; + struct fs_db *fsdb = NULL; unsigned int i; int conn_state; struct obd_device *mgs_obd = mgs->mgs_obd; @@ -1225,9 +1579,9 @@ int mgs_replace_nids(const struct lu_env *env, GOTO(out, rc = -EINPROGRESS); } - /* Get fsname and index*/ + /* Get fsname and index */ rc = mgs_parse_devname(devname, fsname, &index); - if (rc) + if (rc < 0) GOTO(out, rc); rc = mgs_find_or_make_fsdb(env, mgs, fsname, &fsdb); @@ -1238,7 +1592,7 @@ int mgs_replace_nids(const struct lu_env *env, /* Process client llogs */ name_create(&logname, fsname, "-client"); - rc = mgs_replace_nids_log(env, mgs_obd, fsdb, logname, devname, nids); + rc = mgs_replace_nids_log(env, mgs_obd, logname, devname, nids); name_destroy(&logname); if (rc) { CERROR("%s: error while replacing NIDs for %s: rc = %d\n", @@ -1251,7 +1605,7 @@ int mgs_replace_nids(const struct lu_env *env, if (!test_bit(i, fsdb->fsdb_mdt_index_map)) continue; name_create_mdt(&logname, fsname, i); - rc = 
mgs_replace_nids_log(env, mgs_obd, fsdb, logname, devname, nids); + rc = mgs_replace_nids_log(env, mgs_obd, logname, devname, nids); name_destroy(&logname); if (rc) GOTO(out, rc); @@ -1262,41 +1616,227 @@ out: mgs_obd->obd_no_conn = conn_state; spin_unlock(&mgs_obd->obd_dev_lock); + if (fsdb) + mgs_put_fsdb(mgs, fsdb); + + RETURN(rc); +} + +/** + * This is called for every record in llog. Some of records are + * skipped, others are copied to new log as is. + * Records to be skipped are + * marker records marked SKIP + * records enclosed between SKIP markers + * + * \param[in] llh log to be processed + * \param[in] rec current record + * \param[in] data mgs_replace_data structure + * + * \retval 0 success + **/ +static int mgs_clear_config_handler(const struct lu_env *env, + struct llog_handle *llh, + struct llog_rec_hdr *rec, void *data) +{ + struct mgs_replace_data *mrd; + struct lustre_cfg *lcfg = REC_DATA(rec); + int cfg_len = REC_DATA_LEN(rec); + int rc; + + ENTRY; + + mrd = (struct mgs_replace_data *)data; + + if (rec->lrh_type != OBD_CFG_REC) { + CDEBUG(D_MGS, "Config llog Name=%s, Record Index=%u, " + "Unhandled Record Type=%#x\n", llh->lgh_name, + rec->lrh_index, rec->lrh_type); + RETURN(-EINVAL); + } + + rc = lustre_cfg_sanity_check(lcfg, cfg_len); + if (rc) { + CDEBUG(D_MGS, "Config llog Name=%s, Invalid config file.", + llh->lgh_name); + RETURN(-EINVAL); + } + + if (lcfg->lcfg_command == LCFG_MARKER) { + struct cfg_marker *marker; + + marker = lustre_cfg_buf(lcfg, 1); + if (marker->cm_flags & CM_SKIP) { + if (marker->cm_flags & CM_START) + mrd->state = REPLACE_SKIP; + if (marker->cm_flags & CM_END) + mrd->state = REPLACE_COPY; + /* SKIP section started or finished */ + CDEBUG(D_MGS, "Skip idx=%d, rc=%d, len=%d, " + "cmd %x %s %s\n", rec->lrh_index, rc, + rec->lrh_len, lcfg->lcfg_command, + lustre_cfg_string(lcfg, 0), + lustre_cfg_string(lcfg, 1)); + RETURN(0); + } + } else { + if (mrd->state == REPLACE_SKIP) { + /* record enclosed between SKIP markers, 
skip it */ + CDEBUG(D_MGS, "Skip idx=%d, rc=%d, len=%d, " + "cmd %x %s %s\n", rec->lrh_index, rc, + rec->lrh_len, lcfg->lcfg_command, + lustre_cfg_string(lcfg, 0), + lustre_cfg_string(lcfg, 1)); + RETURN(0); + } + } + + /* Record is placed in temporary llog as is */ + rc = llog_write(env, mrd->temp_llh, rec, LLOG_NEXT_IDX); + + CDEBUG(D_MGS, "Copied idx=%d, rc=%d, len=%d, cmd %x %s %s\n", + rec->lrh_index, rc, rec->lrh_len, lcfg->lcfg_command, + lustre_cfg_string(lcfg, 0), lustre_cfg_string(lcfg, 1)); + RETURN(rc); +} + +/* + * Directory CONFIGS/ may contain files which are not config logs to + * be cleared. Skip any llogs with a non-alphanumeric character after + * the last '-'. For example, fsname-MDT0000.sav, fsname-MDT0000.bak, + * fsname-MDT0000.orig, fsname-MDT0000~, fsname-MDT0000.20150516, etc. + */ +static bool config_to_clear(const char *logname) +{ + int i; + char *str; + + str = strrchr(logname, '-'); + if (!str) + return 0; + + i = 0; + while (isalnum(str[++i])); + return str[i] == '\0'; +} + +/** + * Clear config logs for \a name + * + * \param env + * \param mgs MGS device + * \param name name of device or of filesystem + * (ex. 
lustre-OST0000 or lustre) in later case all logs + * will be cleared + * + * \retval 0 success + */ +int mgs_clear_configs(const struct lu_env *env, + struct mgs_device *mgs, const char *name) +{ + struct list_head dentry_list; + struct mgs_direntry *dirent, *n; + char *namedash; + int conn_state; + struct obd_device *mgs_obd = mgs->mgs_obd; + int rc; + + ENTRY; + + /* Prevent clients and servers from connecting to mgs */ + spin_lock(&mgs_obd->obd_dev_lock); + conn_state = mgs_obd->obd_no_conn; + mgs_obd->obd_no_conn = 1; + spin_unlock(&mgs_obd->obd_dev_lock); + + /* + * config logs cannot be cleaned if anything other than + * MGS is started + */ + if (!only_mgs_is_running(mgs_obd)) { + CERROR("Only MGS is allowed to be started\n"); + GOTO(out, rc = -EBUSY); + } + + /* Find all the logs in the CONFIGS directory */ + rc = class_dentry_readdir(env, mgs, &dentry_list); + if (rc) { + CERROR("%s: cannot read config directory '%s': rc = %d\n", + mgs_obd->obd_name, MOUNT_CONFIGS_DIR, rc); + GOTO(out, rc); + } + + if (list_empty(&dentry_list)) { + CERROR("%s: list empty reading config dir '%s': rc = %d\n", + mgs_obd->obd_name, MOUNT_CONFIGS_DIR, -ENOENT); + GOTO(out, rc = -ENOENT); + } + + OBD_ALLOC(namedash, strlen(name) + 2); + if (namedash == NULL) + GOTO(out, rc = -ENOMEM); + snprintf(namedash, strlen(name) + 2, "%s-", name); + + list_for_each_entry(dirent, &dentry_list, mde_list) { + if (strcmp(name, dirent->mde_name) && + strncmp(namedash, dirent->mde_name, strlen(namedash))) + continue; + if (!config_to_clear(dirent->mde_name)) + continue; + CDEBUG(D_MGS, "%s: Clear config log %s\n", + mgs_obd->obd_name, dirent->mde_name); + rc = mgs_replace_log(env, mgs_obd, dirent->mde_name, NULL, + mgs_clear_config_handler, NULL); + if (rc) + break; + } + + list_for_each_entry_safe(dirent, n, &dentry_list, mde_list) { + list_del_init(&dirent->mde_list); + mgs_direntry_free(dirent); + } + OBD_FREE(namedash, strlen(name) + 2); +out: + spin_lock(&mgs_obd->obd_dev_lock); + 
mgs_obd->obd_no_conn = conn_state; + spin_unlock(&mgs_obd->obd_dev_lock); + RETURN(rc); } static int record_lov_setup(const struct lu_env *env, struct llog_handle *llh, char *devname, struct lov_desc *desc) { - struct mgs_thread_info *mgi = mgs_env_info(env); - struct lustre_cfg *lcfg; + struct mgs_thread_info *mgi = mgs_env_info(env); + struct llog_cfg_rec *lcr; int rc; lustre_cfg_bufs_reset(&mgi->mgi_bufs, devname); lustre_cfg_bufs_set(&mgi->mgi_bufs, 1, desc, sizeof(*desc)); - lcfg = lustre_cfg_new(LCFG_SETUP, &mgi->mgi_bufs); - if (!lcfg) + lcr = lustre_cfg_rec_new(LCFG_SETUP, &mgi->mgi_bufs); + if (lcr == NULL) return -ENOMEM; - rc = record_lcfg(env, llh, lcfg); - lustre_cfg_free(lcfg); + rc = llog_write(env, llh, &lcr->lcr_hdr, LLOG_NEXT_IDX); + lustre_cfg_rec_free(lcr); return rc; } static int record_lmv_setup(const struct lu_env *env, struct llog_handle *llh, char *devname, struct lmv_desc *desc) { - struct mgs_thread_info *mgi = mgs_env_info(env); - struct lustre_cfg *lcfg; + struct mgs_thread_info *mgi = mgs_env_info(env); + struct llog_cfg_rec *lcr; int rc; lustre_cfg_bufs_reset(&mgi->mgi_bufs, devname); lustre_cfg_bufs_set(&mgi->mgi_bufs, 1, desc, sizeof(*desc)); - lcfg = lustre_cfg_new(LCFG_SETUP, &mgi->mgi_bufs); - - rc = record_lcfg(env, llh, lcfg); + lcr = lustre_cfg_rec_new(LCFG_SETUP, &mgi->mgi_bufs); + if (lcr == NULL) + return -ENOMEM; - lustre_cfg_free(lcfg); + rc = llog_write(env, llh, &lcr->lcr_hdr, LLOG_NEXT_IDX); + lustre_cfg_rec_free(lcr); return rc; } @@ -1315,8 +1855,8 @@ static inline int record_lov_add(const struct lu_env *env, char *lov_name, char *ost_uuid, char *index, char *gen) { - return record_base(env,llh,lov_name,0,LCFG_LOV_ADD_OBD, - ost_uuid, index, gen, 0); + return record_base(env, llh, lov_name, 0, LCFG_LOV_ADD_OBD, + ost_uuid, index, gen, NULL); } static inline int record_mount_opt(const struct lu_env *env, @@ -1324,8 +1864,8 @@ static inline int record_mount_opt(const struct lu_env *env, char *profile, char *lov_name, 
char *mdc_name) { - return record_base(env,llh,NULL,0,LCFG_MOUNTOPT, - profile,lov_name,mdc_name,0); + return record_base(env, llh, NULL, 0, LCFG_MOUNTOPT, + profile, lov_name, mdc_name, NULL); } static int record_marker(const struct lu_env *env, @@ -1334,7 +1874,7 @@ static int record_marker(const struct lu_env *env, char *tgtname, char *comment) { struct mgs_thread_info *mgi = mgs_env_info(env); - struct lustre_cfg *lcfg; + struct llog_cfg_rec *lcr; int rc; int cplen = 0; @@ -1351,17 +1891,17 @@ static int record_marker(const struct lu_env *env, sizeof(mgi->mgi_marker.cm_comment)); if (cplen >= sizeof(mgi->mgi_marker.cm_comment)) return -E2BIG; - mgi->mgi_marker.cm_createtime = cfs_time_current_sec(); + mgi->mgi_marker.cm_createtime = ktime_get_real_seconds(); mgi->mgi_marker.cm_canceltime = 0; lustre_cfg_bufs_reset(&mgi->mgi_bufs, NULL); lustre_cfg_bufs_set(&mgi->mgi_bufs, 1, &mgi->mgi_marker, sizeof(mgi->mgi_marker)); - lcfg = lustre_cfg_new(LCFG_MARKER, &mgi->mgi_bufs); - if (!lcfg) + lcr = lustre_cfg_rec_new(LCFG_MARKER, &mgi->mgi_bufs); + if (lcr == NULL) return -ENOMEM; - rc = record_lcfg(env, llh, lcfg); - lustre_cfg_free(lcfg); + rc = llog_write(env, llh, &lcr->lcr_hdr, LLOG_NEXT_IDX); + lustre_cfg_rec_free(lcr); return rc; } @@ -1371,6 +1911,7 @@ static int record_start_log(const struct lu_env *env, struct mgs_device *mgs, static struct obd_uuid cfg_uuid = { .uuid = "config_uuid" }; struct llog_ctxt *ctxt; int rc = 0; + ENTRY; if (*llh) GOTO(out, rc = -EBUSY); @@ -1412,25 +1953,23 @@ static int record_end_log(const struct lu_env *env, struct llog_handle **llh) /* write an lcfg directly into a log (with markers) */ static int mgs_write_log_direct(const struct lu_env *env, struct mgs_device *mgs, struct fs_db *fsdb, - char *logname, struct lustre_cfg *lcfg, - char *devname, char *comment) + char *logname, struct llog_cfg_rec *lcr, + char *devname, char *comment) { - struct llog_handle *llh = NULL; - int rc; - ENTRY; + struct llog_handle *llh = NULL; + int 
rc; - if (!lcfg) - RETURN(-ENOMEM); + ENTRY; rc = record_start_log(env, mgs, &llh, logname); - if (rc) - RETURN(rc); + if (rc) + RETURN(rc); /* FIXME These should be a single journal transaction */ rc = record_marker(env, llh, fsdb, CM_START, devname, comment); if (rc) GOTO(out_end, rc); - rc = record_lcfg(env, llh, lcfg); + rc = llog_write(env, llh, &lcr->lcr_hdr, LLOG_NEXT_IDX); if (rc) GOTO(out_end, rc); rc = record_marker(env, llh, fsdb, CM_END, devname, comment); @@ -1442,79 +1981,60 @@ out_end: } /* write the lcfg in all logs for the given fs */ -int mgs_write_log_direct_all(const struct lu_env *env, - struct mgs_device *mgs, - struct fs_db *fsdb, - struct mgs_target_info *mti, - struct lustre_cfg *lcfg, - char *devname, char *comment, - int server_only) +static int mgs_write_log_direct_all(const struct lu_env *env, + struct mgs_device *mgs, + struct fs_db *fsdb, + struct mgs_target_info *mti, + struct llog_cfg_rec *lcr, char *devname, + char *comment, int server_only) { - cfs_list_t list; - struct mgs_direntry *dirent, *n; - char *fsname = mti->mti_fsname; - char *logname; - int rc = 0, len = strlen(fsname); - ENTRY; + struct list_head log_list; + struct mgs_direntry *dirent, *n; + char *fsname = mti->mti_fsname; + int rc = 0, len = strlen(fsname); - /* We need to set params for any future logs - as well. FIXME Append this file to every new log. - Actually, we should store as params (text), not llogs. Or - in a database. 
*/ - rc = name_create(&logname, fsname, "-params"); + ENTRY; + /* Find all the logs in the CONFIGS directory */ + rc = class_dentry_readdir(env, mgs, &log_list); if (rc) RETURN(rc); - if (mgs_log_is_empty(env, mgs, logname)) { - struct llog_handle *llh = NULL; - rc = record_start_log(env, mgs, &llh, logname); - if (rc == 0) - record_end_log(env, &llh); - } - name_destroy(&logname); - if (rc) - RETURN(rc); - - /* Find all the logs in the CONFIGS directory */ - rc = class_dentry_readdir(env, mgs, &list); - if (rc) - RETURN(rc); - /* Could use fsdb index maps instead of directory listing */ - cfs_list_for_each_entry_safe(dirent, n, &list, list) { - cfs_list_del(&dirent->list); - /* don't write to sptlrpc rule log */ - if (strstr(dirent->name, "-sptlrpc") != NULL) + /* Could use fsdb index maps instead of directory listing */ + list_for_each_entry_safe(dirent, n, &log_list, mde_list) { + list_del_init(&dirent->mde_list); + /* don't write to sptlrpc rule log */ + if (strstr(dirent->mde_name, "-sptlrpc") != NULL) goto next; /* caller wants write server logs only */ - if (server_only && strstr(dirent->name, "-client") != NULL) + if (server_only && strstr(dirent->mde_name, "-client") != NULL) goto next; - if (strncmp(fsname, dirent->name, len) == 0) { - CDEBUG(D_MGS, "Changing log %s\n", dirent->name); - /* Erase any old settings of this same parameter */ - rc = mgs_modify(env, mgs, fsdb, mti, dirent->name, - devname, comment, CM_SKIP); - if (rc < 0) - CERROR("%s: Can't modify llog %s: rc = %d\n", - mgs->mgs_obd->obd_name, dirent->name,rc); - /* Write the new one */ - if (lcfg) { - rc = mgs_write_log_direct(env, mgs, fsdb, - dirent->name, - lcfg, devname, - comment); - if (rc) - CERROR("%s: writing log %s: rc = %d\n", - mgs->mgs_obd->obd_name, - dirent->name, rc); - } - } + if (strlen(dirent->mde_name) <= len || + strncmp(fsname, dirent->mde_name, len) != 0 || + dirent->mde_name[len] != '-') + goto next; + + CDEBUG(D_MGS, "Changing log %s\n", dirent->mde_name); + /* Erase 
any old settings of this same parameter */ + rc = mgs_modify(env, mgs, fsdb, mti, dirent->mde_name, + devname, comment, CM_SKIP); + if (rc < 0) + CERROR("%s: Can't modify llog %s: rc = %d\n", + mgs->mgs_obd->obd_name, dirent->mde_name, rc); + if (lcr == NULL) + goto next; + /* Write the new one */ + rc = mgs_write_log_direct(env, mgs, fsdb, dirent->mde_name, + lcr, devname, comment); + if (rc != 0) + CERROR("%s: writing log %s: rc = %d\n", + mgs->mgs_obd->obd_name, dirent->mde_name, rc); next: mgs_direntry_free(dirent); - } + } - RETURN(rc); + RETURN(rc); } static int mgs_write_log_osp_to_mdt(const struct lu_env *env, @@ -1658,7 +2178,7 @@ static int mgs_steal_client_llog_handler(const struct lu_env *env, RETURN(rc); if (lcfg->lcfg_command == LCFG_ADD_UUID) { - uint64_t nodenid = lcfg->lcfg_nid; + __u64 nodenid = lcfg->lcfg_nid; if (strlen(tmti->mti_uuid) == 0) { /* target uuid not set, this config record is before @@ -1667,9 +2187,12 @@ static int mgs_steal_client_llog_handler(const struct lu_env *env, tmti->mti_nids[tmti->mti_nid_count] = nodenid; tmti->mti_nid_count++; } else { + char nidstr[LNET_NIDSTR_SIZE]; + /* failover node nid */ + libcfs_nid2str_r(nodenid, nidstr, sizeof(nidstr)); rc = add_param(tmti->mti_params, PARAM_FAILNODE, - libcfs_nid2str(nodenid)); + nidstr); } RETURN(rc); @@ -1687,7 +2210,8 @@ static int mgs_steal_client_llog_handler(const struct lu_env *env, if (lcfg->lcfg_command == LCFG_SPTLRPC_CONF) RETURN(rc); - if (lcfg->lcfg_command == LCFG_ADD_MDC) { + if (lcfg->lcfg_command == LCFG_ADD_MDC && + strstr(lustre_cfg_string(lcfg, 0), "-clilmv") != NULL) { int index; if (sscanf(lustre_cfg_buf(lcfg, 2), "%d", &index) != 1) @@ -1854,9 +2378,9 @@ static int mgs_write_log_lov(const struct lu_env *env, struct mgs_device *mgs, /* Defaults. 
Can be changed later by lcfg config_param */ lovdesc->ld_default_stripe_count = 1; lovdesc->ld_pattern = LOV_PATTERN_RAID0; - lovdesc->ld_default_stripe_size = 1024 * 1024; + lovdesc->ld_default_stripe_size = LOV_DESC_STRIPE_SIZE_DEFAULT; lovdesc->ld_default_stripe_offset = -1; - lovdesc->ld_qos_maxage = QOS_DEFAULT_MAXAGE; + lovdesc->ld_qos_maxage = LOV_DESC_QOS_MAXAGE_DEFAULT; sprintf((char*)lovdesc->ld_uuid.uuid, "%s_UUID", lovname); /* can these be the same? */ uuid = (char *)lovdesc->ld_uuid.uuid; @@ -1907,30 +2431,48 @@ static int mgs_write_log_failnids(const struct lu_env *env, #07 L add_conn 0:OSC_uml1_ost1_mdsA 1:uml2_UUID */ - /* Pull failnid info out of params string */ + /* + * Pull failnid info out of params string, which may contain something + * like ",:,". class_parse_nid() does not + * complain about abnormal inputs like ",:", ":,", + * etc. However, convert_hostnames() should have caught those. + */ while (class_find_param(ptr, PARAM_FAILNODE, &ptr) == 0) { while (class_parse_nid(ptr, &nid, &ptr) == 0) { - if (failnodeuuid == NULL) { - /* We don't know the failover node name, - so just use the first nid as the uuid */ - rc = name_create(&failnodeuuid, - libcfs_nid2str(nid), ""); - if (rc) - return rc; - } - CDEBUG(D_MGS, "add nid %s for failover uuid %s, " - "client %s\n", libcfs_nid2str(nid), - failnodeuuid, cliname); + char nidstr[LNET_NIDSTR_SIZE]; + + if (failnodeuuid == NULL) { + /* We don't know the failover node name, + * so just use the first nid as the uuid */ + libcfs_nid2str_r(nid, nidstr, sizeof(nidstr)); + rc = name_create(&failnodeuuid, nidstr, ""); + if (rc != 0) + return rc; + } + CDEBUG(D_MGS, "add nid %s for failover uuid %s, " + "client %s\n", + libcfs_nid2str_r(nid, nidstr, sizeof(nidstr)), + failnodeuuid, cliname); rc = record_add_uuid(env, llh, nid, failnodeuuid); - } + /* + * If *ptr is ':', we have added all NIDs for + * failnodeuuid. 
+ */ + if (*ptr == ':') { + rc = record_add_conn(env, llh, cliname, + failnodeuuid); + name_destroy(&failnodeuuid); + failnodeuuid = NULL; + } + } if (failnodeuuid) { rc = record_add_conn(env, llh, cliname, failnodeuuid); name_destroy(&failnodeuuid); failnodeuuid = NULL; } - } + } - return rc; + return rc; } static int mgs_write_log_mdc_to_lmv(const struct lu_env *env, @@ -1944,9 +2486,10 @@ static int mgs_write_log_mdc_to_lmv(const struct lu_env *env, char *nodeuuid = NULL; char *mdcuuid = NULL; char *lmvuuid = NULL; - char index[6]; - int i, rc; - ENTRY; + char index[6]; + char nidstr[LNET_NIDSTR_SIZE]; + int i, rc; + ENTRY; if (mgs_log_is_empty(env, mgs, logname)) { CERROR("log is empty! Logical error\n"); @@ -1956,7 +2499,8 @@ static int mgs_write_log_mdc_to_lmv(const struct lu_env *env, CDEBUG(D_MGS, "adding mdc for %s to log %s:lmv(%s)\n", mti->mti_svname, logname, lmvname); - rc = name_create(&nodeuuid, libcfs_nid2str(mti->mti_nids[0]), ""); + libcfs_nid2str_r(mti->mti_nids[0], nidstr, sizeof(nidstr)); + rc = name_create(&nodeuuid, nidstr, ""); if (rc) RETURN(rc); rc = name_create(&mdcname, mti->mti_svname, "-mdc"); @@ -1976,19 +2520,21 @@ static int mgs_write_log_mdc_to_lmv(const struct lu_env *env, "add mdc"); if (rc) GOTO(out_end, rc); - for (i = 0; i < mti->mti_nid_count; i++) { - CDEBUG(D_MGS, "add nid %s for mdt\n", - libcfs_nid2str(mti->mti_nids[i])); + for (i = 0; i < mti->mti_nid_count; i++) { + CDEBUG(D_MGS, "add nid %s for mdt\n", + libcfs_nid2str_r(mti->mti_nids[i], + nidstr, sizeof(nidstr))); rc = record_add_uuid(env, llh, mti->mti_nids[i], nodeuuid); if (rc) GOTO(out_end, rc); - } + } rc = record_attach(env, llh, mdcname, LUSTRE_MDC_NAME, lmvuuid); if (rc) GOTO(out_end, rc); - rc = record_setup(env, llh, mdcname, mti->mti_uuid, nodeuuid, 0, 0); + rc = record_setup(env, llh, mdcname, mti->mti_uuid, nodeuuid, + NULL, NULL); if (rc) GOTO(out_end, rc); rc = mgs_write_log_failnids(env, mti, llh, mdcname); @@ -2071,6 +2617,7 @@ static int 
mgs_write_log_osp_to_mdt(const struct lu_env *env, char *mdtname = NULL; char *lovname = NULL; char index_str[16]; + char nidstr[LNET_NIDSTR_SIZE]; int i, rc; ENTRY; @@ -2086,7 +2633,8 @@ static int mgs_write_log_osp_to_mdt(const struct lu_env *env, if (rc) RETURN(rc); - rc = name_create(&nodeuuid, libcfs_nid2str(mti->mti_nids[0]), ""); + libcfs_nid2str_r(mti->mti_nids[0], nidstr, sizeof(nidstr)); + rc = name_create(&nodeuuid, nidstr, ""); if (rc) GOTO(out_destory, rc); @@ -2122,11 +2670,12 @@ static int mgs_write_log_osp_to_mdt(const struct lu_env *env, for (i = 0; i < mti->mti_nid_count; i++) { CDEBUG(D_MGS, "add nid %s for mdt\n", - libcfs_nid2str(mti->mti_nids[i])); + libcfs_nid2str_r(mti->mti_nids[i], + nidstr, sizeof(nidstr))); rc = record_add_uuid(env, llh, mti->mti_nids[i], nodeuuid); if (rc) GOTO(out_end, rc); - } + } rc = record_attach(env, llh, ospname, LUSTRE_OSP_NAME, lovuuid); if (rc) @@ -2142,8 +2691,7 @@ static int mgs_write_log_osp_to_mdt(const struct lu_env *env, GOTO(out_end, rc); /* Add mdc(osp) to lod */ - snprintf(index_str, sizeof(mti->mti_stripe_index), "%d", - mti->mti_stripe_index); + snprintf(index_str, sizeof(index_str), "%d", mti->mti_stripe_index); rc = record_base(env, llh, lovname, 0, LCFG_ADD_MDC, mti->mti_uuid, index_str, "1", NULL); if (rc) @@ -2281,35 +2829,32 @@ static int mgs_write_log_mdt(const struct lu_env *env, #14 L mount_option 0: 1:client 2:lov1 3:MDC_uml1_mdsA_MNT_client */ - /* copy client info about lov/lmv */ - mgi->mgi_comp.comp_mti = mti; - mgi->mgi_comp.comp_fsdb = fsdb; - - rc = mgs_steal_llog_for_mdt_from_client(env, mgs, cliname, - &mgi->mgi_comp); - if (rc) - GOTO(out_free, rc); - rc = mgs_write_log_mdc_to_lmv(env, mgs, fsdb, mti, cliname, - fsdb->fsdb_clilmv); - if (rc) - GOTO(out_free, rc); + /* copy client info about lov/lmv */ + mgi->mgi_comp.comp_mti = mti; + mgi->mgi_comp.comp_fsdb = fsdb; - /* add mountopts */ - rc = record_start_log(env, mgs, &llh, cliname); - if (rc) - GOTO(out_free, rc); + rc = 
mgs_steal_llog_for_mdt_from_client(env, mgs, cliname, + &mgi->mgi_comp); + if (rc) + GOTO(out_free, rc); + rc = mgs_write_log_mdc_to_lmv(env, mgs, fsdb, mti, cliname, + fsdb->fsdb_clilmv); + if (rc) + GOTO(out_free, rc); - rc = record_marker(env, llh, fsdb, CM_START, cliname, - "mount opts"); - if (rc) - GOTO(out_end, rc); - rc = record_mount_opt(env, llh, cliname, fsdb->fsdb_clilov, - fsdb->fsdb_clilmv); - if (rc) - GOTO(out_end, rc); - rc = record_marker(env, llh, fsdb, CM_END, cliname, - "mount opts"); + /* add mountopts */ + rc = record_start_log(env, mgs, &llh, cliname); + if (rc) + GOTO(out_free, rc); + rc = record_marker(env, llh, fsdb, CM_START, cliname, "mount opts"); + if (rc) + GOTO(out_end, rc); + rc = record_mount_opt(env, llh, cliname, fsdb->fsdb_clilov, + fsdb->fsdb_clilmv); + if (rc) + GOTO(out_end, rc); + rc = record_marker(env, llh, fsdb, CM_END, cliname, "mount opts"); if (rc) GOTO(out_end, rc); @@ -2323,8 +2868,34 @@ static int mgs_write_log_mdt(const struct lu_env *env, if (rc) GOTO(out_end, rc); - rc = mgs_write_log_osp_to_mdt(env, mgs, fsdb, mti, - i, logname); + /* NB: If the log for the MDT is empty, it means + * the MDT is only added to the index + * map, and not being process yet, i.e. this + * is an unregistered MDT, see mgs_write_log_target(). + * so we should skip it. Otherwise + * + * 1. MGS get register request for MDT1 and MDT2. + * + * 2. Then both MDT1 and MDT2 are added into + * fsdb_mdt_index_map. (see mgs_set_index()). + * + * 3. Then MDT1 get the lock of fsdb_mutex, then + * generate the config log, here, it will regard MDT2 + * as an existent MDT, and generate "add osp" for + * lustre-MDT0001-osp-MDT0002. Note: at the moment + * MDT0002 config log is still empty, so it will + * add "add osp" even before "lov setup", which + * will definitly cause trouble. + * + * 4. 
MDT1 registeration finished, fsdb_mutex is + * released, then MDT2 get in, then in above + * mgs_steal_llog_for_mdt_from_client(), it will + * add another osp log for lustre-MDT0001-osp-MDT0002, + * which will cause another trouble.*/ + if (!mgs_log_is_empty(env, mgs, logname)) + rc = mgs_write_log_osp_to_mdt(env, mgs, fsdb, + mti, i, logname); + name_destroy(&logname); if (rc) GOTO(out_end, rc); @@ -2344,25 +2915,27 @@ static int mgs_write_log_osc_to_lov(const struct lu_env *env, char *logname, char *suffix, char *lovname, enum lustre_sec_part sec_part, int flags) { - struct llog_handle *llh = NULL; + struct llog_handle *llh = NULL; char *nodeuuid = NULL; char *oscname = NULL; char *oscuuid = NULL; char *lovuuid = NULL; char *svname = NULL; - char index[6]; - int i, rc; + char index[6]; + char nidstr[LNET_NIDSTR_SIZE]; + int i, rc; + ENTRY; - ENTRY; - CDEBUG(D_INFO, "adding osc for %s to log %s\n", - mti->mti_svname, logname); + CDEBUG(D_INFO, "adding osc for %s to log %s\n", + mti->mti_svname, logname); if (mgs_log_is_empty(env, mgs, logname)) { - CERROR("log is empty! Logical error\n"); - RETURN (-EINVAL); - } + CERROR("log is empty! Logical error\n"); + RETURN(-EINVAL); + } - rc = name_create(&nodeuuid, libcfs_nid2str(mti->mti_nids[0]), ""); + libcfs_nid2str_r(mti->mti_nids[0], nidstr, sizeof(nidstr)); + rc = name_create(&nodeuuid, nidstr, ""); if (rc) RETURN(rc); rc = name_create(&svname, mti->mti_svname, "-osc"); @@ -2413,16 +2986,19 @@ static int mgs_write_log_osc_to_lov(const struct lu_env *env, * (multiple interfaces), while nids after as failover node nids. * See mgs_steal_client_llog_handler() LCFG_ADD_UUID. 
*/ - for (i = 0; i < mti->mti_nid_count; i++) { - CDEBUG(D_MGS, "add nid %s\n", libcfs_nid2str(mti->mti_nids[i])); + for (i = 0; i < mti->mti_nid_count; i++) { + CDEBUG(D_MGS, "add nid %s\n", + libcfs_nid2str_r(mti->mti_nids[i], + nidstr, sizeof(nidstr))); rc = record_add_uuid(env, llh, mti->mti_nids[i], nodeuuid); if (rc) GOTO(out_end, rc); - } + } rc = record_attach(env, llh, oscname, LUSTRE_OSC_NAME, lovuuid); if (rc) GOTO(out_end, rc); - rc = record_setup(env, llh, oscname, mti->mti_uuid, nodeuuid, 0, 0); + rc = record_setup(env, llh, oscname, mti->mti_uuid, nodeuuid, + NULL, NULL); if (rc) GOTO(out_end, rc); rc = mgs_write_log_failnids(env, mti, llh, oscname); @@ -2496,7 +3072,7 @@ static int mgs_write_log_ost(const struct lu_env *env, GOTO(out_end, rc); rc = record_setup(env, llh, mti->mti_svname, "dev"/*ignored*/, "type"/*ignored*/, - failout ? "n" : "f", 0/*options*/); + failout ? "n" : "f", NULL/*options*/); if (rc) GOTO(out_end, rc); rc = record_marker(env, llh, fsdb, CM_END, mti->mti_svname, "add ost"); @@ -2528,13 +3104,14 @@ out_end: /* Add ost to all MDT lov defs */ for (i = 0; i < INDEX_MAP_SIZE * 8; i++){ if (test_bit(i, fsdb->fsdb_mdt_index_map)) { - char mdt_index[9]; + char mdt_index[13]; rc = name_create_mdt_and_lov(&logname, &lovname, fsdb, i); if (rc) RETURN(rc); - sprintf(mdt_index, "-MDT%04x", i); + + snprintf(mdt_index, sizeof(mdt_index), "-MDT%04x", i); rc = mgs_write_log_osc_to_lov(env, mgs, fsdb, mti, logname, mdt_index, lovname, LUSTRE_SP_MDT, @@ -2562,7 +3139,7 @@ out_end: GOTO(out_free, rc); } rc = mgs_write_log_osc_to_lov(env, mgs, fsdb, mti, logname, "", - fsdb->fsdb_clilov, LUSTRE_SP_CLI, 0); + fsdb->fsdb_clilov, LUSTRE_SP_CLI, flags); out_free: name_destroy(&logname); RETURN(rc); @@ -2628,7 +3205,8 @@ static int mgs_write_log_add_failnid(const struct lu_env *env, /* FIXME we currently can't erase the failnids * given when a target first registers, since they aren't part of - * an "add uuid" stanza */ + * an "add uuid" stanza + */ 
/* Verify that we know about this target */ if (mgs_log_is_empty(env, mgs, mti->mti_svname)) { @@ -2648,12 +3226,14 @@ static int mgs_write_log_add_failnid(const struct lu_env *env, } if (rc) RETURN(rc); + /* Add failover nids to the client log */ rc = name_create(&logname, mti->mti_fsname, "-client"); if (rc) { name_destroy(&cliname); RETURN(rc); } + rc = mgs_write_log_failnid_internal(env, mgs, fsdb,mti,logname,cliname); name_destroy(&logname); name_destroy(&cliname); @@ -2691,61 +3271,51 @@ static int mgs_write_log_add_failnid(const struct lu_env *env, static int mgs_wlp_lcfg(const struct lu_env *env, struct mgs_device *mgs, struct fs_db *fsdb, - struct mgs_target_info *mti, - char *logname, struct lustre_cfg_bufs *bufs, - char *tgtname, char *ptr) + struct mgs_target_info *mti, + char *logname, struct lustre_cfg_bufs *bufs, + char *tgtname, char *ptr) { - char comment[MTI_NAME_MAXLEN]; - char *tmp; - struct lustre_cfg *lcfg; - int rc, del; - - /* Erase any old settings of this same parameter */ - memcpy(comment, ptr, MTI_NAME_MAXLEN); - comment[MTI_NAME_MAXLEN - 1] = 0; - /* But don't try to match the value. */ - if ((tmp = strchr(comment, '='))) - *tmp = 0; - /* FIXME we should skip settings that are the same as old values */ + char comment[MTI_NAME_MAXLEN]; + char *tmp; + struct llog_cfg_rec *lcr; + int rc, del; + + /* Erase any old settings of this same parameter */ + memcpy(comment, ptr, MTI_NAME_MAXLEN); + comment[MTI_NAME_MAXLEN - 1] = 0; + /* But don't try to match the value. */ + tmp = strchr(comment, '='); + if (tmp != NULL) + *tmp = 0; + /* FIXME we should skip settings that are the same as old values */ rc = mgs_modify(env, mgs, fsdb, mti, logname, tgtname, comment,CM_SKIP); if (rc < 0) return rc; - del = mgs_param_empty(ptr); + del = mgs_param_empty(ptr); - LCONSOLE_INFO("%sing parameter %s.%s in log %s\n", del ? "Disabl" : rc ? 
- "Sett" : "Modify", tgtname, comment, logname); - if (del) - return rc; + LCONSOLE_INFO("%s parameter %s.%s in log %s\n", del ? "Disabling" : rc ? + "Setting" : "Modifying", tgtname, comment, logname); + if (del) { + /* mgs_modify() will return 1 if nothing had to be done */ + if (rc == 1) + rc = 0; + return rc; + } - lustre_cfg_bufs_reset(bufs, tgtname); + lustre_cfg_bufs_reset(bufs, tgtname); lustre_cfg_bufs_set_string(bufs, 1, ptr); if (mti->mti_flags & LDD_F_PARAM2) lustre_cfg_bufs_set_string(bufs, 2, LCTL_UPCALL); - lcfg = lustre_cfg_new((mti->mti_flags & LDD_F_PARAM2) ? - LCFG_SET_PARAM : LCFG_PARAM, bufs); - - if (!lcfg) - return -ENOMEM; - rc = mgs_write_log_direct(env, mgs, fsdb, logname,lcfg,tgtname,comment); - lustre_cfg_free(lcfg); - return rc; -} - -static int mgs_write_log_param2(const struct lu_env *env, - struct mgs_device *mgs, - struct fs_db *fsdb, - struct mgs_target_info *mti, char *ptr) -{ - struct lustre_cfg_bufs bufs; - int rc = 0; - ENTRY; - - CDEBUG(D_MGS, "next param '%s'\n", ptr); - rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, PARAMS_FILENAME, &bufs, - mti->mti_svname, ptr); + lcr = lustre_cfg_rec_new((mti->mti_flags & LDD_F_PARAM2) ? 
+ LCFG_SET_PARAM : LCFG_PARAM, bufs); + if (lcr == NULL) + return -ENOMEM; - RETURN(rc); + rc = mgs_write_log_direct(env, mgs, fsdb, logname, lcr, tgtname, + comment); + lustre_cfg_rec_free(lcr); + return rc; } /* write global variable settings into log */ @@ -2753,8 +3323,9 @@ static int mgs_write_log_sys(const struct lu_env *env, struct mgs_device *mgs, struct fs_db *fsdb, struct mgs_target_info *mti, char *sys, char *ptr) { - struct mgs_thread_info *mgi = mgs_env_info(env); - struct lustre_cfg *lcfg; + struct mgs_thread_info *mgi = mgs_env_info(env); + struct lustre_cfg *lcfg; + struct llog_cfg_rec *lcr; char *tmp, sep; int rc, cmd, convert = 1; @@ -2785,15 +3356,26 @@ static int mgs_write_log_sys(const struct lu_env *env, lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 1, sys); if (!convert && *tmp != '\0') lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 2, tmp); - lcfg = lustre_cfg_new(cmd, &mgi->mgi_bufs); - lcfg->lcfg_num = convert ? simple_strtoul(tmp, NULL, 0) : 0; + lcr = lustre_cfg_rec_new(cmd, &mgi->mgi_bufs); + if (lcr == NULL) + return -ENOMEM; + + lcfg = &lcr->lcr_cfg; + if (convert) { + rc = kstrtouint(tmp, 0, &lcfg->lcfg_num); + if (rc) + GOTO(out_rec_free, rc); + } else { + lcfg->lcfg_num = 0; + } + /* truncate the comment to the parameter name */ ptr = tmp - 1; sep = *ptr; *ptr = '\0'; /* modify all servers and clients */ rc = mgs_write_log_direct_all(env, mgs, fsdb, mti, - *tmp == '\0' ? NULL : lcfg, + *tmp == '\0' ? 
NULL : lcr, mti->mti_fsname, sys, 0); if (rc == 0 && *tmp != '\0') { switch (cmd) { @@ -2810,7 +3392,8 @@ static int mgs_write_log_sys(const struct lu_env *env, } } *ptr = sep; - lustre_cfg_free(lcfg); +out_rec_free: + lustre_cfg_rec_free(lcr); return rc; } @@ -2820,7 +3403,7 @@ static int mgs_write_log_quota(const struct lu_env *env, struct mgs_device *mgs, char *quota, char *ptr) { struct mgs_thread_info *mgi = mgs_env_info(env); - struct lustre_cfg *lcfg; + struct llog_cfg_rec *lcr; char *tmp; char sep; int rc, cmd = LCFG_PARAM; @@ -2839,6 +3422,7 @@ static int mgs_write_log_quota(const struct lu_env *env, struct mgs_device *mgs, CDEBUG(D_MGS, "global '%s'\n", quota); if (strchr(tmp, 'u') == NULL && strchr(tmp, 'g') == NULL && + strchr(tmp, 'p') == NULL && strcmp(tmp, "none") != 0) { CERROR("enable option(%s) isn't supported\n", tmp); return -EINVAL; @@ -2847,7 +3431,10 @@ static int mgs_write_log_quota(const struct lu_env *env, struct mgs_device *mgs, lustre_cfg_bufs_reset(&mgi->mgi_bufs, mti->mti_fsname); lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 1, quota); - lcfg = lustre_cfg_new(cmd, &mgi->mgi_bufs); + lcr = lustre_cfg_rec_new(cmd, &mgi->mgi_bufs); + if (lcr == NULL) + return -ENOMEM; + /* truncate the comment to the parameter name */ ptr = tmp - 1; sep = *ptr; @@ -2858,10 +3445,10 @@ static int mgs_write_log_quota(const struct lu_env *env, struct mgs_device *mgs, * log once we cleanup the config log for global param. */ /* modify all servers */ rc = mgs_write_log_direct_all(env, mgs, fsdb, mti, - *tmp == '\0' ? NULL : lcfg, + *tmp == '\0' ? NULL : lcr, mti->mti_fsname, quota, 1); *ptr = sep; - lustre_cfg_free(lcfg); + lustre_cfg_rec_free(lcr); return rc < 0 ? 
rc : 0; } @@ -2871,61 +3458,63 @@ static int mgs_srpc_set_param_disk(const struct lu_env *env, struct mgs_target_info *mti, char *param) { - struct mgs_thread_info *mgi = mgs_env_info(env); - struct llog_handle *llh = NULL; - char *logname; - char *comment, *ptr; - struct lustre_cfg *lcfg; - int rc, len; - ENTRY; + struct mgs_thread_info *mgi = mgs_env_info(env); + struct llog_cfg_rec *lcr; + struct llog_handle *llh = NULL; + char *logname; + char *comment, *ptr; + int rc, len; - /* get comment */ - ptr = strchr(param, '='); - LASSERT(ptr); - len = ptr - param; + ENTRY; - OBD_ALLOC(comment, len + 1); - if (comment == NULL) - RETURN(-ENOMEM); - strncpy(comment, param, len); - comment[len] = '\0'; + /* get comment */ + ptr = strchr(param, '='); + LASSERT(ptr != NULL); + len = ptr - param; + + OBD_ALLOC(comment, len + 1); + if (comment == NULL) + RETURN(-ENOMEM); + strncpy(comment, param, len); + comment[len] = '\0'; - /* prepare lcfg */ + /* prepare lcfg */ lustre_cfg_bufs_reset(&mgi->mgi_bufs, mti->mti_svname); lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 1, param); - lcfg = lustre_cfg_new(LCFG_SPTLRPC_CONF, &mgi->mgi_bufs); - if (lcfg == NULL) - GOTO(out_comment, rc = -ENOMEM); + lcr = lustre_cfg_rec_new(LCFG_SPTLRPC_CONF, &mgi->mgi_bufs); + if (lcr == NULL) + GOTO(out_comment, rc = -ENOMEM); - /* construct log name */ - rc = name_create(&logname, mti->mti_fsname, "-sptlrpc"); - if (rc) - GOTO(out_lcfg, rc); + /* construct log name */ + rc = name_create(&logname, mti->mti_fsname, "-sptlrpc"); + if (rc < 0) + GOTO(out_lcfg, rc); if (mgs_log_is_empty(env, mgs, logname)) { rc = record_start_log(env, mgs, &llh, logname); - if (rc) - GOTO(out, rc); + if (rc < 0) + GOTO(out, rc); record_end_log(env, &llh); - } + } - /* obsolete old one */ + /* obsolete old one */ rc = mgs_modify(env, mgs, fsdb, mti, logname, mti->mti_svname, comment, CM_SKIP); if (rc < 0) GOTO(out, rc); - /* write the new one */ - rc = mgs_write_log_direct(env, mgs, fsdb, logname, lcfg, - mti->mti_svname, 
comment); + /* write the new one */ + rc = mgs_write_log_direct(env, mgs, fsdb, logname, lcr, + mti->mti_svname, comment); if (rc) - CERROR("err %d writing log %s\n", rc, logname); + CERROR("%s: error writing log %s: rc = %d\n", + mgs->mgs_obd->obd_name, logname, rc); out: - name_destroy(&logname); + name_destroy(&logname); out_lcfg: - lustre_cfg_free(lcfg); + lustre_cfg_rec_free(lcr); out_comment: - OBD_FREE(comment, len + 1); - RETURN(rc); + OBD_FREE(comment, len + 1); + RETURN(rc); } static int mgs_srpc_set_param_udesc_mem(struct fs_db *fsdb, @@ -3039,6 +3628,9 @@ static int mgs_srpc_set_param_mem(struct fs_db *fsdb, } rset = &tgtconf->mtsc_rset; + } else if (strcmp(svname, MGSSELF_NAME) == 0) { + /* put _mgs related srpc rule directly in mgs ruleset */ + rset = &fsdb->fsdb_mgs->mgs_lut.lut_sptlrpc_rset; } else { rset = &fsdb->fsdb_srpc_gen; } @@ -3113,8 +3705,7 @@ static int mgs_srpc_read_handler(const struct lu_env *env, RETURN(-EINVAL); } - cfg_len = rec->lrh_len - sizeof(struct llog_rec_hdr) - - sizeof(struct llog_rec_tail); + cfg_len = REC_DATA_LEN(rec); rc = lustre_cfg_sanity_check(lcfg, cfg_len); if (rc) { @@ -3215,6 +3806,79 @@ out: RETURN(rc); } +static int mgs_write_log_param2(const struct lu_env *env, + struct mgs_device *mgs, + struct fs_db *fsdb, + struct mgs_target_info *mti, char *ptr) +{ + struct lustre_cfg_bufs bufs; + int rc; + + ENTRY; + CDEBUG(D_MGS, "next param '%s'\n", ptr); + + /* PARAM_MGSNODE and PARAM_NETWORK are set only when formating + * or during the inital mount. It can never change after that. + */ + if (!class_match_param(ptr, PARAM_MGSNODE, NULL) || + !class_match_param(ptr, PARAM_NETWORK, NULL)) { + rc = 0; + goto end; + } + + /* Processed in mgs_write_log_ost. Another value that can't + * be changed by lctl set_param -P. + */ + if (!class_match_param(ptr, PARAM_FAILMODE, NULL)) { + LCONSOLE_ERROR_MSG(0x169, + "%s can only be changed with tunefs.lustre and --writeconf\n", + ptr); + rc = -EPERM; + goto end; + } + + /* FIXME !!! 
Support for sptlrpc is incomplete. Currently the change + * doesn't transmit to the client. See LU-7183. + */ + if (!class_match_param(ptr, PARAM_SRPC, NULL)) { + rc = mgs_srpc_set_param(env, mgs, fsdb, mti, ptr); + goto end; + } + + /* Can't use class_match_param since ptr doesn't start with + * PARAM_FAILNODE. So we look for PARAM_FAILNODE contained in ptr. + */ + if (strstr(ptr, PARAM_FAILNODE)) { + /* Add a failover nidlist. We already processed failovers + * params for new targets in mgs_write_log_target. + */ + const char *param; + + /* can't use wildcards with failover.node */ + if (strchr(ptr, '*')) { + rc = -ENODEV; + goto end; + } + + param = strstr(ptr, PARAM_FAILNODE); + if (strlcpy(mti->mti_params, param, sizeof(mti->mti_params)) >= + sizeof(mti->mti_params)) { + rc = -E2BIG; + goto end; + } + + CDEBUG(D_MGS, "Adding failnode with param %s\n", + mti->mti_params); + rc = mgs_write_log_add_failnid(env, mgs, fsdb, mti); + goto end; + } + + rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, PARAMS_FILENAME, &bufs, + mti->mti_svname, ptr); +end: + RETURN(rc); +} + /* Permanent settings of all parameters by writing into the appropriate * configuration logs. * A parameter with null value ("='\0'") means to erase it out of @@ -3227,7 +3891,7 @@ static int mgs_write_log_param(const struct lu_env *env, struct mgs_thread_info *mgi = mgs_env_info(env); char *logname; char *tmp; - int rc = 0, rc2 = 0; + int rc = 0; ENTRY; /* For various parameter settings, we have to figure out which logs @@ -3283,61 +3947,77 @@ static int mgs_write_log_param(const struct lu_env *env, GOTO(end, rc); } - if (class_match_param(ptr, PARAM_OSC""PARAM_ACTIVE, &tmp) == 0) { - /* active=0 means off, anything else means on */ - int flag = (*tmp == '0') ? CM_EXCLUDE : 0; - int i; + if (class_match_param(ptr, PARAM_OSC PARAM_ACTIVE, &tmp) == 0 || + class_match_param(ptr, PARAM_MDC PARAM_ACTIVE, &tmp) == 0) { + /* active=0 means off, anything else means on */ + int flag = (*tmp == '0') ? 
CM_EXCLUDE : 0; + bool deactive_osc = memcmp(ptr, PARAM_OSC PARAM_ACTIVE, + strlen(PARAM_OSC PARAM_ACTIVE)) == 0; + int i; - if (!(mti->mti_flags & LDD_F_SV_TYPE_OST)) { - LCONSOLE_ERROR_MSG(0x144, "%s: Only OSCs can " - "be (de)activated.\n", - mti->mti_svname); - GOTO(end, rc = -EINVAL); - } - LCONSOLE_WARN("Permanently %sactivating %s\n", - flag ? "de": "re", mti->mti_svname); - /* Modify clilov */ + if (!deactive_osc) { + __u32 index; + + rc = server_name2index(mti->mti_svname, &index, NULL); + if (rc < 0) + GOTO(end, rc); + + if (index == 0) { + LCONSOLE_ERROR_MSG(0x144, "%s: MDC0 can not be" + " (de)activated.\n", + mti->mti_svname); + GOTO(end, rc = -EPERM); + } + } + + LCONSOLE_WARN("Permanently %sactivating %s\n", + flag ? "de" : "re", mti->mti_svname); + /* Modify clilov */ rc = name_create(&logname, mti->mti_fsname, "-client"); - if (rc) + if (rc < 0) GOTO(end, rc); rc = mgs_modify(env, mgs, fsdb, mti, logname, - mti->mti_svname, "add osc", flag); - name_destroy(&logname); - if (rc) - goto active_err; - /* Modify mdtlov */ - /* Add to all MDT logs for CMD */ - for (i = 0; i < INDEX_MAP_SIZE * 8; i++) { + mti->mti_svname, + deactive_osc ? "add osc" : "add mdc", flag); + name_destroy(&logname); + if (rc < 0) + goto active_err; + + /* Modify mdtlov */ + /* Add to all MDT logs for DNE */ + for (i = 0; i < INDEX_MAP_SIZE * 8; i++) { if (!test_bit(i, fsdb->fsdb_mdt_index_map)) - continue; + continue; rc = name_create_mdt(&logname, mti->mti_fsname, i); - if (rc) + if (rc < 0) GOTO(end, rc); rc = mgs_modify(env, mgs, fsdb, mti, logname, - mti->mti_svname, "add osc", flag); - name_destroy(&logname); - if (rc) - goto active_err; - } - active_err: - if (rc) { - LCONSOLE_ERROR_MSG(0x145, "Couldn't find %s in" - "log (%d). No permanent " - "changes were made to the " - "config log.\n", - mti->mti_svname, rc); + mti->mti_svname, + deactive_osc ? 
"add osc" : "add osp", + flag); + name_destroy(&logname); + if (rc < 0) + goto active_err; + } +active_err: + if (rc < 0) { + LCONSOLE_ERROR_MSG(0x145, "Couldn't find %s in" + "log (%d). No permanent " + "changes were made to the " + "config log.\n", + mti->mti_svname, rc); if (test_bit(FSDB_OLDLOG14, &fsdb->fsdb_flags)) - LCONSOLE_ERROR_MSG(0x146, "This may be" - " because the log" - "is in the old 1.4" - "style. Consider " - " --writeconf to " - "update the logs.\n"); - GOTO(end, rc); - } - /* Fall through to osc proc for deactivating live OSC - on running MDT / clients. */ - } + LCONSOLE_ERROR_MSG(0x146, "This may be" + " because the log" + "is in the old 1.4" + "style. Consider " + " --writeconf to " + "update the logs.\n"); + GOTO(end, rc); + } + /* Fall through to osc/mdc proc for deactivating live + OSC/OSP on running MDT / clients. */ + } /* Below here, let obd's XXX_process_config methods handle it */ /* All lov. in proc */ @@ -3406,9 +4086,22 @@ static int mgs_write_log_param(const struct lu_env *env, if (rc) GOTO(end, rc); - CDEBUG(D_MGS, "%.3s param %s\n", ptr, ptr + 4); + /* Forbid direct update of llite root squash parameters. + * These parameters are indirectly set via the MDT settings. + * See (LU-1778) */ + if ((class_match_param(ptr, PARAM_LLITE, &tmp) == 0) && + ((memcmp(tmp, "root_squash=", 12) == 0) || + (memcmp(tmp, "nosquash_nids=", 14) == 0))) { + LCONSOLE_ERROR("%s: root squash parameters can only " + "be updated through MDT component\n", + mti->mti_fsname); + name_destroy(&cname); + GOTO(end, rc = -EINVAL); + } - /* Modify client */ + CDEBUG(D_MGS, "%.3s param %s\n", ptr, ptr + 4); + + /* Modify client */ rc = name_create(&logname, mti->mti_fsname, "-client"); if (rc) { name_destroy(&cname); @@ -3444,13 +4137,85 @@ static int mgs_write_log_param(const struct lu_env *env, } } } - name_destroy(&logname); - name_destroy(&cname); - GOTO(end, rc); - } - /* All mdt. 
params in proc */ - if (class_match_param(ptr, PARAM_MDT, NULL) == 0) { + /* For mdc activate/deactivate, it affects OSP on MDT as well */ + if (class_match_param(ptr, PARAM_MDC PARAM_ACTIVE, &tmp) == 0 && + rc == 0) { + char suffix[16]; + char *lodname = NULL; + char *param_str = NULL; + int i; + int index; + + /* replace mdc with osp */ + memcpy(ptr, PARAM_OSP, strlen(PARAM_OSP)); + rc = server_name2index(mti->mti_svname, &index, NULL); + if (rc < 0) { + memcpy(ptr, PARAM_MDC, strlen(PARAM_MDC)); + GOTO(end, rc); + } + + for (i = 0; i < INDEX_MAP_SIZE * 8; i++) { + if (!test_bit(i, fsdb->fsdb_mdt_index_map)) + continue; + + if (i == index) + continue; + + name_destroy(&logname); + rc = name_create_mdt(&logname, mti->mti_fsname, + i); + if (rc < 0) + break; + + if (mgs_log_is_empty(env, mgs, logname)) + continue; + + snprintf(suffix, sizeof(suffix), "-osp-MDT%04x", + i); + name_destroy(&cname); + rc = name_create(&cname, mti->mti_svname, + suffix); + if (rc < 0) + break; + + rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, logname, + &mgi->mgi_bufs, cname, ptr); + if (rc < 0) + break; + + /* Add configuration log for noitfying LOD + * to active/deactive the OSP. */ + name_destroy(¶m_str); + rc = name_create(¶m_str, cname, + (*tmp == '0') ? ".active=0" : + ".active=1"); + if (rc < 0) + break; + + name_destroy(&lodname); + rc = name_create(&lodname, logname, "-mdtlov"); + if (rc < 0) + break; + + rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, logname, + &mgi->mgi_bufs, lodname, + param_str); + if (rc < 0) + break; + } + memcpy(ptr, PARAM_MDC, strlen(PARAM_MDC)); + name_destroy(&lodname); + name_destroy(¶m_str); + } + + name_destroy(&logname); + name_destroy(&cname); + GOTO(end, rc); + } + + /* All mdt. 
params in proc */ + if (class_match_param(ptr, PARAM_MDT, &tmp) == 0) { int i; __u32 idx; @@ -3476,23 +4241,60 @@ static int mgs_write_log_param(const struct lu_env *env, goto active_err; rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, logname, &mgi->mgi_bufs, - logname, ptr); - name_destroy(&logname); - if (rc) - goto active_err; - } - } else { + logname, ptr); + name_destroy(&logname); + if (rc) + goto active_err; + } + } else { + if ((memcmp(tmp, "root_squash=", 12) == 0) || + (memcmp(tmp, "nosquash_nids=", 14) == 0)) { + LCONSOLE_ERROR("%s: root squash parameters " + "cannot be applied to a single MDT\n", + mti->mti_fsname); + GOTO(end, rc = -EINVAL); + } rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, mti->mti_svname, &mgi->mgi_bufs, - mti->mti_svname, ptr); - if (rc) - goto active_err; - } - GOTO(end, rc); - } + mti->mti_svname, ptr); + if (rc) + goto active_err; + } + + /* root squash settings are also applied to llite + * config log (see LU-1778) */ + if (rc == 0 && + ((memcmp(tmp, "root_squash=", 12) == 0) || + (memcmp(tmp, "nosquash_nids=", 14) == 0))) { + char *cname; + char *ptr2; + + rc = name_create(&cname, mti->mti_fsname, "-client"); + if (rc) + GOTO(end, rc); + rc = name_create(&logname, mti->mti_fsname, "-client"); + if (rc) { + name_destroy(&cname); + GOTO(end, rc); + } + rc = name_create(&ptr2, PARAM_LLITE, tmp); + if (rc) { + name_destroy(&cname); + name_destroy(&logname); + GOTO(end, rc); + } + rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, logname, + &mgi->mgi_bufs, cname, ptr2); + name_destroy(&ptr2); + name_destroy(&logname); + name_destroy(&cname); + } + GOTO(end, rc); + } /* All mdd., ost. and osd. 
params in proc */ if ((class_match_param(ptr, PARAM_MDD, NULL) == 0) || + (class_match_param(ptr, PARAM_LOD, NULL) == 0) || (class_match_param(ptr, PARAM_OST, NULL) == 0) || (class_match_param(ptr, PARAM_OSD, NULL) == 0)) { CDEBUG(D_MGS, "%.3s param %s\n", ptr, ptr + 4); @@ -3505,62 +4307,26 @@ static int mgs_write_log_param(const struct lu_env *env, } LCONSOLE_WARN("Ignoring unrecognized param '%s'\n", ptr); - rc2 = -ENOSYS; end: if (rc) CERROR("err %d on param '%s'\n", rc, ptr); - RETURN(rc ?: rc2); + RETURN(rc); } -/* Not implementing automatic failover nid addition at this time. */ -int mgs_check_failnid(const struct lu_env *env, struct mgs_device *mgs, - struct mgs_target_info *mti) +int mgs_write_log_target(const struct lu_env *env, struct mgs_device *mgs, + struct mgs_target_info *mti, struct fs_db *fsdb) { -#if 0 - struct fs_db *fsdb; - int rc; - ENTRY; - - rc = mgs_find_or_make_fsdb(obd, fsname, &fsdb); - if (rc) - RETURN(rc); - - if (mgs_log_is_empty(obd, mti->mti_svname)) - /* should never happen */ - RETURN(-ENOENT); - - CDEBUG(D_MGS, "Checking for new failnids for %s\n", mti->mti_svname); - - /* FIXME We can just check mti->params to see if we're already in - the failover list. Modify mti->params for rewriting back at - server_register_target(). 
*/ - - mutex_lock(&fsdb->fsdb_mutex); - rc = mgs_write_log_add_failnid(obd, fsdb, mti); - mutex_unlock(&fsdb->fsdb_mutex); - - RETURN(rc); -#endif - return 0; -} + char *buf, *params; + int rc = -EINVAL; -int mgs_write_log_target(const struct lu_env *env, - struct mgs_device *mgs, - struct mgs_target_info *mti, - struct fs_db *fsdb) -{ - int rc = -EINVAL; - char *buf, *params; - ENTRY; + ENTRY; - /* set/check the new target index */ + /* set/check the new target index */ rc = mgs_set_index(env, mgs, mti); - if (rc < 0) { - CERROR("Can't get index (%d)\n", rc); - RETURN(rc); - } + if (rc < 0) + RETURN(rc); if (rc == EALREADY) { LCONSOLE_WARN("Found index %d for %s, updating log\n", @@ -3574,29 +4340,31 @@ int mgs_write_log_target(const struct lu_env *env, rc = 0; } + OBD_FAIL_TIMEOUT(OBD_FAIL_MGS_WRITE_TARGET_DELAY, cfs_fail_val > 0 ? + cfs_fail_val : 10); + mutex_lock(&fsdb->fsdb_mutex); - if (mti->mti_flags & - (LDD_F_VIRGIN | LDD_F_UPGRADE14 | LDD_F_WRITECONF)) { - /* Generate a log from scratch */ - if (mti->mti_flags & LDD_F_SV_TYPE_MDT) { + if (mti->mti_flags & (LDD_F_VIRGIN | LDD_F_WRITECONF)) { + /* Generate a log from scratch */ + if (mti->mti_flags & LDD_F_SV_TYPE_MDT) { rc = mgs_write_log_mdt(env, mgs, fsdb, mti); - } else if (mti->mti_flags & LDD_F_SV_TYPE_OST) { + } else if (mti->mti_flags & LDD_F_SV_TYPE_OST) { rc = mgs_write_log_ost(env, mgs, fsdb, mti); - } else { - CERROR("Unknown target type %#x, can't create log for " - "%s\n", mti->mti_flags, mti->mti_svname); - } - if (rc) { - CERROR("Can't write logs for %s (%d)\n", - mti->mti_svname, rc); - GOTO(out_up, rc); - } - } else { - /* Just update the params from tunefs in mgs_write_log_params */ - CDEBUG(D_MGS, "Update params for %s\n", mti->mti_svname); - mti->mti_flags |= LDD_F_PARAM; - } + } else { + CERROR("Unknown target type %#x, can't create log for %s\n", + mti->mti_flags, mti->mti_svname); + } + if (rc) { + CERROR("Can't write logs for %s (%d)\n", + mti->mti_svname, rc); + GOTO(out_up, rc); 
+ } + } else { + /* Just update the params from tunefs in mgs_write_log_params */ + CDEBUG(D_MGS, "Update params for %s\n", mti->mti_svname); + mti->mti_flags |= LDD_F_PARAM; + } /* allocate temporary buffer, where class_get_next_param will make copy of a current parameter */ @@ -3653,51 +4421,65 @@ int mgs_erase_log(const struct lu_env *env, struct mgs_device *mgs, char *name) } /* erase all logs for the given fs */ -int mgs_erase_logs(const struct lu_env *env, struct mgs_device *mgs, char *fsname) +int mgs_erase_logs(const struct lu_env *env, struct mgs_device *mgs, + const char *fsname) { - struct fs_db *fsdb; - cfs_list_t list; + struct list_head log_list; struct mgs_direntry *dirent, *n; - int rc, len = strlen(fsname); + char barrier_name[20] = {}; char *suffix; + int count = 0; + int rc, len = strlen(fsname); ENTRY; + mutex_lock(&mgs->mgs_mutex); + /* Find all the logs in the CONFIGS directory */ - rc = class_dentry_readdir(env, mgs, &list); - if (rc) + rc = class_dentry_readdir(env, mgs, &log_list); + if (rc) { + mutex_unlock(&mgs->mgs_mutex); RETURN(rc); + } - mutex_lock(&mgs->mgs_mutex); - - /* Delete the fs db */ - fsdb = mgs_find_fsdb(mgs, fsname); - if (fsdb) - mgs_free_fsdb(mgs, fsdb); + if (list_empty(&log_list)) { + mutex_unlock(&mgs->mgs_mutex); + RETURN(-ENOENT); + } + snprintf(barrier_name, sizeof(barrier_name) - 1, "%s-%s", + fsname, BARRIER_FILENAME); + /* Delete the barrier fsdb */ + mgs_remove_fsdb_by_name(mgs, barrier_name); + /* Delete the fs db */ + mgs_remove_fsdb_by_name(mgs, fsname); mutex_unlock(&mgs->mgs_mutex); - cfs_list_for_each_entry_safe(dirent, n, &list, list) { - cfs_list_del(&dirent->list); - suffix = strrchr(dirent->name, '-'); + list_for_each_entry_safe(dirent, n, &log_list, mde_list) { + list_del_init(&dirent->mde_list); + suffix = strrchr(dirent->mde_name, '-'); if (suffix != NULL) { - if ((len == suffix - dirent->name) && - (strncmp(fsname, dirent->name, len) == 0)) { + if ((len == suffix - dirent->mde_name) && + 
(strncmp(fsname, dirent->mde_name, len) == 0)) { CDEBUG(D_MGS, "Removing log %s\n", - dirent->name); - mgs_erase_log(env, mgs, dirent->name); + dirent->mde_name); + mgs_erase_log(env, mgs, dirent->mde_name); + count++; } } mgs_direntry_free(dirent); } - RETURN(rc); + if (count == 0) + rc = -ENOENT; + + RETURN(rc); } /* list all logs for the given fs */ int mgs_list_logs(const struct lu_env *env, struct mgs_device *mgs, struct obd_ioctl_data *data) { - cfs_list_t list; + struct list_head log_list; struct mgs_direntry *dirent, *n; char *out, *suffix; int l, remains, rc; @@ -3705,21 +4487,18 @@ int mgs_list_logs(const struct lu_env *env, struct mgs_device *mgs, ENTRY; /* Find all the logs in the CONFIGS directory */ - rc = class_dentry_readdir(env, mgs, &list); - if (rc) { - CERROR("%s: can't read %s dir = %d\n", - mgs->mgs_obd->obd_name, MOUNT_CONFIGS_DIR, rc); + rc = class_dentry_readdir(env, mgs, &log_list); + if (rc) RETURN(rc); - } out = data->ioc_bulk; remains = data->ioc_inllen1; - cfs_list_for_each_entry_safe(dirent, n, &list, list) { - cfs_list_del(&dirent->list); - suffix = strrchr(dirent->name, '-'); + list_for_each_entry_safe(dirent, n, &log_list, mde_list) { + list_del_init(&dirent->mde_list); + suffix = strrchr(dirent->mde_name, '-'); if (suffix != NULL) { - l = snprintf(out, remains, "config log: $%s\n", - dirent->name); + l = snprintf(out, remains, "config_log: %s\n", + dirent->mde_name); out += l; remains -= l; } @@ -3730,140 +4509,860 @@ int mgs_list_logs(const struct lu_env *env, struct mgs_device *mgs, RETURN(rc); } -/* from llog_swab */ -static void print_lustre_cfg(struct lustre_cfg *lcfg) +struct mgs_lcfg_fork_data { + struct lustre_cfg_bufs mlfd_bufs; + struct mgs_device *mlfd_mgs; + struct llog_handle *mlfd_llh; + const char *mlfd_oldname; + const char *mlfd_newname; + char mlfd_data[0]; +}; + +static bool contain_valid_fsname(char *buf, const char *fsname, + int buflen, int namelen) { - int i; - ENTRY; + if (buflen < namelen) + return false; 
- CDEBUG(D_MGS, "lustre_cfg: %p\n", lcfg); - CDEBUG(D_MGS, "\tlcfg->lcfg_version: %#x\n", lcfg->lcfg_version); + if (memcmp(buf, fsname, namelen) != 0) + return false; - CDEBUG(D_MGS, "\tlcfg->lcfg_command: %#x\n", lcfg->lcfg_command); - CDEBUG(D_MGS, "\tlcfg->lcfg_num: %#x\n", lcfg->lcfg_num); - CDEBUG(D_MGS, "\tlcfg->lcfg_flags: %#x\n", lcfg->lcfg_flags); - CDEBUG(D_MGS, "\tlcfg->lcfg_nid: %s\n", libcfs_nid2str(lcfg->lcfg_nid)); + if (buf[namelen] != '\0' && buf[namelen] != '-') + return false; - CDEBUG(D_MGS, "\tlcfg->lcfg_bufcount: %d\n", lcfg->lcfg_bufcount); - if (lcfg->lcfg_bufcount < LUSTRE_CFG_MAX_BUFCOUNT) - for (i = 0; i < lcfg->lcfg_bufcount; i++) { - CDEBUG(D_MGS, "\tlcfg->lcfg_buflens[%d]: %d %s\n", - i, lcfg->lcfg_buflens[i], - lustre_cfg_string(lcfg, i)); - } - EXIT; + return true; } -/* Set a permanent (config log) param for a target or fs - * \param lcfg buf0 may contain the device (testfs-MDT0000) name - * buf1 contains the single parameter - */ -int mgs_setparam(const struct lu_env *env, struct mgs_device *mgs, - struct lustre_cfg *lcfg, char *fsname) +static int mgs_lcfg_fork_handler(const struct lu_env *env, + struct llog_handle *o_llh, + struct llog_rec_hdr *o_rec, void *data) { - struct fs_db *fsdb; - struct mgs_target_info *mti; - char *devname, *param; - char *ptr; - const char *tmp; - __u32 index; + struct mgs_lcfg_fork_data *mlfd = data; + struct lustre_cfg_bufs *n_bufs = &mlfd->mlfd_bufs; + struct lustre_cfg *o_lcfg = (struct lustre_cfg *)(o_rec + 1); + struct llog_cfg_rec *lcr; + char *o_buf; + char *n_buf = mlfd->mlfd_data; + int o_buflen; + int o_namelen = strlen(mlfd->mlfd_oldname); + int n_namelen = strlen(mlfd->mlfd_newname); + int diff = n_namelen - o_namelen; + __u32 cmd = o_lcfg->lcfg_command; + __u32 cnt = o_lcfg->lcfg_bufcount; + int rc; + int i; + ENTRY; + + /* buf[0] */ + o_buf = lustre_cfg_buf(o_lcfg, 0); + o_buflen = o_lcfg->lcfg_buflens[0]; + if (contain_valid_fsname(o_buf, mlfd->mlfd_oldname, o_buflen, + o_namelen)) { + 
memcpy(n_buf, mlfd->mlfd_newname, n_namelen); + memcpy(n_buf + n_namelen, o_buf + o_namelen, + o_buflen - o_namelen); + lustre_cfg_bufs_reset(n_bufs, n_buf); + n_buf += cfs_size_round(o_buflen + diff); + } else { + lustre_cfg_bufs_reset(n_bufs, o_buflen != 0 ? o_buf : NULL); + } + + switch (cmd) { + case LCFG_MARKER: { + struct cfg_marker *o_marker; + struct cfg_marker *n_marker; + int tgt_namelen; + + if (cnt != 2) { + CDEBUG(D_MGS, "Unknown cfg marker entry with %d " + "buffers\n", cnt); + RETURN(-EINVAL); + } + + /* buf[1] is marker */ + o_buf = lustre_cfg_buf(o_lcfg, 1); + o_buflen = o_lcfg->lcfg_buflens[1]; + o_marker = (struct cfg_marker *)o_buf; + if (!contain_valid_fsname(o_marker->cm_tgtname, + mlfd->mlfd_oldname, + sizeof(o_marker->cm_tgtname), + o_namelen)) { + lustre_cfg_bufs_set(n_bufs, 1, o_marker, + sizeof(*o_marker)); + break; + } + + n_marker = (struct cfg_marker *)n_buf; + *n_marker = *o_marker; + memcpy(n_marker->cm_tgtname, mlfd->mlfd_newname, n_namelen); + tgt_namelen = strlen(o_marker->cm_tgtname); + if (tgt_namelen > o_namelen) + memcpy(n_marker->cm_tgtname + n_namelen, + o_marker->cm_tgtname + o_namelen, + tgt_namelen - o_namelen); + n_marker->cm_tgtname[tgt_namelen + diff] = '\0'; + lustre_cfg_bufs_set(n_bufs, 1, n_marker, sizeof(*n_marker)); + break; + } + case LCFG_PARAM: + case LCFG_SET_PARAM: { + for (i = 1; i < cnt; i++) + /* buf[i] is the param value, reuse it directly */ + lustre_cfg_bufs_set(n_bufs, i, + lustre_cfg_buf(o_lcfg, i), + o_lcfg->lcfg_buflens[i]); + break; + } + case LCFG_POOL_NEW: + case LCFG_POOL_ADD: + case LCFG_POOL_REM: + case LCFG_POOL_DEL: { + if (cnt < 3 || cnt > 4) { + CDEBUG(D_MGS, "Unknown cfg pool (%x) entry with %d " + "buffers\n", cmd, cnt); + RETURN(-EINVAL); + } + + /* buf[1] is fsname */ + o_buf = lustre_cfg_buf(o_lcfg, 1); + o_buflen = o_lcfg->lcfg_buflens[1]; + memcpy(n_buf, mlfd->mlfd_newname, n_namelen); + memcpy(n_buf + n_namelen, o_buf + o_namelen, + o_buflen - o_namelen); + 
lustre_cfg_bufs_set(n_bufs, 1, n_buf, o_buflen + diff); + n_buf += cfs_size_round(o_buflen + diff); + + /* buf[2] is the pool name, reuse it directly */ + lustre_cfg_bufs_set(n_bufs, 2, lustre_cfg_buf(o_lcfg, 2), + o_lcfg->lcfg_buflens[2]); + + if (cnt == 3) + break; + + /* buf[3] is ostname */ + o_buf = lustre_cfg_buf(o_lcfg, 3); + o_buflen = o_lcfg->lcfg_buflens[3]; + memcpy(n_buf, mlfd->mlfd_newname, n_namelen); + memcpy(n_buf + n_namelen, o_buf + o_namelen, + o_buflen - o_namelen); + lustre_cfg_bufs_set(n_bufs, 3, n_buf, o_buflen + diff); + break; + } + case LCFG_SETUP: { + if (cnt == 2) { + o_buflen = o_lcfg->lcfg_buflens[1]; + if (o_buflen == sizeof(struct lov_desc) || + o_buflen == sizeof(struct lmv_desc)) { + char *o_uuid; + char *n_uuid; + int uuid_len; + + /* buf[1] */ + o_buf = lustre_cfg_buf(o_lcfg, 1); + if (o_buflen == sizeof(struct lov_desc)) { + struct lov_desc *o_desc = + (struct lov_desc *)o_buf; + struct lov_desc *n_desc = + (struct lov_desc *)n_buf; + + *n_desc = *o_desc; + o_uuid = o_desc->ld_uuid.uuid; + n_uuid = n_desc->ld_uuid.uuid; + uuid_len = sizeof(o_desc->ld_uuid.uuid); + } else { + struct lmv_desc *o_desc = + (struct lmv_desc *)o_buf; + struct lmv_desc *n_desc = + (struct lmv_desc *)n_buf; + + *n_desc = *o_desc; + o_uuid = o_desc->ld_uuid.uuid; + n_uuid = n_desc->ld_uuid.uuid; + uuid_len = sizeof(o_desc->ld_uuid.uuid); + } + + if (unlikely(!contain_valid_fsname(o_uuid, + mlfd->mlfd_oldname, uuid_len, + o_namelen))) { + lustre_cfg_bufs_set(n_bufs, 1, o_buf, + o_buflen); + break; + } + + memcpy(n_uuid, mlfd->mlfd_newname, n_namelen); + uuid_len = strlen(o_uuid); + if (uuid_len > o_namelen) + memcpy(n_uuid + n_namelen, + o_uuid + o_namelen, + uuid_len - o_namelen); + n_uuid[uuid_len + diff] = '\0'; + lustre_cfg_bufs_set(n_bufs, 1, n_buf, o_buflen); + break; + } /* else case fall through */ + } /* else case fall through */ + } + default: { + for (i = 1; i < cnt; i++) { + o_buflen = o_lcfg->lcfg_buflens[i]; + if (o_buflen == 0) + continue; 
+ + o_buf = lustre_cfg_buf(o_lcfg, i); + if (!contain_valid_fsname(o_buf, mlfd->mlfd_oldname, + o_buflen, o_namelen)) { + lustre_cfg_bufs_set(n_bufs, i, o_buf, o_buflen); + continue; + } + + memcpy(n_buf, mlfd->mlfd_newname, n_namelen); + if (o_buflen == o_namelen) { + lustre_cfg_bufs_set(n_bufs, i, n_buf, + n_namelen); + n_buf += cfs_size_round(n_namelen); + continue; + } + + memcpy(n_buf + n_namelen, o_buf + o_namelen, + o_buflen - o_namelen); + lustre_cfg_bufs_set(n_bufs, i, n_buf, o_buflen + diff); + n_buf += cfs_size_round(o_buflen + diff); + } + break; + } + } + + lcr = lustre_cfg_rec_new(cmd, n_bufs); + if (!lcr) + RETURN(-ENOMEM); + + lcr->lcr_cfg = *o_lcfg; + rc = llog_write(env, mlfd->mlfd_llh, &lcr->lcr_hdr, LLOG_NEXT_IDX); + lustre_cfg_rec_free(lcr); + + RETURN(rc); +} + +static int mgs_lcfg_fork_one(const struct lu_env *env, struct mgs_device *mgs, + struct mgs_direntry *mde, const char *oldname, + const char *newname) +{ + struct llog_handle *old_llh = NULL; + struct llog_handle *new_llh = NULL; + struct llog_ctxt *ctxt = NULL; + struct mgs_lcfg_fork_data *mlfd = NULL; + char *name_buf = NULL; + int name_buflen; + int old_namelen = strlen(oldname); + int new_namelen = strlen(newname); + int rc; + ENTRY; + + name_buflen = mde->mde_len + new_namelen - old_namelen; + OBD_ALLOC(name_buf, name_buflen); + if (!name_buf) + RETURN(-ENOMEM); + + memcpy(name_buf, newname, new_namelen); + memcpy(name_buf + new_namelen, mde->mde_name + old_namelen, + mde->mde_len - old_namelen); + + CDEBUG(D_MGS, "Fork the config-log from %s to %s\n", + mde->mde_name, name_buf); + + ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT); + LASSERT(ctxt); + + rc = llog_open_create(env, ctxt, &new_llh, NULL, name_buf); + if (rc) + GOTO(out, rc); + + rc = llog_init_handle(env, new_llh, LLOG_F_IS_PLAIN, NULL); + if (rc) + GOTO(out, rc); + + if (unlikely(mgs_log_is_empty(env, mgs, mde->mde_name))) + GOTO(out, rc = 0); + + rc = llog_open(env, ctxt, &old_llh, NULL, mde->mde_name, 
+ LLOG_OPEN_EXISTS); + if (rc) + GOTO(out, rc); + + rc = llog_init_handle(env, old_llh, LLOG_F_IS_PLAIN, NULL); + if (rc) + GOTO(out, rc); + + new_llh->lgh_hdr->llh_tgtuuid = old_llh->lgh_hdr->llh_tgtuuid; + + OBD_ALLOC(mlfd, LLOG_MIN_CHUNK_SIZE); + if (!mlfd) + GOTO(out, rc = -ENOMEM); + + mlfd->mlfd_mgs = mgs; + mlfd->mlfd_llh = new_llh; + mlfd->mlfd_oldname = oldname; + mlfd->mlfd_newname = newname; + + rc = llog_process(env, old_llh, mgs_lcfg_fork_handler, mlfd, NULL); + OBD_FREE(mlfd, LLOG_MIN_CHUNK_SIZE); + + GOTO(out, rc); + +out: + if (old_llh) + llog_close(env, old_llh); + if (new_llh) + llog_close(env, new_llh); + if (name_buf) + OBD_FREE(name_buf, name_buflen); + if (ctxt) + llog_ctxt_put(ctxt); + + return rc; +} + +int mgs_lcfg_fork(const struct lu_env *env, struct mgs_device *mgs, + const char *oldname, const char *newname) +{ + struct list_head log_list; + struct mgs_direntry *dirent, *n; + int olen = strlen(oldname); + int nlen = strlen(newname); + int count = 0; int rc = 0; ENTRY; - print_lustre_cfg(lcfg); + if (unlikely(!oldname || oldname[0] == '\0' || + !newname || newname[0] == '\0')) + RETURN(-EINVAL); - /* lustre, lustre-mdtlov, lustre-client, lustre-MDT0000 */ - devname = lustre_cfg_string(lcfg, 0); - param = lustre_cfg_string(lcfg, 1); - if (!devname) { - /* Assume device name embedded in param: - lustre-OST0000.osc.max_dirty_mb=32 */ - ptr = strchr(param, '.'); - if (ptr) { - devname = param; - *ptr = 0; - param = ptr + 1; - } - } - if (!devname) { - LCONSOLE_ERROR_MSG(0x14d, "No target specified: %s\n", param); - RETURN(-ENOSYS); - } + if (strcmp(oldname, newname) == 0) + RETURN(-EINVAL); - rc = mgs_parse_devname(devname, fsname, NULL); - if (rc == 0 && !mgs_parse_devname(devname, NULL, &index)) { - /* param related to llite isn't allowed to set by OST or MDT */ - if (rc == 0 && strncmp(param, PARAM_LLITE, - sizeof(PARAM_LLITE) - 1) == 0) - RETURN(-EINVAL); - } else { - /* assume devname is the fsname */ - memset(fsname, 0, 
MTI_NAME_MAXLEN); - strncpy(fsname, devname, MTI_NAME_MAXLEN); - fsname[MTI_NAME_MAXLEN - 1] = 0; - } - CDEBUG(D_MGS, "setparam fs='%s' device='%s'\n", fsname, devname); + /* lock it to prevent fork/erase/register in parallel. */ + mutex_lock(&mgs->mgs_mutex); + + rc = class_dentry_readdir(env, mgs, &log_list); + if (rc) { + mutex_unlock(&mgs->mgs_mutex); + RETURN(rc); + } + + if (list_empty(&log_list)) { + mutex_unlock(&mgs->mgs_mutex); + RETURN(-ENOENT); + } + + list_for_each_entry_safe(dirent, n, &log_list, mde_list) { + char *ptr; + + ptr = strrchr(dirent->mde_name, '-'); + if (ptr) { + int tlen = ptr - dirent->mde_name; + + if (tlen == nlen && + strncmp(newname, dirent->mde_name, tlen) == 0) + GOTO(out, rc = -EEXIST); + + if (tlen == olen && + strncmp(oldname, dirent->mde_name, tlen) == 0) + continue; + } + + list_del_init(&dirent->mde_list); + mgs_direntry_free(dirent); + } + + if (list_empty(&log_list)) { + mutex_unlock(&mgs->mgs_mutex); + RETURN(-ENOENT); + } + + list_for_each_entry(dirent, &log_list, mde_list) { + rc = mgs_lcfg_fork_one(env, mgs, dirent, oldname, newname); + if (rc) + break; + + count++; + } + +out: + mutex_unlock(&mgs->mgs_mutex); + + list_for_each_entry_safe(dirent, n, &log_list, mde_list) { + list_del_init(&dirent->mde_list); + mgs_direntry_free(dirent); + } + + if (rc && count > 0) + mgs_erase_logs(env, mgs, newname); + + RETURN(rc); +} + +int mgs_lcfg_erase(const struct lu_env *env, struct mgs_device *mgs, + const char *fsname) +{ + int rc; + ENTRY; + + if (unlikely(!fsname || fsname[0] == '\0')) + RETURN(-EINVAL); - rc = mgs_find_or_make_fsdb(env, mgs, - lcfg->lcfg_command == LCFG_SET_PARAM ? 
- PARAMS_FILENAME : fsname, &fsdb); + rc = mgs_erase_logs(env, mgs, fsname); + + RETURN(rc); +} + +static int mgs_xattr_del(const struct lu_env *env, struct dt_object *obj) +{ + struct dt_device *dev; + struct thandle *th = NULL; + int rc = 0; + + ENTRY; + + dev = container_of0(obj->do_lu.lo_dev, struct dt_device, dd_lu_dev); + th = dt_trans_create(env, dev); + if (IS_ERR(th)) + RETURN(PTR_ERR(th)); + + rc = dt_declare_xattr_del(env, obj, XATTR_TARGET_RENAME, th); + if (rc) + GOTO(stop, rc); + + rc = dt_trans_start_local(env, dev, th); + if (rc) + GOTO(stop, rc); + + dt_write_lock(env, obj, 0); + rc = dt_xattr_del(env, obj, XATTR_TARGET_RENAME, th); + + GOTO(unlock, rc); + +unlock: + dt_write_unlock(env, obj); + +stop: + dt_trans_stop(env, dev, th); + + return rc; +} + +int mgs_lcfg_rename(const struct lu_env *env, struct mgs_device *mgs) +{ + struct list_head log_list; + struct mgs_direntry *dirent, *n; + char fsname[16]; + struct lu_buf buf = { + .lb_buf = fsname, + .lb_len = sizeof(fsname) + }; + int rc = 0; + + ENTRY; + + rc = class_dentry_readdir(env, mgs, &log_list); if (rc) RETURN(rc); - if (lcfg->lcfg_command != LCFG_SET_PARAM && - !test_bit(FSDB_MGS_SELF, &fsdb->fsdb_flags) && - test_bit(FSDB_LOG_EMPTY, &fsdb->fsdb_flags)) { - CERROR("No filesystem targets for %s. 
cfg_device from lctl " - "is '%s'\n", fsname, devname); - mgs_free_fsdb(mgs, fsdb); - RETURN(-EINVAL); - } + if (list_empty(&log_list)) + RETURN(0); - /* Create a fake mti to hold everything */ - OBD_ALLOC_PTR(mti); - if (!mti) - GOTO(out, rc = -ENOMEM); - if (strlcpy(mti->mti_fsname, fsname, sizeof(mti->mti_fsname)) - >= sizeof(mti->mti_fsname)) - GOTO(out, rc = -E2BIG); - if (strlcpy(mti->mti_svname, devname, sizeof(mti->mti_svname)) - >= sizeof(mti->mti_svname)) - GOTO(out, rc = -E2BIG); - if (strlcpy(mti->mti_params, param, sizeof(mti->mti_params)) - >= sizeof(mti->mti_params)) + list_for_each_entry_safe(dirent, n, &log_list, mde_list) { + struct dt_object *o = NULL; + char oldname[16]; + char *ptr; + int len; + + list_del_init(&dirent->mde_list); + ptr = strrchr(dirent->mde_name, '-'); + if (!ptr) + goto next; + + len = ptr - dirent->mde_name; + if (unlikely(len >= sizeof(oldname))) { + CDEBUG(D_MGS, "Skip invalid configuration file %s\n", + dirent->mde_name); + goto next; + } + + o = local_file_find(env, mgs->mgs_los, mgs->mgs_configs_dir, + dirent->mde_name); + if (IS_ERR(o)) { + rc = PTR_ERR(o); + CDEBUG(D_MGS, "Fail to locate file %s: rc = %d\n", + dirent->mde_name, rc); + goto next; + } + + rc = dt_xattr_get(env, o, &buf, XATTR_TARGET_RENAME); + if (rc < 0) { + if (rc == -ENODATA) + rc = 0; + else + CDEBUG(D_MGS, + "Fail to get EA for %s: rc = %d\n", + dirent->mde_name, rc); + goto next; + } + + if (unlikely(rc == len && + memcmp(fsname, dirent->mde_name, len) == 0)) { + /* The new fsname is the same as the old one. 
*/ + rc = mgs_xattr_del(env, o); + goto next; + } + + memcpy(oldname, dirent->mde_name, len); + oldname[len] = '\0'; + fsname[rc] = '\0'; + rc = mgs_lcfg_fork_one(env, mgs, dirent, oldname, fsname); + if (rc && rc != -EEXIST) { + CDEBUG(D_MGS, "Fail to fork %s: rc = %d\n", + dirent->mde_name, rc); + goto next; + } + + rc = mgs_erase_log(env, mgs, dirent->mde_name); + if (rc) { + CDEBUG(D_MGS, "Fail to erase old %s: rc = %d\n", + dirent->mde_name, rc); + /* keep it there if failed to remove it. */ + rc = 0; + } + +next: + if (o && !IS_ERR(o)) + lu_object_put(env, &o->do_lu); + + mgs_direntry_free(dirent); + if (rc) + break; + } + + list_for_each_entry_safe(dirent, n, &log_list, mde_list) { + list_del_init(&dirent->mde_list); + mgs_direntry_free(dirent); + } + + RETURN(rc); +} + +/* Setup _mgs fsdb and log + */ +int mgs__mgs_fsdb_setup(const struct lu_env *env, struct mgs_device *mgs) +{ + struct fs_db *fsdb = NULL; + int rc; + ENTRY; + + rc = mgs_find_or_make_fsdb(env, mgs, MGSSELF_NAME, &fsdb); + if (!rc) + mgs_put_fsdb(mgs, fsdb); + + RETURN(rc); +} + +/* Setup params fsdb and log + */ +int mgs_params_fsdb_setup(const struct lu_env *env, struct mgs_device *mgs) +{ + struct fs_db *fsdb = NULL; + struct llog_handle *params_llh = NULL; + int rc; + ENTRY; + + rc = mgs_find_or_make_fsdb(env, mgs, PARAMS_FILENAME, &fsdb); + if (!rc) { + mutex_lock(&fsdb->fsdb_mutex); + rc = record_start_log(env, mgs, ¶ms_llh, PARAMS_FILENAME); + if (!rc) + rc = record_end_log(env, ¶ms_llh); + mutex_unlock(&fsdb->fsdb_mutex); + mgs_put_fsdb(mgs, fsdb); + } + + RETURN(rc); +} + +/* Cleanup params fsdb and log + */ +int mgs_params_fsdb_cleanup(const struct lu_env *env, struct mgs_device *mgs) +{ + int rc; + + rc = mgs_erase_logs(env, mgs, PARAMS_FILENAME); + return rc == -ENOENT ? 0 : rc; +} + +/** + * Fill in the mgs_target_info based on data devname and param provide. + * + * @env thread context + * @mgs mgs device + * @mti mgs target info. 
We want to set this based on the other parameters + * passed to this function. Once set up, we write it to the config + * logs. + * @devname optional OBD device name + * @param string that contains both what tunable to set and the value to + * set it to. + * + * RETURN 0 for success + * negative error number on failure + **/ +static int mgs_set_conf_param(const struct lu_env *env, struct mgs_device *mgs, + struct mgs_target_info *mti, const char *devname, + const char *param) +{ + struct fs_db *fsdb = NULL; + int dev_type; + int rc = 0; + + ENTRY; + /* lustre, lustre-mdtlov, lustre-client, lustre-MDT0000 */ + if (!devname) { + size_t len; + + /* We have two possible cases here: + * + * 1) the device name embedded in the param: + * lustre-OST0000.osc.max_dirty_mb=32 + * + * 2) the file system name is embedded in + * the param: lustre.sys.at.min=0 + */ + len = strcspn(param, ".="); + if (!len || param[len] == '=') + RETURN(-EINVAL); + + if (len >= sizeof(mti->mti_svname)) + RETURN(-E2BIG); + + snprintf(mti->mti_svname, sizeof(mti->mti_svname), + "%.*s", (int)len, param); + param += len + 1; + } else { + if (strlcpy(mti->mti_svname, devname, sizeof(mti->mti_svname)) >= + sizeof(mti->mti_svname)) + RETURN(-E2BIG); + } + + if (!strlen(mti->mti_svname)) { + LCONSOLE_ERROR_MSG(0x14d, "No target specified: %s\n", param); + RETURN(-ENOSYS); + } + + dev_type = mgs_parse_devname(mti->mti_svname, mti->mti_fsname, + &mti->mti_stripe_index); + switch (dev_type) { + /* For this case we have an invalid obd device name */ + case -ENXIO: + CDEBUG(D_MGS, "%s don't contain an index\n", mti->mti_svname); + strlcpy(mti->mti_fsname, mti->mti_svname, MTI_NAME_MAXLEN); + dev_type = 0; + break; + /* Not an obd device, assume devname is the fsname. 
+ * User might have only provided fsname and not obd device + */ + case -EINVAL: + CDEBUG(D_MGS, "%s is seen as a file system name\n", mti->mti_svname); + strlcpy(mti->mti_fsname, mti->mti_svname, MTI_NAME_MAXLEN); + dev_type = 0; + break; + default: + if (dev_type < 0) + GOTO(out, rc = dev_type); + + /* param related to llite isn't allowed to be set by OST or MDT */ + if (dev_type & LDD_F_SV_TYPE_OST || + dev_type & LDD_F_SV_TYPE_MDT) { + /* param related to llite isn't allowed to be set by OST + * or MDT + */ + if (!strncmp(param, PARAM_LLITE, + sizeof(PARAM_LLITE) - 1)) + GOTO(out, rc = -EINVAL); + + /* Strip -osc or -mdc suffix from svname */ + if (server_make_name(dev_type, mti->mti_stripe_index, + mti->mti_fsname, mti->mti_svname, + sizeof(mti->mti_svname))) + GOTO(out, rc = -EINVAL); + } + break; + } + + if (strlcpy(mti->mti_params, param, sizeof(mti->mti_params)) >= + sizeof(mti->mti_params)) GOTO(out, rc = -E2BIG); - rc = server_name2index(mti->mti_svname, &mti->mti_stripe_index, &tmp); - if (rc < 0) - /* Not a valid server; may be only fsname */ - rc = 0; - else - /* Strip -osc or -mdc suffix from svname */ - if (server_make_name(rc, mti->mti_stripe_index, mti->mti_fsname, - mti->mti_svname)) - GOTO(out, rc = -EINVAL); + + CDEBUG(D_MGS, "set_conf_param fs='%s' device='%s' param='%s'\n", + mti->mti_fsname, mti->mti_svname, mti->mti_params); + + rc = mgs_find_or_make_fsdb(env, mgs, mti->mti_fsname, &fsdb); + if (rc) + GOTO(out, rc); + + if (!test_bit(FSDB_MGS_SELF, &fsdb->fsdb_flags) && + test_bit(FSDB_LOG_EMPTY, &fsdb->fsdb_flags)) { + CERROR("No filesystem targets for %s. cfg_device from lctl " + "is '%s'\n", mti->mti_fsname, mti->mti_svname); + mgs_unlink_fsdb(mgs, fsdb); + GOTO(out, rc = -EINVAL); + } + /* * Revoke lock so everyone updates. Should be alright if * someone was already reading while we were updating the logs, * so we don't really need to hold the lock while we're * writing (above). 
*/ - if (lcfg->lcfg_command == LCFG_SET_PARAM) { - mti->mti_flags = rc | LDD_F_PARAM2; - mutex_lock(&fsdb->fsdb_mutex); - rc = mgs_write_log_param2(env, mgs, fsdb, mti, mti->mti_params); - mutex_unlock(&fsdb->fsdb_mutex); - mgs_revoke_lock(mgs, fsdb, CONFIG_T_PARAMS); + mti->mti_flags = dev_type | LDD_F_PARAM; + mutex_lock(&fsdb->fsdb_mutex); + rc = mgs_write_log_param(env, mgs, fsdb, mti, mti->mti_params); + mutex_unlock(&fsdb->fsdb_mutex); + mgs_revoke_lock(mgs, fsdb, CONFIG_T_CONFIG); + +out: + if (fsdb) + mgs_put_fsdb(mgs, fsdb); + + RETURN(rc); +} + +static int mgs_set_param2(const struct lu_env *env, struct mgs_device *mgs, + struct mgs_target_info *mti, const char *param) +{ + struct fs_db *fsdb = NULL; + int dev_type; + size_t len; + int rc; + + if (strlcpy(mti->mti_params, param, sizeof(mti->mti_params)) >= + sizeof(mti->mti_params)) + GOTO(out, rc = -E2BIG); + + len = strcspn(param, ".="); + if (len && param[len] != '=') { + struct list_head *tmp; + char *ptr; + + param += len + 1; + ptr = strchr(param, '.'); + + len = strlen(param); + if (ptr) + len -= strlen(ptr); + if (len >= sizeof(mti->mti_svname)) + GOTO(out, rc = -E2BIG); + + snprintf(mti->mti_svname, sizeof(mti->mti_svname), "%.*s", + (int)len, param); + + mutex_lock(&mgs->mgs_mutex); + if (unlikely(list_empty(&mgs->mgs_fs_db_list))) { + mutex_unlock(&mgs->mgs_mutex); + GOTO(out, rc = -ENODEV); + } + + list_for_each(tmp, &mgs->mgs_fs_db_list) { + fsdb = list_entry(tmp, struct fs_db, fsdb_list); + if (fsdb->fsdb_has_lproc_entry && + strcmp(fsdb->fsdb_name, "params") != 0 && + strstr(param, fsdb->fsdb_name)) { + snprintf(mti->mti_svname, + sizeof(mti->mti_svname), "%s", + fsdb->fsdb_name); + break; + } + fsdb = NULL; + } + + if (!fsdb) { + snprintf(mti->mti_svname, sizeof(mti->mti_svname), + "general"); + } + mutex_unlock(&mgs->mgs_mutex); } else { - mti->mti_flags = rc | LDD_F_PARAM; - mutex_lock(&fsdb->fsdb_mutex); - rc = mgs_write_log_param(env, mgs, fsdb, mti, mti->mti_params); - 
mutex_unlock(&fsdb->fsdb_mutex); - mgs_revoke_lock(mgs, fsdb, CONFIG_T_CONFIG); + snprintf(mti->mti_svname, sizeof(mti->mti_svname), "general"); } + CDEBUG(D_MGS, "set_param2 fs='%s' device='%s' param='%s'\n", + mti->mti_fsname, mti->mti_svname, mti->mti_params); + + /* The return value should be the device type i.e LDD_F_SV_TYPE_XXX. + * A returned error tells us we don't have a target obd device. + */ + dev_type = server_name2index(mti->mti_svname, &mti->mti_stripe_index, + NULL); + if (dev_type < 0) + dev_type = 0; + + /* the return value should be the device type i.e LDD_F_SV_TYPE_XXX. + * Strip -osc or -mdc suffix from svname + */ + if ((dev_type & LDD_F_SV_TYPE_OST || dev_type & LDD_F_SV_TYPE_MDT) && + server_make_name(dev_type, mti->mti_stripe_index, + mti->mti_fsname, mti->mti_svname, + sizeof(mti->mti_svname))) + GOTO(out, rc = -EINVAL); + + rc = mgs_find_or_make_fsdb(env, mgs, PARAMS_FILENAME, &fsdb); + if (rc) + GOTO(out, rc); + /* + * Revoke lock so everyone updates. Should be alright if + * someone was already reading while we were updating the logs, + * so we don't really need to hold the lock while we're + * writing (above). 
+ */ + mti->mti_flags = dev_type | LDD_F_PARAM2; + mutex_lock(&fsdb->fsdb_mutex); + rc = mgs_write_log_param2(env, mgs, fsdb, mti, mti->mti_params); + mutex_unlock(&fsdb->fsdb_mutex); + mgs_revoke_lock(mgs, fsdb, CONFIG_T_PARAMS); + mgs_put_fsdb(mgs, fsdb); out: - OBD_FREE_PTR(mti); - RETURN(rc); + RETURN(rc); +} + +/* Set a permanent (config log) param for a target or fs + * + * @lcfg buf0 may contain the device (testfs-MDT0000) name + * buf1 contains the single parameter + */ +int mgs_set_param(const struct lu_env *env, struct mgs_device *mgs, + struct lustre_cfg *lcfg) +{ + const char *param = lustre_cfg_string(lcfg, 1); + struct mgs_target_info *mti; + int rc; + + /* Create a fake mti to hold everything */ + OBD_ALLOC_PTR(mti); + if (!mti) + return -ENOMEM; + + print_lustre_cfg(lcfg); + + if (lcfg->lcfg_command == LCFG_PARAM) { + /* For the case of lctl conf_param devname can be + * lustre, lustre-mdtlov, lustre-client, lustre-MDT0000 + */ + const char *devname = lustre_cfg_string(lcfg, 0); + + rc = mgs_set_conf_param(env, mgs, mti, devname, param); + } else { + /* In the case of lctl set_param -P lcfg[0] will always + * be 'general'. At least for now. 
+ */ + rc = mgs_set_param2(env, mgs, mti, param); + } + + OBD_FREE_PTR(mti); + + return rc; } static int mgs_write_log_pool(const struct lu_env *env, @@ -3883,7 +5382,7 @@ static int mgs_write_log_pool(const struct lu_env *env, if (rc) goto out; rc = record_base(env, llh, tgtname, 0, cmd, - fsname, poolname, ostname, 0); + fsname, poolname, ostname, NULL); if (rc) goto out; rc = record_marker(env, llh, fsdb, CM_END, tgtname, comment); @@ -3894,14 +5393,15 @@ out: int mgs_nodemap_cmd(const struct lu_env *env, struct mgs_device *mgs, enum lcfg_command_type cmd, const char *nodemap_name, - const char *param) + char *param) { - lnet_nid_t nid[2]; - bool bool_switch; - __u32 int_id; - int rc = 0; - ENTRY; + lnet_nid_t nid[2]; + u32 idmap[2]; + bool bool_switch; + u32 int_id; + int rc = 0; + ENTRY; switch (cmd) { case LCFG_NODEMAP_ADD: rc = nodemap_add(nodemap_name); @@ -3922,21 +5422,83 @@ int mgs_nodemap_cmd(const struct lu_env *env, struct mgs_device *mgs, rc = nodemap_del_range(nodemap_name, nid); break; case LCFG_NODEMAP_ADMIN: - bool_switch = simple_strtoul(param, NULL, 10); + rc = kstrtobool(param, &bool_switch); + if (rc) + break; rc = nodemap_set_allow_root(nodemap_name, bool_switch); break; + case LCFG_NODEMAP_DENY_UNKNOWN: + rc = kstrtobool(param, &bool_switch); + if (rc) + break; + rc = nodemap_set_deny_unknown(nodemap_name, bool_switch); + break; + case LCFG_NODEMAP_AUDIT_MODE: + rc = kstrtoul(param, 10, (unsigned long *)&bool_switch); + if (rc == 0) + rc = nodemap_set_audit_mode(nodemap_name, bool_switch); + break; + case LCFG_NODEMAP_MAP_MODE: + if (strcmp("both", param) == 0) + rc = nodemap_set_mapping_mode(nodemap_name, + NODEMAP_MAP_BOTH); + else if (strcmp("uid_only", param) == 0) + rc = nodemap_set_mapping_mode(nodemap_name, + NODEMAP_MAP_UID_ONLY); + else if (strcmp("gid_only", param) == 0) + rc = nodemap_set_mapping_mode(nodemap_name, + NODEMAP_MAP_GID_ONLY); + else + rc = -EINVAL; + break; case LCFG_NODEMAP_TRUSTED: - bool_switch = 
simple_strtoul(param, NULL, 10); + rc = kstrtobool(param, &bool_switch); + if (rc) + break; rc = nodemap_set_trust_client_ids(nodemap_name, bool_switch); break; case LCFG_NODEMAP_SQUASH_UID: - int_id = simple_strtoul(param, NULL, 10); + rc = kstrtouint(param, 10, &int_id); + if (rc) + break; rc = nodemap_set_squash_uid(nodemap_name, int_id); break; case LCFG_NODEMAP_SQUASH_GID: - int_id = simple_strtoul(param, NULL, 10); + rc = kstrtouint(param, 10, &int_id); + if (rc) + break; rc = nodemap_set_squash_gid(nodemap_name, int_id); break; + case LCFG_NODEMAP_ADD_UIDMAP: + case LCFG_NODEMAP_ADD_GIDMAP: + rc = nodemap_parse_idmap(param, idmap); + if (rc != 0) + break; + if (cmd == LCFG_NODEMAP_ADD_UIDMAP) + rc = nodemap_add_idmap(nodemap_name, NODEMAP_UID, + idmap); + else + rc = nodemap_add_idmap(nodemap_name, NODEMAP_GID, + idmap); + break; + case LCFG_NODEMAP_DEL_UIDMAP: + case LCFG_NODEMAP_DEL_GIDMAP: + rc = nodemap_parse_idmap(param, idmap); + if (rc != 0) + break; + if (cmd == LCFG_NODEMAP_DEL_UIDMAP) + rc = nodemap_del_idmap(nodemap_name, NODEMAP_UID, + idmap); + else + rc = nodemap_del_idmap(nodemap_name, NODEMAP_GID, + idmap); + break; + case LCFG_NODEMAP_SET_FILESET: + rc = nodemap_set_fileset(nodemap_name, param); + break; + case LCFG_NODEMAP_SET_SEPOL: + rc = nodemap_set_sepol(nodemap_name, param); + break; default: rc = -EINVAL; } @@ -3954,18 +5516,21 @@ int mgs_pool_cmd(const struct lu_env *env, struct mgs_device *mgs, char *label = NULL, *canceled_label = NULL; int label_sz; struct mgs_target_info *mti = NULL; - int rc, i; - ENTRY; + bool checked = false; + bool locked = false; + bool free = false; + int rc, i; + ENTRY; rc = mgs_find_or_make_fsdb(env, mgs, fsname, &fsdb); - if (rc) { - CERROR("Can't get db for %s\n", fsname); - RETURN(rc); - } + if (rc) { + CERROR("Can't get db for %s\n", fsname); + RETURN(rc); + } if (test_bit(FSDB_LOG_EMPTY, &fsdb->fsdb_flags)) { - CERROR("%s is not defined\n", fsname); - mgs_free_fsdb(mgs, fsdb); - RETURN(-EINVAL); + 
CERROR("%s is not defined\n", fsname); + free = true; + GOTO(out_fsdb, rc = -EINVAL); } label_sz = 10 + strlen(fsname) + strlen(poolname); @@ -3982,8 +5547,8 @@ int mgs_pool_cmd(const struct lu_env *env, struct mgs_device *mgs, } OBD_ALLOC(label, label_sz); - if (label == NULL) - RETURN(-ENOMEM); + if (!label) + GOTO(out_fsdb, rc = -ENOMEM); switch(cmd) { case LCFG_POOL_NEW: @@ -4016,53 +5581,67 @@ int mgs_pool_cmd(const struct lu_env *env, struct mgs_device *mgs, break; } - if (canceled_label != NULL) { - OBD_ALLOC_PTR(mti); - if (mti == NULL) - GOTO(out_cancel, rc = -ENOMEM); - } + OBD_ALLOC_PTR(mti); + if (mti == NULL) + GOTO(out_cancel, rc = -ENOMEM); + strncpy(mti->mti_svname, "lov pool", sizeof(mti->mti_svname)); mutex_lock(&fsdb->fsdb_mutex); - /* write pool def to all MDT logs */ - for (i = 0; i < INDEX_MAP_SIZE * 8; i++) { + locked = true; + /* write pool def to all MDT logs */ + for (i = 0; i < INDEX_MAP_SIZE * 8; i++) { if (test_bit(i, fsdb->fsdb_mdt_index_map)) { rc = name_create_mdt_and_lov(&logname, &lovname, fsdb, i); - if (rc) { - mutex_unlock(&fsdb->fsdb_mutex); + if (rc) GOTO(out_mti, rc); + + if (!checked && (canceled_label == NULL)) { + rc = mgs_check_marker(env, mgs, fsdb, mti, + logname, lovname, label); + if (rc) { + name_destroy(&logname); + name_destroy(&lovname); + GOTO(out_mti, + rc = (rc == LLOG_PROC_BREAK ? 
+ -EEXIST : rc)); + } + checked = true; } - if (canceled_label != NULL) { - strcpy(mti->mti_svname, "lov pool"); + if (canceled_label != NULL) rc = mgs_modify(env, mgs, fsdb, mti, logname, lovname, canceled_label, CM_SKIP); - } if (rc >= 0) rc = mgs_write_log_pool(env, mgs, logname, fsdb, lovname, cmd, fsname, poolname, ostname, label); - name_destroy(&logname); - name_destroy(&lovname); - if (rc) { - mutex_unlock(&fsdb->fsdb_mutex); + name_destroy(&logname); + name_destroy(&lovname); + if (rc) GOTO(out_mti, rc); - } - } - } + } + } rc = name_create(&logname, fsname, "-client"); - if (rc) { - mutex_unlock(&fsdb->fsdb_mutex); + if (rc) GOTO(out_mti, rc); + + if (!checked && (canceled_label == NULL)) { + rc = mgs_check_marker(env, mgs, fsdb, mti, logname, + fsdb->fsdb_clilov, label); + if (rc) { + name_destroy(&logname); + GOTO(out_mti, rc = (rc == LLOG_PROC_BREAK ? + -EEXIST : rc)); + } } if (canceled_label != NULL) { rc = mgs_modify(env, mgs, fsdb, mti, logname, fsdb->fsdb_clilov, canceled_label, CM_SKIP); if (rc < 0) { - mutex_unlock(&fsdb->fsdb_mutex); name_destroy(&logname); GOTO(out_mti, rc); } @@ -4071,18 +5650,27 @@ int mgs_pool_cmd(const struct lu_env *env, struct mgs_device *mgs, rc = mgs_write_log_pool(env, mgs, logname, fsdb, fsdb->fsdb_clilov, cmd, fsname, poolname, ostname, label); mutex_unlock(&fsdb->fsdb_mutex); + locked = false; name_destroy(&logname); - /* request for update */ + /* request for update */ mgs_revoke_lock(mgs, fsdb, CONFIG_T_CONFIG); - EXIT; + GOTO(out_mti, rc); + out_mti: - if (mti != NULL) - OBD_FREE_PTR(mti); + if (locked) + mutex_unlock(&fsdb->fsdb_mutex); + if (mti != NULL) + OBD_FREE_PTR(mti); out_cancel: if (canceled_label != NULL) OBD_FREE(canceled_label, label_sz); out_label: OBD_FREE(label, label_sz); - return rc; +out_fsdb: + if (free) + mgs_unlink_fsdb(mgs, fsdb); + mgs_put_fsdb(mgs, fsdb); + + return rc; }