4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.gnu.org/licenses/gpl-2.0.html
23 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Use is subject to license terms.
26 * Copyright (c) 2011, 2017, Intel Corporation.
29 * This file is part of Lustre, http://www.lustre.org/
31 * lustre/mgs/mgs_llog.c
33 * Lustre Management Server (mgs) config llog creation
35 * Author: Nathan Rutman <nathan@clusterfs.com>
36 * Author: Alex Zhuravlev <bzzz@whamcloud.com>
37 * Author: Mikhail Pershin <tappro@whamcloud.com>
40 #define DEBUG_SUBSYSTEM S_MGS
41 #define D_MGS D_CONFIG
44 #include <uapi/linux/lustre/lustre_ioctl.h>
45 #include <uapi/linux/lustre/lustre_param.h>
46 #include <lustre_sec.h>
47 #include <lustre_quota.h>
48 #include <lustre_sec.h>
50 #include "mgs_internal.h"
52 /********************** Class functions ********************/
55 * Find all logs in the CONFIG directory and link them into a list.
57 * \param[in] env pointer to the thread context
58 * \param[in] mgs pointer to the mgs device
59 * \param[out] log_list the list to hold the found llog name entry
61 * \retval 0 for success
62 * \retval negative error number on failure
/*
 * Iterate the index of mgs->mgs_configs_dir and queue one mgs_direntry per
 * accepted key on log_list (which is re-initialised first). Each queued
 * entry carries a NUL-terminated copy of the key. Names reported by
 * lu_name_is_backup_file() are logged as skipped backup files.
 */
64 int class_dentry_readdir(const struct lu_env *env, struct mgs_device *mgs,
65 struct list_head *log_list)
67 struct dt_object *dir = mgs->mgs_configs_dir;
68 const struct dt_it_ops *iops;
70 struct mgs_direntry *de;
74 INIT_LIST_HEAD(log_list);
/* the directory object must provide index operations to be iterated */
77 LASSERT(dir->do_index_ops);
79 iops = &dir->do_index_ops->dio_it;
80 it = iops->init(env, dir, LUDA_64BITHASH);
84 rc = iops->load(env, it, 0);
90 key = (void *)iops->key(env, it);
92 CERROR("%s: key failed when listing %s: rc = %d\n",
93 mgs->mgs_obd->obd_name, MOUNT_CONFIGS_DIR,
97 key_sz = iops->key_size(env, it);
100 /* filter out "." and ".." entries */
104 if (key_sz == 2 && key[1] == '.')
108 /* filter out backup files */
109 if (lu_name_is_backup_file(key, key_sz, NULL)) {
110 CDEBUG(D_MGS, "Skipping backup file %.*s\n",
/* one extra byte for the terminating NUL stored below */
115 de = mgs_direntry_alloc(key_sz + 1);
/* the key is copied by length and terminated explicitly */
121 memcpy(de->mde_name, key, key_sz);
122 de->mde_name[key_sz] = 0;
124 list_add(&de->mde_list, log_list);
127 rc = iops->next(env, it);
137 struct mgs_direntry *n;
139 CERROR("%s: key failed when listing %s: rc = %d\n",
140 mgs->mgs_obd->obd_name, MOUNT_CONFIGS_DIR, rc);
/*
 * Unlink and free every entry queued so far (the _safe iterator is
 * needed because entries are deleted while walking). Presumably this is
 * the failure path - confirm against the full function.
 */
142 list_for_each_entry_safe(de, n, log_list, mde_list) {
143 list_del_init(&de->mde_list);
144 mgs_direntry_free(de);
151 /******************** DB functions *********************/
/*
 * Allocate *newname with room for prefix + suffix + terminating NUL and
 * fill it with the two strings concatenated. The buffer size matches what
 * name_destroy() later passes to OBD_FREE (strlen + 1).
 */
153 static inline int name_create(char **newname, char *prefix, char *suffix)
156 OBD_ALLOC(*newname, strlen(prefix) + strlen(suffix) + 1);
/* unbounded sprintf is sized by the allocation made just above */
159 sprintf(*newname, "%s%s", prefix, suffix);
/*
 * Free a string using its current length + 1 as the allocation size, so
 * the string must still have the length it was allocated with.
 */
163 static inline void name_destroy(char **name)
166 OBD_FREE(*name, strlen(*name) + 1);
/*
 * Build a device name of the form "<tgtname>-<type>-MDT<index>", with the
 * index printed as four hex digits, in a freshly allocated buffer. The
 * type string is selected by whether tgtname contains "-MDT" or "-OST".
 */
170 static inline int name_create_osp(char **ospname, char **devtype, char *tgtname,
/* sizeof() of the template literal also counts the terminating NUL */
173 size_t size = strlen(tgtname) + sizeof("-osx-MDTXXXX");
177 if (strstr(tgtname, "-MDT"))
179 else if (strstr(tgtname, "-OST"))
184 OBD_ALLOC(out, size);
/* a return value >= size means the formatted name did not fit */
188 if (snprintf(out, size, "%s-%s-MDT%04x", tgtname, type, index) >= size)
/* Create the LOV name for an MDT: mdtname with "-mdtlov" appended. */
198 static inline int name_create_lov(char **lovname, char *mdtname)
200 return name_create(lovname, mdtname, "-mdtlov");
/*
 * Context passed to mgs_fsdb_handler() through the llog_process() data
 * pointer. The handler reads ->fsdb and both reads and updates ->ver.
 */
204 struct mgs_fsdb_handler_data
210 /* from the (client) config log, figure out:
211 * 1. which ost's/mdt's are configured (by index)
212 * 2. what the last config step is
213 * 3. COMPAT_18 osc name
215 /* It might be better to have a separate db file, instead of parsing the info
216 out of the client log. This is slow and potentially error-prone. */
/*
 * Per-record llog callback that fills in the fs_db found in data->fsdb:
 * marks OST and MDT indices in the index bitmaps, counts MDTs, detects the
 * 1.8 OSC naming scheme and tracks the highest marker step in fsdb_gen.
 */
217 static int mgs_fsdb_handler(const struct lu_env *env, struct llog_handle *llh,
218 struct llog_rec_hdr *rec, void *data)
220 struct mgs_fsdb_handler_data *d = data;
221 struct fs_db *fsdb = d->fsdb;
222 int cfg_len = rec->lrh_len;
/* the config payload starts immediately after the record header */
223 char *cfg_buf = (char *)(rec + 1);
224 struct lustre_cfg *lcfg;
229 if (rec->lrh_type != OBD_CFG_REC) {
230 CERROR("unhandled lrh_type: %#x\n", rec->lrh_type);
234 rc = lustre_cfg_sanity_check(cfg_buf, cfg_len);
236 CERROR("Insane cfg\n");
240 lcfg = (struct lustre_cfg *)cfg_buf;
242 CDEBUG(D_INFO, "cmd %x %s %s\n", lcfg->lcfg_command,
243 lustre_cfg_string(lcfg, 0), lustre_cfg_string(lcfg, 1));
245 /* Figure out ost indices */
246 /* lov_modify_tgts add 0:lov1 1:ost1_UUID 2(index):0 3(gen):1 */
247 if (lcfg->lcfg_command == LCFG_LOV_ADD_OBD ||
248 lcfg->lcfg_command == LCFG_LOV_DEL_OBD) {
/* string 2 of the record holds the index in decimal */
249 rc = kstrtouint(lustre_cfg_string(lcfg, 2), 10, &index);
253 CDEBUG(D_MGS, "OST index for %s is %u (%s)\n",
254 lustre_cfg_string(lcfg, 1), index,
255 lustre_cfg_string(lcfg, 2));
/* the bit is set for delete records as well as add records */
256 set_bit(index, fsdb->fsdb_ost_index_map);
259 /* Figure out mdt indices */
260 /* attach 0:MDC_uml1_mdsA_MNT_client 1:mdc 2:1d834_MNT_client_03f */
261 if ((lcfg->lcfg_command == LCFG_ATTACH) &&
262 (strcmp(lustre_cfg_string(lcfg, 1), LUSTRE_MDC_NAME) == 0)) {
263 rc = server_name2index(lustre_cfg_string(lcfg, 0),
265 if (rc != LDD_F_SV_TYPE_MDT) {
266 CWARN("Unparsable MDC name %s, assuming index 0\n",
267 lustre_cfg_string(lcfg, 0));
271 CDEBUG(D_MGS, "MDT index is %u\n", index);
/* count each MDT index only the first time it is seen */
272 if (!test_bit(index, fsdb->fsdb_mdt_index_map)) {
273 set_bit(index, fsdb->fsdb_mdt_index_map);
274 fsdb->fsdb_mdt_count++;
279 * figure out the old config. fsdb_gen = 0 means old log
280 * It is obsoleted and not supported anymore
282 if (fsdb->fsdb_gen == 0) {
283 CERROR("Old config format is not supported\n");
288 * compat to 1.8, check osc name used by MDT0 to OSTs, bz18548.
290 if (!test_bit(FSDB_OSCNAME18, &fsdb->fsdb_flags) &&
291 lcfg->lcfg_command == LCFG_ATTACH &&
292 strcmp(lustre_cfg_string(lcfg, 1), LUSTRE_OSC_NAME) == 0) {
/* d->ver is the version taken from the most recent marker record */
293 if (OBD_OCD_VERSION_MAJOR(d->ver) == 1 &&
294 OBD_OCD_VERSION_MINOR(d->ver) <= 8) {
295 CWARN("MDT using 1.8 OSC name scheme\n");
296 set_bit(FSDB_OSCNAME18, &fsdb->fsdb_flags);
300 if (lcfg->lcfg_command == LCFG_MARKER) {
301 struct cfg_marker *marker;
303 marker = lustre_cfg_buf(lcfg, 1);
/* remember the marker version for the 1.8 check on later records */
304 d->ver = marker->cm_vers;
306 /* Keep track of the latest marker step */
307 fsdb->fsdb_gen = max(fsdb->fsdb_gen, marker->cm_step);
313 /* fsdb->fsdb_mutex is already held in mgs_find_or_make_fsdb*/
/*
 * Populate fsdb by running mgs_fsdb_handler() over the config llog named
 * "<fsdb_name>-client", opened through llog_open_create(). FSDB_LOG_EMPTY
 * is set when llog_get_size() reports at most one record.
 */
314 static int mgs_get_fsdb_from_llog(const struct lu_env *env,
315 struct mgs_device *mgs,
319 struct llog_handle *loghandle;
320 struct llog_ctxt *ctxt;
321 struct mgs_fsdb_handler_data d = {
328 ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
329 LASSERT(ctxt != NULL);
330 rc = name_create(&logname, fsdb->fsdb_name, "-client");
333 rc = llog_open_create(env, ctxt, &loghandle, NULL, logname);
337 rc = llog_init_handle(env, loghandle, LLOG_F_IS_PLAIN, NULL);
341 if (llog_get_size(loghandle) <= 1)
342 set_bit(FSDB_LOG_EMPTY, &fsdb->fsdb_flags);
/* d is handed to the callback as its opaque data pointer */
344 rc = llog_process(env, loghandle, mgs_fsdb_handler, (void *)&d, NULL);
345 CDEBUG(D_INFO, "get_db = %d\n", rc);
347 llog_close(env, loghandle);
349 name_destroy(&logname);
/*
 * Release all sptlrpc rule state held by fsdb: every per-target entry on
 * the fsdb_srpc_tgt chain, then the general rule set.
 */
356 static void mgs_free_fsdb_srpc(struct fs_db *fsdb)
358 struct mgs_tgt_srpc_conf *tgtconf;
360 /* free target-specific rules */
361 while (fsdb->fsdb_srpc_tgt) {
/* pop the head entry before freeing it */
362 tgtconf = fsdb->fsdb_srpc_tgt;
363 fsdb->fsdb_srpc_tgt = tgtconf->mtsc_next;
365 LASSERT(tgtconf->mtsc_tgt);
367 sptlrpc_rule_set_free(&tgtconf->mtsc_rset);
/* the target name is freed with a size derived from its length */
368 OBD_FREE(tgtconf->mtsc_tgt, strlen(tgtconf->mtsc_tgt) + 1);
369 OBD_FREE_PTR(tgtconf);
372 /* free general rules */
373 sptlrpc_rule_set_free(&fsdb->fsdb_srpc_gen);
/*
 * Under mgs->mgs_mutex, take fsdb off the list it is linked on and drop
 * the reference that the list held. Nothing is done if fsdb is already
 * unlinked.
 */
376 static void mgs_unlink_fsdb(struct mgs_device *mgs, struct fs_db *fsdb)
378 mutex_lock(&mgs->mgs_mutex);
379 if (likely(!list_empty(&fsdb->fsdb_list))) {
/* at least two refs are expected while the fsdb is still linked */
380 LASSERTF(atomic_read(&fsdb->fsdb_ref) >= 2,
381 "Invalid ref %d on %s\n",
382 atomic_read(&fsdb->fsdb_ref),
385 list_del_init(&fsdb->fsdb_list);
386 /* Drop the reference on the list.*/
387 mgs_put_fsdb(mgs, fsdb);
389 mutex_unlock(&mgs->mgs_mutex);
392 /* The caller must hold mgs->mgs_mutex. */
/*
 * Linear search of mgs->mgs_fs_db_list for an fs_db whose fsdb_name equals
 * fsname exactly. No reference count is touched here.
 */
393 static inline struct fs_db *
394 mgs_find_fsdb_noref(struct mgs_device *mgs, const char *fsname)
397 struct list_head *tmp;
399 list_for_each(tmp, &mgs->mgs_fs_db_list) {
400 fsdb = list_entry(tmp, struct fs_db, fsdb_list);
401 if (strcmp(fsdb->fsdb_name, fsname) == 0)
408 /* The caller must hold mgs->mgs_mutex. */
/*
 * Look up the fs_db called name, unlink it from the list and drop the
 * reference the list held on it.
 */
409 static void mgs_remove_fsdb_by_name(struct mgs_device *mgs, const char *name)
413 fsdb = mgs_find_fsdb_noref(mgs, name);
415 list_del_init(&fsdb->fsdb_list);
416 /* Drop the reference on the list.*/
417 mgs_put_fsdb(mgs, fsdb);
421 /* The caller must hold mgs->mgs_mutex. */
/*
 * Look up the fs_db called fsname and take a reference on it; the
 * reference is released with mgs_put_fsdb().
 */
422 struct fs_db *mgs_find_fsdb(struct mgs_device *mgs, const char *fsname)
426 fsdb = mgs_find_fsdb_noref(mgs, fsname);
428 atomic_inc(&fsdb->fsdb_ref);
433 /* The caller must hold mgs->mgs_mutex. */
/*
 * Create and initialise an fs_db for fsname and link it on
 * mgs->mgs_fs_db_list. The MGS's own entry (MGSSELF_NAME) only gets the
 * FSDB_MGS_SELF setup; other entries get index bitmaps, client LOV/LMV
 * names and are populated from the client and params config llogs.
 * Failures are reported as ERR_PTR() values.
 */
434 static struct fs_db *mgs_new_fsdb(const struct lu_env *env,
435 struct mgs_device *mgs, char *fsname)
/* reject names that would not fit in fsdb_name together with the NUL */
441 if (strlen(fsname) >= sizeof(fsdb->fsdb_name)) {
442 CERROR("fsname %s is too long\n", fsname);
444 RETURN(ERR_PTR(-EINVAL));
449 RETURN(ERR_PTR(-ENOMEM));
/* fsname was length-checked above, so the copy includes its NUL */
451 strncpy(fsdb->fsdb_name, fsname, sizeof(fsdb->fsdb_name));
452 mutex_init(&fsdb->fsdb_mutex);
453 INIT_LIST_HEAD(&fsdb->fsdb_list);
454 set_bit(FSDB_UDESC, &fsdb->fsdb_flags);
456 INIT_LIST_HEAD(&fsdb->fsdb_clients);
457 atomic_set(&fsdb->fsdb_notify_phase, 0);
458 init_waitqueue_head(&fsdb->fsdb_notify_waitq);
459 init_completion(&fsdb->fsdb_notify_comp);
461 if (strcmp(fsname, MGSSELF_NAME) == 0) {
462 set_bit(FSDB_MGS_SELF, &fsdb->fsdb_flags);
463 fsdb->fsdb_mgs = mgs;
464 if (logname_is_barrier(fsname))
/* one bitmap each for the MDT and OST indices in use */
467 OBD_ALLOC(fsdb->fsdb_mdt_index_map, INDEX_MAP_SIZE);
468 if (!fsdb->fsdb_mdt_index_map) {
469 CERROR("No memory for MDT index maps\n");
471 GOTO(err, rc = -ENOMEM);
474 OBD_ALLOC(fsdb->fsdb_ost_index_map, INDEX_MAP_SIZE);
475 if (!fsdb->fsdb_ost_index_map) {
476 CERROR("No memory for OST index maps\n");
478 GOTO(err, rc = -ENOMEM);
481 if (logname_is_barrier(fsname))
484 rc = name_create(&fsdb->fsdb_clilov, fsname, "-clilov");
488 rc = name_create(&fsdb->fsdb_clilmv, fsname, "-clilmv");
492 /* initialise data for NID table */
493 mgs_ir_init_fs(env, mgs, fsdb);
494 lproc_mgs_add_live(mgs, fsdb);
/* neither the MGS self entry nor the params entry has a client llog */
497 if (!test_bit(FSDB_MGS_SELF, &fsdb->fsdb_flags) &&
498 strcmp(PARAMS_FILENAME, fsname) != 0) {
499 /* populate the db from the client llog */
500 rc = mgs_get_fsdb_from_llog(env, mgs, fsdb);
502 CERROR("Can't get db from client log %d\n", rc);
508 /* populate srpc rules from params llog */
509 rc = mgs_get_fsdb_srpc_from_llog(env, mgs, fsdb);
511 CERROR("Can't get db from params log %d\n", rc);
517 /* One ref is for the fsdb on the list.
518 * The other ref is for the caller. */
519 atomic_set(&fsdb->fsdb_ref, 2);
520 list_add(&fsdb->fsdb_list, &mgs->mgs_fs_db_list);
/*
 * With a single reference, the put below drops it to zero and frees the
 * fsdb. Presumably this is the target of the GOTO(err, ...) statements
 * above - confirm against the full function.
 */
525 atomic_set(&fsdb->fsdb_ref, 1);
526 mgs_put_fsdb(mgs, fsdb);
/*
 * Tear down an fs_db that is no longer linked on any list: remove its live
 * proc entry, finish the NID table state, and free the index bitmaps, the
 * client LOV/LMV names and the sptlrpc rules.
 */
531 static void mgs_free_fsdb(struct mgs_device *mgs, struct fs_db *fsdb)
/* the fsdb must already have been unlinked by the caller */
533 LASSERT(list_empty(&fsdb->fsdb_list));
535 lproc_mgs_del_live(mgs, fsdb);
537 /* deinitialize fsr */
539 mgs_ir_fini_fs(mgs, fsdb);
/* either bitmap may be absent, so each is checked before freeing */
541 if (fsdb->fsdb_ost_index_map)
542 OBD_FREE(fsdb->fsdb_ost_index_map, INDEX_MAP_SIZE);
543 if (fsdb->fsdb_mdt_index_map)
544 OBD_FREE(fsdb->fsdb_mdt_index_map, INDEX_MAP_SIZE);
545 name_destroy(&fsdb->fsdb_clilov);
546 name_destroy(&fsdb->fsdb_clilmv);
547 mgs_free_fsdb_srpc(fsdb);
/* Drop one reference on fsdb and free it when the count reaches zero. */
551 void mgs_put_fsdb(struct mgs_device *mgs, struct fs_db *fsdb)
553 if (atomic_dec_and_test(&fsdb->fsdb_ref))
554 mgs_free_fsdb(mgs, fsdb);
/* Initialise the (empty) list of fs_db entries owned by this MGS device. */
557 int mgs_init_fsdb_list(struct mgs_device *mgs)
559 INIT_LIST_HEAD(&mgs->mgs_fs_db_list);
/*
 * Under mgs->mgs_mutex, unlink every fs_db from mgs_fs_db_list and drop
 * the reference the list held on each one.
 */
563 int mgs_cleanup_fsdb_list(struct mgs_device *mgs)
566 struct list_head *tmp, *tmp2;
568 mutex_lock(&mgs->mgs_mutex);
/* the _safe iterator is needed because entries are removed in the loop */
569 list_for_each_safe(tmp, tmp2, &mgs->mgs_fs_db_list) {
570 fsdb = list_entry(tmp, struct fs_db, fsdb_list);
571 list_del_init(&fsdb->fsdb_list);
572 mgs_put_fsdb(mgs, fsdb);
574 mutex_unlock(&mgs->mgs_mutex);
578 /* The caller must hold mgs->mgs_mutex. */
/*
 * Look up the fs_db called name with mgs_find_fsdb(); mgs_new_fsdb() is
 * used to create one. The result is handed back through dbh.
 */
579 int mgs_find_or_make_fsdb_nolock(const struct lu_env *env,
580 struct mgs_device *mgs,
581 char *name, struct fs_db **dbh)
587 fsdb = mgs_find_fsdb(mgs, name);
589 fsdb = mgs_new_fsdb(env, mgs, name);
593 CDEBUG(D_MGS, "Created new db: rc = %d\n", rc);
/*
 * Locked wrapper: takes mgs->mgs_mutex around
 * mgs_find_or_make_fsdb_nolock().
 */
602 int mgs_find_or_make_fsdb(const struct lu_env *env, struct mgs_device *mgs,
603 char *name, struct fs_db **dbh)
608 mutex_lock(&mgs->mgs_mutex);
609 rc = mgs_find_or_make_fsdb_nolock(env, mgs, name, dbh);
610 mutex_unlock(&mgs->mgs_mutex);
617 * -1= empty client log
/*
 * Check whether the stripe index in mti is already marked in the OST or
 * MDT index bitmap of the target's filesystem. The target must already
 * carry an index (LDD_F_NEED_INDEX must be clear).
 */
619 int mgs_check_index(const struct lu_env *env,
620 struct mgs_device *mgs,
621 struct mgs_target_info *mti)
628 LASSERT(!(mti->mti_flags & LDD_F_NEED_INDEX));
630 rc = mgs_find_or_make_fsdb(env, mgs, mti->mti_fsname, &fsdb);
632 CERROR("Can't get db for %s\n", mti->mti_fsname);
636 if (test_bit(FSDB_LOG_EMPTY, &fsdb->fsdb_flags))
/* pick the bitmap matching the server type; other types are invalid */
639 if (mti->mti_flags & LDD_F_SV_TYPE_OST)
640 imap = fsdb->fsdb_ost_index_map;
641 else if (mti->mti_flags & LDD_F_SV_TYPE_MDT)
642 imap = fsdb->fsdb_mdt_index_map;
644 GOTO(out, rc = -EINVAL);
646 if (test_bit(mti->mti_stripe_index, imap))
/* release the reference obtained from mgs_find_or_make_fsdb() */
652 mgs_put_fsdb(mgs, fsdb);
/*
 * Scan the bitmap for the first clear bit. map_len is in bytes, hence the
 * factor of 8 when bounding the bit index.
 */
656 static __inline__ int next_index(void *index_map, int map_len)
660 for (i = 0; i < map_len * 8; i++)
661 if (!test_bit(i, index_map))
663 CERROR("max index %d exceeded.\n", i);
667 /* Make the mdt/ost server obd name based on the filesystem name */
/*
 * Format a server name into name_buf. MDT/OST names are built from at most
 * the first 8 characters of fs, one separator character, "MDT" or "OST",
 * and the index as four hex digits; the separator is chosen from the
 * LDD_F_WRITECONF / LDD_F_VIRGIN flags. An MGS is always named "MGS".
 * mgs_set_index() treats a true return value as an unknown server type.
 */
668 static bool server_make_name(u32 flags, u16 index, const char *fs,
669 char *name_buf, size_t name_buf_size)
671 bool invalid_flag = false;
673 if (flags & (LDD_F_SV_TYPE_MDT | LDD_F_SV_TYPE_OST)) {
676 if (flags & LDD_F_WRITECONF)
678 else if (flags & LDD_F_VIRGIN)
681 if (!(flags & LDD_F_SV_ALL))
682 snprintf(name_buf, name_buf_size, "%.8s%c%s%04x", fs,
684 (flags & LDD_F_SV_TYPE_MDT) ? "MDT" : "OST",
686 } else if (flags & LDD_F_SV_TYPE_MGS) {
687 snprintf(name_buf, name_buf_size, "MGS");
689 CERROR("unknown server type %#x\n", flags);
696 * 0 newly marked as in use
698 * +EALREADY for update of an old index
/*
 * Reserve the target's stripe index in the OST or MDT bitmap of its
 * filesystem, allocating the next free index first when LDD_F_NEED_INDEX
 * is set, then regenerate mti_svname from the final index. Runs with
 * fsdb->fsdb_mutex held and releases the fsdb reference before leaving.
 */
700 static int mgs_set_index(const struct lu_env *env,
701 struct mgs_device *mgs,
702 struct mgs_target_info *mti)
710 rc = mgs_find_or_make_fsdb(env, mgs, mti->mti_fsname, &fsdb);
712 CERROR("Can't get db for %s\n", mti->mti_fsname);
716 mutex_lock(&fsdb->fsdb_mutex);
717 if (mti->mti_flags & LDD_F_SV_TYPE_OST)
718 imap = fsdb->fsdb_ost_index_map;
719 else if (mti->mti_flags & LDD_F_SV_TYPE_MDT)
720 imap = fsdb->fsdb_mdt_index_map;
722 GOTO(out_up, rc = -EINVAL);
/* no index requested by the target: take the first free one */
724 if (mti->mti_flags & LDD_F_NEED_INDEX) {
725 rc = next_index(imap, INDEX_MAP_SIZE);
727 GOTO(out_up, rc = -ERANGE);
728 mti->mti_stripe_index = rc;
731 /* the last index(0xffff) is reserved for default value. */
732 if (mti->mti_stripe_index >= INDEX_MAP_SIZE * 8 - 1) {
733 LCONSOLE_ERROR_MSG(0x13f, "Server %s requested index %u, "
734 "but index must be less than %u.\n",
735 mti->mti_svname, mti->mti_stripe_index,
736 INDEX_MAP_SIZE * 8 - 1);
737 GOTO(out_up, rc = -ERANGE);
740 if (test_bit(mti->mti_stripe_index, imap)) {
/* a brand new target may not take over a used index without writeconf */
741 if ((mti->mti_flags & LDD_F_VIRGIN) &&
742 !(mti->mti_flags & LDD_F_WRITECONF)) {
745 "Server %s requested index %d, but that index is already in use. Use --writeconf to force\n",
747 mti->mti_stripe_index);
748 GOTO(out_up, rc = -EADDRINUSE);
750 CDEBUG(D_MGS, "Server %s updating index %d\n",
751 mti->mti_svname, mti->mti_stripe_index);
/* positive EALREADY: an update of an existing index, not an error */
752 GOTO(out_up, rc = EALREADY);
755 set_bit(mti->mti_stripe_index, imap);
756 if (mti->mti_flags & LDD_F_SV_TYPE_MDT)
757 fsdb->fsdb_mdt_count++;
/* NOTE(review): the same bit is also set a few lines above */
760 set_bit(mti->mti_stripe_index, imap);
761 clear_bit(FSDB_LOG_EMPTY, &fsdb->fsdb_flags);
/* VIRGIN and WRITECONF are masked out before building the name */
762 if (server_make_name(mti->mti_flags & ~(LDD_F_VIRGIN | LDD_F_WRITECONF),
763 mti->mti_stripe_index, mti->mti_fsname,
764 mti->mti_svname, sizeof(mti->mti_svname))) {
765 CERROR("unknown server type %#x\n", mti->mti_flags);
766 GOTO(out_up, rc = -EINVAL);
769 CDEBUG(D_MGS, "Set index for %s to %d\n", mti->mti_svname,
770 mti->mti_stripe_index);
772 GOTO(out_up, rc = 0);
775 mutex_unlock(&fsdb->fsdb_mutex);
776 mgs_put_fsdb(mgs, fsdb);
/*
 * Search key and new values used by mgs_modify_handler(): mml_marker
 * supplies the comment and target name to match and the flags and cancel
 * time to apply.
 */
780 struct mgs_modify_lookup {
781 struct cfg_marker mml_marker;
/* Result of a pool search, as stored in mgs_search_pool_data.msp_status. */
785 enum mgs_search_pool_status {
786 POOL_STATUS_NONE = 0,
788 POOL_STATUS_OST_EXIST,
/*
 * Search criteria and result shared between mgs_search_pool() and its
 * llog callback mgs_search_pool_cb().
 */
791 struct mgs_search_pool_data {
796 enum mgs_search_pool_status msp_status;
/*
 * llog callback used by mgs_search_pool(), which walks the log in reverse.
 * Marker records decide whether the records of a section are skipped;
 * pool records matching the requested fs, pool and (optionally) OST set
 * msp_status and stop the scan with LLOG_PROC_BREAK.
 */
800 static int mgs_search_pool_cb(const struct lu_env *env,
801 struct llog_handle *llh,
802 struct llog_rec_hdr *rec, void *data)
804 struct mgs_search_pool_data *d = data;
805 struct lustre_cfg *lcfg = REC_DATA(rec);
806 int cfg_len = REC_DATA_LEN(rec);
809 char *ostname = NULL;
813 if (rec->lrh_type != OBD_CFG_REC) {
814 CDEBUG(D_ERROR, "Unhandled lrh_type: %#x\n", rec->lrh_type);
818 rc = lustre_cfg_sanity_check(lcfg, cfg_len);
820 CDEBUG(D_ERROR, "Insane cfg\n");
824 /* check if section is skipped */
825 if (lcfg->lcfg_command == LCFG_MARKER) {
826 struct cfg_marker *marker = lustre_cfg_buf(lcfg, 1);
828 if (!(marker->cm_flags & CM_END))
/* skip sections flagged CM_SKIP or belonging to another target */
831 d->msp_skip = (marker->cm_flags & CM_SKIP) ||
832 strcmp(d->msp_tgt, marker->cm_tgtname) != 0;
840 switch (lcfg->lcfg_command) {
843 ostname = lustre_cfg_string(lcfg, 3);
847 fsname = lustre_cfg_string(lcfg, 1);
848 poolname = lustre_cfg_string(lcfg, 2);
854 if (strcmp(d->msp_fs, fsname) != 0)
856 if (strcmp(d->msp_pool, poolname) != 0)
/* the OST is compared only when both sides provide one */
858 if (ostname && d->msp_ost && (strcmp(d->msp_ost, ostname) != 0))
861 /* Found a non-skipped marker match */
862 CDEBUG(D_MGS, "Matched pool rec %u cmd:0x%x %s.%s %s\n",
863 rec->lrh_index, lcfg->lcfg_command, fsname, poolname,
864 ostname ? ostname : "");
/* translate the matched command into a status and stop the scan */
866 switch (lcfg->lcfg_command) {
868 d->msp_status = POOL_STATUS_OST_EXIST;
869 RETURN(LLOG_PROC_BREAK);
871 d->msp_status = POOL_STATUS_EXIST;
872 RETURN(LLOG_PROC_BREAK);
874 d->msp_status = POOL_STATUS_EXIST;
875 RETURN(LLOG_PROC_BREAK);
877 d->msp_status = POOL_STATUS_NONE;
878 RETURN(LLOG_PROC_BREAK);
887 * Search a pool in a MGS configuration.
889 * positive - return the status of the pool,
/*
 * Scan an existing config llog backwards with mgs_search_pool_cb() to find
 * the state of a pool. Requires fsdb->fsdb_mutex to be held. Returns a
 * negative errno on failure, otherwise a POOL_STATUS_* value.
 */
893 int mgs_search_pool(const struct lu_env *env, struct mgs_device *mgs,
894 struct fs_db *fsdb, struct mgs_target_info *mti,
895 char *logname, char *devname, char *fsname, char *poolname,
898 struct llog_handle *loghandle;
899 struct llog_ctxt *ctxt;
900 struct mgs_search_pool_data d;
901 int status = POOL_STATUS_NONE;
906 LASSERT(mutex_is_locked(&fsdb->fsdb_mutex));
911 d.msp_pool = poolname;
913 d.msp_status = POOL_STATUS_NONE;
916 ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
917 LASSERT(ctxt != NULL);
/* the log must already exist; it is not created here */
918 rc = llog_open(env, ctxt, &loghandle, NULL, logname, LLOG_OPEN_EXISTS);
925 rc = llog_init_handle(env, loghandle, LLOG_F_IS_PLAIN, NULL);
/* a log with at most one record has nothing to search */
929 if (llog_get_size(loghandle) <= 1)
930 GOTO(out_close, rc = 0);
932 rc = llog_reverse_process(env, loghandle, mgs_search_pool_cb, &d, NULL);
/* the callback's status is only used when it stopped the scan itself */
933 if (rc == LLOG_PROC_BREAK)
934 status = d.msp_status;
937 llog_close(env, loghandle);
941 RETURN(rc < 0 ? rc : status);
/*
 * llog callback used by mgs_modify(): for each non-skipped marker record
 * whose comment and target name equal those in the lookup key, update the
 * marker's flags and cancel time and rewrite the record at its own index.
 */
944 static int mgs_modify_handler(const struct lu_env *env,
945 struct llog_handle *llh,
946 struct llog_rec_hdr *rec, void *data)
948 struct mgs_modify_lookup *mml = data;
949 struct cfg_marker *marker;
950 struct lustre_cfg *lcfg = REC_DATA(rec);
951 int cfg_len = REC_DATA_LEN(rec);
955 if (rec->lrh_type != OBD_CFG_REC) {
956 CERROR("unhandled lrh_type: %#x\n", rec->lrh_type);
960 rc = lustre_cfg_sanity_check(lcfg, cfg_len);
962 CERROR("Insane cfg\n");
966 /* We only care about markers */
967 if (lcfg->lcfg_command != LCFG_MARKER)
970 marker = lustre_cfg_buf(lcfg, 1);
971 if ((strcmp(mml->mml_marker.cm_comment, marker->cm_comment) == 0) &&
972 (strcmp(mml->mml_marker.cm_tgtname, marker->cm_tgtname) == 0) &&
973 !(marker->cm_flags & CM_SKIP)) {
974 /* Found a non-skipped marker match */
975 CDEBUG(D_MGS, "Changing rec %u marker %d %x->%x: %s %s\n",
976 rec->lrh_index, marker->cm_step,
977 marker->cm_flags, mml->mml_marker.cm_flags,
978 marker->cm_tgtname, marker->cm_comment);
979 /* Overwrite the old marker llog entry */
980 marker->cm_flags &= ~CM_EXCLUDE; /* in case we're unexcluding */
981 marker->cm_flags |= mml->mml_marker.cm_flags;
982 marker->cm_canceltime = mml->mml_marker.cm_canceltime;
/* write back in place, reusing the record's existing index */
983 rc = llog_write(env, llh, rec, rec->lrh_index);
992 * Modify an existing config log record (for CM_SKIP or CM_EXCLUDE)
994 * 0 - modified successfully,
995 * 1 - no modification was done
/*
 * Apply flags (and a cancel time when flags is non-zero) to the marker
 * records of an existing config llog that match devname and comment, by
 * running mgs_modify_handler() over the log. Requires fsdb->fsdb_mutex to
 * be held.
 */
998 static int mgs_modify(const struct lu_env *env, struct mgs_device *mgs,
999 struct fs_db *fsdb, struct mgs_target_info *mti,
1000 char *logname, char *devname, char *comment, int flags)
1002 struct llog_handle *loghandle;
1003 struct llog_ctxt *ctxt;
1004 struct mgs_modify_lookup *mml;
1009 LASSERT(mutex_is_locked(&fsdb->fsdb_mutex));
1010 CDEBUG(D_MGS, "modify %s/%s/%s fl=%x\n", logname, devname, comment,
1013 ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
1014 LASSERT(ctxt != NULL);
/* the log must already exist; it is not created here */
1015 rc = llog_open(env, ctxt, &loghandle, NULL, logname, LLOG_OPEN_EXISTS);
1022 rc = llog_init_handle(env, loghandle, LLOG_F_IS_PLAIN, NULL);
1024 GOTO(out_close, rc);
/* a log with at most one record has nothing to modify */
1026 if (llog_get_size(loghandle) <= 1)
1027 GOTO(out_close, rc = 0);
1031 GOTO(out_close, rc = -ENOMEM);
/* bounded copies of the search key into the lookup marker */
1032 rc = strscpy(mml->mml_marker.cm_comment, comment,
1033 sizeof(mml->mml_marker.cm_comment));
1036 rc = strscpy(mml->mml_marker.cm_tgtname, devname,
1037 sizeof(mml->mml_marker.cm_tgtname));
1040 /* Modify mostly means cancel */
1041 mml->mml_marker.cm_flags = flags;
1042 mml->mml_marker.cm_canceltime = flags ? ktime_get_real_seconds() : 0;
1043 mml->mml_modified = 0;
1044 rc = llog_process(env, loghandle, mgs_modify_handler, (void *)mml,
/* successful processing that changed no record is treated specially */
1046 if (!rc && !mml->mml_modified)
1053 llog_close(env, loghandle);
1056 CERROR("%s: modify %s/%s failed: rc = %d\n",
1057 mgs->mgs_obd->obd_name, mti->mti_svname, comment, rc);
/* release the context obtained from llog_get_context() */
1058 llog_ctxt_put(ctxt);
/*
 * State of the record-by-record log rewrite, stored in
 * mgs_replace_data.state. The code in this file uses the values
 * REPLACE_COPY, REPLACE_SKIP, REPLACE_UUID, REPLACE_SETUP and REPLACE_DONE.
 */
1062 enum replace_state {
1070 /** This structure is passed to mgs_replace_handler */
/*
 * Working state for a log rewrite: the target being changed, the llog
 * that receives the rewritten records, and the current replace_state.
 */
1071 struct mgs_replace_data {
1072 /* Nids are replaced for this target device */
1073 struct mgs_target_info target;
1074 /* Temporary modified llog */
1075 struct llog_handle *temp_llh;
1076 enum replace_state state;
1082 * Check: a) if block should be skipped
1083 * b) is it target block
1088 * \retval 0 should not be skipped
1089 * \retval 1 should be skipped
/*
 * Update mrd->state from a marker record. CM_SKIP sections switch to
 * REPLACE_SKIP at their start marker and back to REPLACE_COPY at their end
 * marker. For markers naming the target device, a start marker selects
 * REPLACE_SKIP ("add failnid" sections) or REPLACE_UUID, and an end marker
 * returns to REPLACE_COPY.
 */
1091 static int check_markers(struct lustre_cfg *lcfg,
1092 struct mgs_replace_data *mrd)
1094 struct cfg_marker *marker;
1096 /* Track markers. Find given device */
1097 if (lcfg->lcfg_command == LCFG_MARKER) {
1098 marker = lustre_cfg_buf(lcfg, 1);
1099 /* Clean llog from records marked as CM_SKIP.
1100 CM_EXCLUDE records are used for "active" command
1101 and can be restored if needed */
1102 if ((marker->cm_flags & (CM_SKIP | CM_START)) ==
1103 (CM_SKIP | CM_START)) {
1104 mrd->state = REPLACE_SKIP;
1108 if ((marker->cm_flags & (CM_SKIP | CM_END)) ==
1109 (CM_SKIP | CM_END)) {
1110 mrd->state = REPLACE_COPY;
1114 if (strcmp(mrd->target.mti_svname, marker->cm_tgtname) == 0) {
/* a marker must not be both a start and an end marker */
1115 LASSERT(!(marker->cm_flags & CM_START) ||
1116 !(marker->cm_flags & CM_END));
1117 if (marker->cm_flags & CM_START) {
/* only the first 11 characters of the comment are compared */
1118 if (!strncmp(marker->cm_comment,
1119 "add failnid", 11)) {
1120 mrd->state = REPLACE_SKIP;
1122 mrd->state = REPLACE_UUID;
1123 mrd->failover = NULL;
1125 } else if (marker->cm_flags & CM_END)
1126 mrd->state = REPLACE_COPY;
1128 if (!strncmp(marker->cm_comment,
/*
 * Build one lustre_cfg record for command cmd on device cfgname, with
 * s1..s4 stored as string buffers 1..4 and num, flags and nid stored in
 * the record header, then append it to llh. The record is freed after the
 * write.
 */
1138 int record_base_raw(const struct lu_env *env, struct llog_handle *llh,
1139 char *cfgname, __u32 num, __u32 flags, lnet_nid_t nid,
1140 int cmd, char *s1, char *s2, char *s3, char *s4)
/* the buffer set lives in the per-thread mgs info of this env */
1142 struct mgs_thread_info *mgi = mgs_env_info(env);
1143 struct llog_cfg_rec *lcr;
1146 CDEBUG(D_MGS, "lcfg %s %#x %s %s %s %s\n", cfgname,
1147 cmd, s1, s2, s3, s4);
1149 lustre_cfg_bufs_reset(&mgi->mgi_bufs, cfgname);
1151 lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 1, s1);
1153 lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 2, s2);
1155 lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 3, s3);
1157 lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 4, s4);
1159 lcr = lustre_cfg_rec_new(cmd, &mgi->mgi_bufs);
1163 lcr->lcr_cfg.lcfg_num = num;
1164 lcr->lcr_cfg.lcfg_flags = flags;
1165 lcr->lcr_cfg.lcfg_nid = nid;
/* append at the next free index of the log */
1166 rc = llog_write(env, llh, &lcr->lcr_hdr, LLOG_NEXT_IDX);
1168 lustre_cfg_rec_free(lcr);
1172 "failed to write lcfg %s %#x %s %s %s %s: rc = %d\n",
1173 cfgname, cmd, s1, s2, s3, s4, rc);
/* Same as record_base_raw() with num and flags both set to 0. */
1178 int record_base(const struct lu_env *env, struct llog_handle *llh,
1179 char *cfgname, lnet_nid_t nid, int cmd,
1180 char *s1, char *s2, char *s3, char *s4)
1182 return record_base_raw(env, llh, cfgname, 0, 0, nid, cmd,
/*
 * Append an LCFG_ADD_UUID record associating uuid with nid. The NID goes
 * into the record's numeric nid field when it is a nid4; otherwise that
 * field stays 0.
 */
1186 static inline int record_add_uuid(const struct lu_env *env,
1187 struct llog_handle *llh,
1188 struct lnet_nid *nid, char *uuid)
1190 lnet_nid_t nid4 = 0;
1193 if (nid_is_nid4(nid))
1194 nid4 = lnet_nid_to_nid4(nid);
/* NOTE(review): cfg2 holds the NID in string form - confirm when it is set */
1196 cfg2 = libcfs_nidstr(nid);
1197 return record_base(env, llh, NULL, nid4, LCFG_ADD_UUID, uuid,
/* Append an LCFG_ADD_CONN record for device devname carrying uuid. */
1201 static inline int record_add_conn(const struct lu_env *env,
1202 struct llog_handle *llh,
1203 char *devname, char *uuid)
1205 return record_base(env, llh, devname, 0, LCFG_ADD_CONN, uuid,
/* Append an LCFG_ATTACH record for devname with the given type and uuid. */
1209 static inline int record_attach(const struct lu_env *env,
1210 struct llog_handle *llh, char *devname,
1211 char *type, char *uuid)
1213 return record_base(env, llh, devname, 0, LCFG_ATTACH, type, uuid,
/* Append an LCFG_SETUP record for devname with string arguments s1..s4. */
1217 static inline int record_setup(const struct lu_env *env,
1218 struct llog_handle *llh, char *devname,
1219 char *s1, char *s2, char *s3, char *s4)
1221 return record_base(env, llh, devname, 0, LCFG_SETUP, s1, s2, s3, s4);
1225 * \retval <0 record processing error
1226 * \retval n record is processed. No need to copy the original one.
1227 * \retval 0 record is not processed.
/*
 * Rewrite the records inside the target device's block. In REPLACE_UUID
 * state an LCFG_ADD_UUID record is replaced by one ADD_UUID record per NID
 * parsed from mrd->target.mti_params. In REPLACE_SETUP state the
 * LCFG_SETUP record is re-emitted, followed by ADD_UUID / ADD_CONN records
 * for the NIDs remaining in mrd->failover. New records are written to
 * mrd->temp_llh.
 */
1229 static int process_command(const struct lu_env *env, struct lustre_cfg *lcfg,
1230 struct mgs_replace_data *mrd)
1233 struct lnet_nid nid;
1237 if (mrd->state == REPLACE_UUID &&
1238 lcfg->lcfg_command == LCFG_ADD_UUID) {
1239 /* LCFG_ADD_UUID command found. Let's skip original command
1240 and add passed nids */
1241 ptr = mrd->target.mti_params;
/* each successful parse moves ptr past the NID it consumed */
1242 while (class_parse_nid(ptr, &nid, &ptr) == 0) {
/* the node uuid is created once, from the first parsed NID */
1243 if (!mrd->nodeuuid) {
1244 rc = name_create(&mrd->nodeuuid,
1245 libcfs_nidstr(&nid), "");
1247 CERROR("Can't create uuid for "
1248 "nid %s, device %s\n",
1249 libcfs_nidstr(&nid),
1250 mrd->target.mti_svname);
1254 CDEBUG(D_MGS, "add nid %s with uuid %s, device %s\n",
1255 libcfs_nidstr(&nid),
1256 mrd->target.mti_params,
1258 rc = record_add_uuid(env,
1259 mrd->temp_llh, &nid,
1262 CWARN("%s: Can't add nid %s for uuid %s :rc=%d\n",
1263 mrd->target.mti_svname,
1264 libcfs_nidstr(&nid),
/* remember where parsing stopped; REPLACE_SETUP resumes from here */
1270 mrd->failover = ptr;
1275 if (nids_added == 0) {
/*
 * NOTE(review): nid is printed here although no NID was added; confirm
 * that it holds a parsed value on this path.
 */
1276 CERROR("No new nids were added, nid %s with uuid %s, device %s\n",
1277 libcfs_nidstr(&nid),
1278 mrd->nodeuuid ? mrd->nodeuuid : "NULL",
1279 mrd->target.mti_svname);
1280 name_destroy(&mrd->nodeuuid);
1283 mrd->state = REPLACE_SETUP;
1289 if (mrd->state == REPLACE_SETUP && lcfg->lcfg_command == LCFG_SETUP) {
1290 /* LCFG_SETUP command found. UUID should be changed */
1291 rc = record_setup(env,
1293 /* devname the same */
1294 lustre_cfg_string(lcfg, 0),
1295 /* s1 is not changed */
1296 lustre_cfg_string(lcfg, 1),
1298 /* s3 is not changed */
1299 lustre_cfg_string(lcfg, 3),
1300 /* s4 is not changed */
1301 lustre_cfg_string(lcfg, 4));
1303 name_destroy(&mrd->nodeuuid);
1307 if (mrd->failover) {
1308 ptr = mrd->failover;
1309 while (class_parse_nid(ptr, &nid, &ptr) == 0) {
1310 if (mrd->nodeuuid == NULL) {
1311 rc = name_create(&mrd->nodeuuid,
1312 libcfs_nidstr(&nid),
1318 CDEBUG(D_MGS, "add nid %s for failover %s\n",
1319 libcfs_nidstr(&nid), mrd->nodeuuid);
1320 rc = record_add_uuid(env, mrd->temp_llh, &nid,
1323 CWARN("%s: Can't add nid %s for failover %s :rc = %d\n",
1324 mrd->target.mti_svname,
1325 libcfs_nidstr(&nid),
1327 name_destroy(&mrd->nodeuuid);
1331 rc = record_add_conn(env,
1333 lustre_cfg_string(lcfg, 0),
1335 name_destroy(&mrd->nodeuuid);
/* emit the connection for a node uuid that is still pending */
1340 if (mrd->nodeuuid) {
1341 rc = record_add_conn(env, mrd->temp_llh,
1342 lustre_cfg_string(lcfg, 0),
1344 name_destroy(&mrd->nodeuuid);
1349 mrd->state = REPLACE_DONE;
1353 /* All new UUID are added. Skip. */
1354 if (mrd->state == REPLACE_SETUP &&
1355 lcfg->lcfg_command == LCFG_ADD_UUID)
1358 /* Other commands in the target device block */
1363 * Handler that is called for every record in the llog.
1364 * Records are processed in the order they are placed in the llog.
1366 * \param[in] llh log to be processed
1367 * \param[in] rec current record
1368 * \param[in] data mgs_replace_data structure
/*
 * llog callback for NID replacement. Each record of the processed log is
 * either skipped, handled by process_command(), or written unchanged to
 * mrd->temp_llh, depending on the state maintained by check_markers().
 */
1372 static int mgs_replace_nids_handler(const struct lu_env *env,
1373 struct llog_handle *llh,
1374 struct llog_rec_hdr *rec,
1377 struct mgs_replace_data *mrd;
1378 struct lustre_cfg *lcfg = REC_DATA(rec);
1379 int cfg_len = REC_DATA_LEN(rec);
1383 mrd = (struct mgs_replace_data *)data;
1385 if (rec->lrh_type != OBD_CFG_REC) {
1386 CERROR("unhandled lrh_type: %#x, cmd %x %s %s\n",
1387 rec->lrh_type, lcfg->lcfg_command,
1388 lustre_cfg_string(lcfg, 0),
1389 lustre_cfg_string(lcfg, 1));
1393 rc = lustre_cfg_sanity_check(lcfg, cfg_len);
1395 /* Do not copy any invalidated records */
1396 GOTO(skip_out, rc = 0);
/* markers update mrd->state; sections in REPLACE_SKIP state are dropped */
1399 rc = check_markers(lcfg, mrd);
1400 if (rc || mrd->state == REPLACE_SKIP)
1401 GOTO(skip_out, rc = 0);
1403 /* Write to new log all commands outside target device block */
1404 if (mrd->state == REPLACE_COPY)
1405 GOTO(copy_out, rc = 0);
/* leftover UUID/connection records after the rewrite are not copied */
1407 if (mrd->state == REPLACE_DONE &&
1408 (lcfg->lcfg_command == LCFG_ADD_UUID ||
1409 lcfg->lcfg_command == LCFG_ADD_CONN)) {
1411 CWARN("Previous failover is deleted, but new one is "
1412 "not set. This means you configure system "
1413 "without failover or passed wrong replace_nids "
1414 "command parameters. Device %s, passed nids %s\n",
1415 mrd->target.mti_svname, mrd->target.mti_params);
1416 GOTO(skip_out, rc = 0);
1419 rc = process_command(env, lcfg, mrd);
1426 /* Record is placed in temporary llog as is */
1427 rc = llog_write(env, mrd->temp_llh, rec, LLOG_NEXT_IDX);
1429 CDEBUG(D_MGS, "Copied idx=%d, rc=%d, len=%d, cmd %x %s %s\n",
1430 rec->lrh_index, rc, rec->lrh_len, lcfg->lcfg_command,
1431 lustre_cfg_string(lcfg, 0), lustre_cfg_string(lcfg, 1));
1435 CDEBUG(D_MGS, "Skipped idx=%d, rc=%d, len=%d, cmd %x %s %s\n",
1436 rec->lrh_index, rc, rec->lrh_len, lcfg->lcfg_command,
1437 lustre_cfg_string(lcfg, 0), lustre_cfg_string(lcfg, 1));
/*
 * Ask llog_is_empty() about the config llog called name, using the MGS
 * LLOG_CONFIG_ORIG_CTXT context; the context is released before returning.
 */
1441 static int mgs_log_is_empty(const struct lu_env *env,
1442 struct mgs_device *mgs, char *name)
1444 struct llog_ctxt *ctxt;
1447 ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
1448 LASSERT(ctxt != NULL);
1450 rc = llog_is_empty(env, ctxt, name);
1451 llog_ctxt_put(ctxt);
/*
 * Rewrite config llog logname through replace_handler. The log is first
 * copied to "<logname>.<timestamp>.bak" and erased, then recreated; the
 * backup is processed record by record with replace_handler, which writes
 * into the recreated log via mrd->temp_llh. If that fails, the original
 * is restored from the backup. An empty log is left untouched.
 */
1455 static int mgs_replace_log(const struct lu_env *env,
1456 struct obd_device *mgs,
1457 char *logname, char *devname,
1458 llog_cb_t replace_handler, void *data)
1460 struct llog_handle *orig_llh, *backup_llh;
1461 struct llog_ctxt *ctxt;
1462 struct mgs_replace_data *mrd;
1463 struct mgs_device *mgs_dev = lu2mgs_dev(mgs->obd_lu_dev);
1464 static struct obd_uuid cfg_uuid = { .uuid = "config_uuid" };
1466 int rc, rc2, buf_size;
1470 ctxt = llog_get_context(mgs, LLOG_CONFIG_ORIG_CTXT);
1471 LASSERT(ctxt != NULL);
1473 if (mgs_log_is_empty(env, mgs_dev, logname)) {
1474 /* Log is empty. Nothing to replace */
1475 GOTO(out_put, rc = 0);
/* the current wall-clock time makes the backup name unique */
1478 now = ktime_get_real_seconds();
1480 /* max time64_t in decimal fits into 20 bytes long string */
1481 buf_size = strlen(logname) + 1 + 20 + 1 + strlen(".bak") + 1;
1482 OBD_ALLOC(backup, buf_size);
1484 GOTO(out_put, rc = -ENOMEM);
1486 snprintf(backup, buf_size, "%s.%llu.bak", logname, now);
1488 rc = llog_backup(env, mgs, ctxt, ctxt, logname, backup);
1490 /* Now erase original log file. Connections are not allowed.
1491 Backup is already saved */
1492 rc = llog_erase(env, ctxt, NULL, logname);
/* a missing original (-ENOENT) is not reported as a backup failure */
1495 } else if (rc != -ENOENT) {
1496 CERROR("%s: can't make backup for %s: rc = %d\n",
1497 mgs->obd_name, logname, rc);
1501 /* open local log */
1502 rc = llog_open_create(env, ctxt, &orig_llh, NULL, logname);
1504 GOTO(out_restore, rc);
1506 rc = llog_init_handle(env, orig_llh, LLOG_F_IS_PLAIN, &cfg_uuid);
1508 GOTO(out_closel, rc);
1510 /* open backup llog */
1511 rc = llog_open(env, ctxt, &backup_llh, NULL, backup,
1514 GOTO(out_closel, rc);
1516 rc = llog_init_handle(env, backup_llh, LLOG_F_IS_PLAIN, NULL);
1518 GOTO(out_close, rc);
/* a backup with at most one record has nothing to copy */
1520 if (llog_get_size(backup_llh) <= 1)
1521 GOTO(out_close, rc = 0);
1525 GOTO(out_close, rc = -ENOMEM);
1526 /* devname is only needed information to replace UUID records */
1528 strscpy(mrd->target.mti_svname, devname,
1529 sizeof(mrd->target.mti_svname));
1530 /* data is parsed in llog callback */
1532 strscpy(mrd->target.mti_params, data,
1533 sizeof(mrd->target.mti_params));
1534 /* Copy records to this temporary llog */
1535 mrd->temp_llh = orig_llh;
1537 rc = llog_process(env, backup_llh, replace_handler,
/* NOTE(review): both closes pass NULL rather than env - confirm intent */
1541 rc2 = llog_close(NULL, backup_llh);
1545 rc2 = llog_close(NULL, orig_llh);
1551 CERROR("%s: llog should be restored: rc = %d\n",
/* copy the backup back over the original name */
1553 rc2 = llog_backup(env, mgs, ctxt, ctxt, backup,
1556 CERROR("%s: can't restore backup %s: rc = %d\n",
1557 mgs->obd_name, logname, rc2);
1561 OBD_FREE(backup, buf_size);
1564 llog_ctxt_put(ctxt);
1567 CERROR("%s: failed to replace log %s: rc = %d\n",
1568 mgs->obd_name, logname, rc);
1573 static int mgs_replace_nids_log(const struct lu_env *env,
1574 struct obd_device *obd,
1575 char *logname, char *devname, char *nids)
1577 CDEBUG(D_MGS, "Replace NIDs for %s in %s\n", devname, logname);
1578 return mgs_replace_log(env, obd, logname, devname,
1579 mgs_replace_nids_handler, nids);
1583 * Parse device name and get file system name and/or device index
1585 * @devname device name (ex. lustre-MDT0000)
1586 * @fsname file system name extracted from @devname and returned
1587 * to the caller (optional)
1588 * @index device index extracted from @devname and returned to
1589 * the caller (optional)
1591 * RETURN 0 success if we are only interested in
1592 * extracting fsname from devname.
1595 * LDD_F_SV_TYPE_* Besides extracting the fsname the
1596 * user also wants the index. Report to
1597 * the user the type of obd device the
1598 * returned index belongs too.
1600 * -EINVAL The obd device name is improper so
1601 * fsname could not be extracted.
1603 * -ENXIO Failed to extract the index out of
1604 * the obd device name. Most likely an
1605 * invalid obd device name
1607 static int mgs_parse_devname(char *devname, char *fsname, u32 *index)
1612 /* Extract fsname */
1614 rc = server_name2fsname(devname, fsname, NULL);
1616 CDEBUG(D_MGS, "Device name %s without fsname\n",
1623 rc = server_name2index(devname, index, NULL);
1625 CDEBUG(D_MGS, "Device name %s with wrong index\n",
1631 /* server_name2index can return LDD_F_SV_TYPE_* so always return rc */
1635 /* This is only called during replace_nids */
1636 static int only_mgs_is_running(struct obd_device *mgs_obd)
1638 int num_devices = class_obd_devs_count();
1639 int num_exports = 0;
1640 struct obd_export *exp;
1642 spin_lock(&mgs_obd->obd_dev_lock);
1643 list_for_each_entry(exp, &mgs_obd->obd_exports, exp_obd_chain) {
1644 /* skip self export */
1645 if (exp == mgs_obd->obd_self_export)
1650 if (num_exports > 1)
1651 CERROR("%s: node %s still connected during replace_nids connect_flags:%llx\n",
1653 libcfs_nidstr(&exp->exp_nid_stats->nid),
1654 exp_connect_flags(exp));
1656 spin_unlock(&mgs_obd->obd_dev_lock);
1658 /* osd, MGS and MGC + MGC export (nosvc starts MGC)
1659 * (wc -l /proc/fs/lustre/devices <= 3) && (non self exports == 1)
1661 return (num_devices <= 3) && (num_exports <= 1);
1664 static int name_create_mdt(char **logname, char *fsname, int mdt_idx)
1668 if (mdt_idx > INDEX_MAP_MAX_VALUE)
1671 snprintf(postfix, sizeof(postfix), "-MDT%04x", mdt_idx);
1672 return name_create(logname, fsname, postfix);
1676 * Replace nids for \a device to \a nids values
1678 * \param obd MGS obd device
1679 * \param devname nids need to be replaced for this device
1680 * (ex. lustre-OST0000)
1681 * \param nids nids list (ex. nid1,nid2,nid3)
1685 int mgs_replace_nids(const struct lu_env *env,
1686 struct mgs_device *mgs,
1687 char *devname, char *nids)
1689 /* Assume fsname is part of device name */
1690 char fsname[MTI_NAME_MAXLEN];
1694 struct fs_db *fsdb = NULL;
1697 struct obd_device *mgs_obd = mgs->mgs_obd;
1700 /* We can only change NIDs if no other nodes are connected */
1701 spin_lock(&mgs_obd->obd_dev_lock);
1702 conn_state = mgs_obd->obd_no_conn;
1703 mgs_obd->obd_no_conn = 1;
1704 spin_unlock(&mgs_obd->obd_dev_lock);
1706 /* We can not change nids if not only MGS is started */
1707 if (!only_mgs_is_running(mgs_obd)) {
1708 CERROR("Only MGS is allowed to be started\n");
1709 GOTO(out, rc = -EINPROGRESS);
1712 /* Get fsname and index */
1713 rc = mgs_parse_devname(devname, fsname, &index);
1717 rc = mgs_find_or_make_fsdb(env, mgs, fsname, &fsdb);
1719 CERROR("%s: can't find fsdb: rc = %d\n", fsname, rc);
1723 /* Process client llogs */
1724 rc = name_create(&logname, fsname, "-client");
1727 rc = mgs_replace_nids_log(env, mgs_obd, logname, devname, nids);
1728 name_destroy(&logname);
1730 CERROR("%s: error while replacing NIDs for %s: rc = %d\n",
1731 fsname, devname, rc);
1735 /* Process MDT llogs */
1736 for (i = 0; i < INDEX_MAP_SIZE * 8; i++) {
1737 if (!test_bit(i, fsdb->fsdb_mdt_index_map))
1739 rc = name_create_mdt(&logname, fsname, i);
1742 rc = mgs_replace_nids_log(env, mgs_obd, logname, devname, nids);
1743 name_destroy(&logname);
1749 spin_lock(&mgs_obd->obd_dev_lock);
1750 mgs_obd->obd_no_conn = conn_state;
1751 spin_unlock(&mgs_obd->obd_dev_lock);
1754 mgs_put_fsdb(mgs, fsdb);
1760 * This is called for every record in llog. Some of records are
1761 * skipped, others are copied to new log as is.
1762 * Records to be skipped are
1763 * marker records marked SKIP
1764 * records enclosed between SKIP markers
1766 * \param[in] llh log to be processed
1767 * \param[in] rec current record
1768 * \param[in] data mgs_replace_data structure
1772 static int mgs_clear_config_handler(const struct lu_env *env,
1773 struct llog_handle *llh,
1774 struct llog_rec_hdr *rec, void *data)
1776 struct mgs_replace_data *mrd;
1777 struct lustre_cfg *lcfg = REC_DATA(rec);
1778 int cfg_len = REC_DATA_LEN(rec);
1783 mrd = (struct mgs_replace_data *)data;
1785 if (rec->lrh_type != OBD_CFG_REC) {
1786 CDEBUG(D_MGS, "Config llog Name=%s, Record Index=%u, "
1787 "Unhandled Record Type=%#x\n", llh->lgh_name,
1788 rec->lrh_index, rec->lrh_type);
1792 rc = lustre_cfg_sanity_check(lcfg, cfg_len);
1794 CDEBUG(D_MGS, "Config llog Name=%s, Invalid config file.",
1799 if (lcfg->lcfg_command == LCFG_MARKER) {
1800 struct cfg_marker *marker;
1802 marker = lustre_cfg_buf(lcfg, 1);
1803 if (marker->cm_flags & CM_SKIP) {
1804 if (marker->cm_flags & CM_START)
1805 mrd->state = REPLACE_SKIP;
1806 if (marker->cm_flags & CM_END)
1807 mrd->state = REPLACE_COPY;
1808 /* SKIP section started or finished */
1809 CDEBUG(D_MGS, "Skip idx=%d, rc=%d, len=%d, "
1810 "cmd %x %s %s\n", rec->lrh_index, rc,
1811 rec->lrh_len, lcfg->lcfg_command,
1812 lustre_cfg_string(lcfg, 0),
1813 lustre_cfg_string(lcfg, 1));
1817 if (mrd->state == REPLACE_SKIP) {
1818 /* record enclosed between SKIP markers, skip it */
1819 CDEBUG(D_MGS, "Skip idx=%d, rc=%d, len=%d, "
1820 "cmd %x %s %s\n", rec->lrh_index, rc,
1821 rec->lrh_len, lcfg->lcfg_command,
1822 lustre_cfg_string(lcfg, 0),
1823 lustre_cfg_string(lcfg, 1));
1828 /* Record is placed in temporary llog as is */
1829 rc = llog_write(env, mrd->temp_llh, rec, LLOG_NEXT_IDX);
1831 CDEBUG(D_MGS, "Copied idx=%d, rc=%d, len=%d, cmd %x %s %s\n",
1832 rec->lrh_index, rc, rec->lrh_len, lcfg->lcfg_command,
1833 lustre_cfg_string(lcfg, 0), lustre_cfg_string(lcfg, 1));
/*
 * Directory CONFIGS/ may contain files which are not config logs to
 * be cleared.  Skip any llogs with a non-alphanumeric character after
 * the last '-'.  For example, fsname-MDT0000.sav, fsname-MDT0000.bak,
 * fsname-MDT0000.orig, fsname-MDT0000~, fsname-MDT0000.20150516, etc.
 *
 * Returns true only when \a logname contains a '-' and everything after
 * the last '-' is alphanumeric.
 */
static bool config_to_clear(const char *logname)
{
	const char *tail = strrchr(logname, '-');

	if (tail == NULL)
		return false;

	/* step over the '-' and then over every alphanumeric character */
	for (tail++; isalnum(*tail); tail++)
		;

	return *tail == '\0';
}
1858 * Clear config logs for \a name
1861 * \param mgs MGS device
1862 * \param name name of device or of filesystem
1863 * (ex. lustre-OST0000 or lustre) in later case all logs
1868 int mgs_clear_configs(const struct lu_env *env,
1869 struct mgs_device *mgs, const char *name)
1871 struct list_head dentry_list;
1872 struct mgs_direntry *dirent, *n;
1875 struct obd_device *mgs_obd = mgs->mgs_obd;
1880 /* Prevent clients and servers from connecting to mgs */
1881 spin_lock(&mgs_obd->obd_dev_lock);
1882 conn_state = mgs_obd->obd_no_conn;
1883 mgs_obd->obd_no_conn = 1;
1884 spin_unlock(&mgs_obd->obd_dev_lock);
1887 * config logs cannot be cleaned if anything other than
1890 if (!only_mgs_is_running(mgs_obd)) {
1891 CERROR("Only MGS is allowed to be started\n");
1892 GOTO(out, rc = -EBUSY);
1895 /* Find all the logs in the CONFIGS directory */
1896 rc = class_dentry_readdir(env, mgs, &dentry_list);
1898 CERROR("%s: cannot read config directory '%s': rc = %d\n",
1899 mgs_obd->obd_name, MOUNT_CONFIGS_DIR, rc);
1903 if (list_empty(&dentry_list)) {
1904 CERROR("%s: list empty reading config dir '%s': rc = %d\n",
1905 mgs_obd->obd_name, MOUNT_CONFIGS_DIR, -ENOENT);
1906 GOTO(out, rc = -ENOENT);
1909 OBD_ALLOC(namedash, strlen(name) + 2);
1910 if (namedash == NULL)
1911 GOTO(out, rc = -ENOMEM);
1912 snprintf(namedash, strlen(name) + 2, "%s-", name);
1914 list_for_each_entry(dirent, &dentry_list, mde_list) {
1915 if (strcmp(name, dirent->mde_name) &&
1916 strncmp(namedash, dirent->mde_name, strlen(namedash)))
1918 if (!config_to_clear(dirent->mde_name))
1920 CDEBUG(D_MGS, "%s: Clear config log %s\n",
1921 mgs_obd->obd_name, dirent->mde_name);
1922 rc = mgs_replace_log(env, mgs_obd, dirent->mde_name, NULL,
1923 mgs_clear_config_handler, NULL);
1928 list_for_each_entry_safe(dirent, n, &dentry_list, mde_list) {
1929 list_del_init(&dirent->mde_list);
1930 mgs_direntry_free(dirent);
1932 OBD_FREE(namedash, strlen(name) + 2);
1934 spin_lock(&mgs_obd->obd_dev_lock);
1935 mgs_obd->obd_no_conn = conn_state;
1936 spin_unlock(&mgs_obd->obd_dev_lock);
1941 static int record_lov_setup(const struct lu_env *env, struct llog_handle *llh,
1942 char *devname, struct lov_desc *desc)
1944 struct mgs_thread_info *mgi = mgs_env_info(env);
1945 struct llog_cfg_rec *lcr;
1948 lustre_cfg_bufs_reset(&mgi->mgi_bufs, devname);
1949 lustre_cfg_bufs_set(&mgi->mgi_bufs, 1, desc, sizeof(*desc));
1950 lcr = lustre_cfg_rec_new(LCFG_SETUP, &mgi->mgi_bufs);
1954 rc = llog_write(env, llh, &lcr->lcr_hdr, LLOG_NEXT_IDX);
1955 lustre_cfg_rec_free(lcr);
1959 static int record_lmv_setup(const struct lu_env *env, struct llog_handle *llh,
1960 char *devname, struct lmv_desc *desc)
1962 struct mgs_thread_info *mgi = mgs_env_info(env);
1963 struct llog_cfg_rec *lcr;
1966 lustre_cfg_bufs_reset(&mgi->mgi_bufs, devname);
1967 lustre_cfg_bufs_set(&mgi->mgi_bufs, 1, desc, sizeof(*desc));
1968 lcr = lustre_cfg_rec_new(LCFG_SETUP, &mgi->mgi_bufs);
1972 rc = llog_write(env, llh, &lcr->lcr_hdr, LLOG_NEXT_IDX);
1973 lustre_cfg_rec_free(lcr);
1977 static inline int record_mdc_add(const struct lu_env *env,
1978 struct llog_handle *llh,
1979 char *logname, char *mdcuuid,
1980 char *mdtuuid, char *index,
1983 return record_base(env,llh,logname,0,LCFG_ADD_MDC,
1984 mdtuuid, index, gen, mdcuuid);
1987 static inline int record_lov_add(const struct lu_env *env,
1988 struct llog_handle *llh,
1989 char *lov_name, char *ost_uuid,
1990 char *index, char *gen)
1992 return record_base(env, llh, lov_name, 0, LCFG_LOV_ADD_OBD,
1993 ost_uuid, index, gen, NULL);
1996 static inline int record_mount_opt(const struct lu_env *env,
1997 struct llog_handle *llh,
1998 char *profile, char *lov_name,
2001 return record_base(env, llh, NULL, 0, LCFG_MOUNTOPT,
2002 profile, lov_name, mdc_name, NULL);
2005 static int record_marker(const struct lu_env *env,
2006 struct llog_handle *llh,
2007 struct fs_db *fsdb, __u32 flags,
2008 char *tgtname, char *comment)
2010 struct mgs_thread_info *mgi = mgs_env_info(env);
2011 struct llog_cfg_rec *lcr;
2015 if (flags & CM_START)
2017 mgi->mgi_marker.cm_step = fsdb->fsdb_gen;
2018 mgi->mgi_marker.cm_flags = flags;
2019 mgi->mgi_marker.cm_vers = LUSTRE_VERSION_CODE;
2020 cplen = strscpy(mgi->mgi_marker.cm_tgtname, tgtname,
2021 sizeof(mgi->mgi_marker.cm_tgtname));
2024 cplen = strscpy(mgi->mgi_marker.cm_comment, comment,
2025 sizeof(mgi->mgi_marker.cm_comment));
2028 mgi->mgi_marker.cm_createtime = ktime_get_real_seconds();
2029 mgi->mgi_marker.cm_canceltime = 0;
2030 lustre_cfg_bufs_reset(&mgi->mgi_bufs, NULL);
2031 lustre_cfg_bufs_set(&mgi->mgi_bufs, 1, &mgi->mgi_marker,
2032 sizeof(mgi->mgi_marker));
2033 lcr = lustre_cfg_rec_new(LCFG_MARKER, &mgi->mgi_bufs);
2037 rc = llog_write(env, llh, &lcr->lcr_hdr, LLOG_NEXT_IDX);
2038 lustre_cfg_rec_free(lcr);
2042 static int record_start_log(const struct lu_env *env, struct mgs_device *mgs,
2043 struct llog_handle **llh, char *name)
2045 static struct obd_uuid cfg_uuid = { .uuid = "config_uuid" };
2046 struct llog_ctxt *ctxt;
2051 GOTO(out, rc = -EBUSY);
2053 ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
2055 GOTO(out, rc = -ENODEV);
2056 LASSERT(ctxt->loc_obd == mgs->mgs_obd);
2058 rc = llog_open_create(env, ctxt, llh, NULL, name);
2061 rc = llog_init_handle(env, *llh, LLOG_F_IS_PLAIN, &cfg_uuid);
2063 llog_close(env, *llh);
2065 llog_ctxt_put(ctxt);
2068 CERROR("%s: can't start log %s: rc = %d\n",
2069 mgs->mgs_obd->obd_name, name, rc);
2075 static int record_end_log(const struct lu_env *env, struct llog_handle **llh)
2079 rc = llog_close(env, *llh);
2085 /******************** config "macros" *********************/
2087 /* write an lcfg directly into a log (with markers) */
2088 static int mgs_write_log_direct(const struct lu_env *env,
2089 struct mgs_device *mgs, struct fs_db *fsdb,
2090 char *logname, struct llog_cfg_rec *lcr,
2091 char *devname, char *comment)
2093 struct llog_handle *llh = NULL;
2098 rc = record_start_log(env, mgs, &llh, logname);
2102 /* FIXME These should be a single journal transaction */
2103 rc = record_marker(env, llh, fsdb, CM_START, devname, comment);
2106 rc = llog_write(env, llh, &lcr->lcr_hdr, LLOG_NEXT_IDX);
2109 rc = record_marker(env, llh, fsdb, CM_END, devname, comment);
2113 record_end_log(env, &llh);
2117 /* write the lcfg in all logs for the given fs */
2118 static int mgs_write_log_direct_all(const struct lu_env *env,
2119 struct mgs_device *mgs,
2121 struct mgs_target_info *mti,
2122 struct llog_cfg_rec *lcr, char *devname,
2123 char *comment, int server_only)
2125 struct list_head log_list;
2126 struct mgs_direntry *dirent, *n;
2127 char *fsname = mti->mti_fsname;
2128 int rc = 0, len = strlen(fsname);
2131 /* Find all the logs in the CONFIGS directory */
2132 rc = class_dentry_readdir(env, mgs, &log_list);
2136 /* Could use fsdb index maps instead of directory listing */
2137 list_for_each_entry_safe(dirent, n, &log_list, mde_list) {
2138 list_del_init(&dirent->mde_list);
2139 /* don't write to sptlrpc rule log */
2140 if (strstr(dirent->mde_name, "-sptlrpc") != NULL)
2143 /* caller wants write server logs only */
2144 if (server_only && strstr(dirent->mde_name, "-client") != NULL)
2147 if (strlen(dirent->mde_name) <= len ||
2148 strncmp(fsname, dirent->mde_name, len) != 0 ||
2149 dirent->mde_name[len] != '-')
2152 CDEBUG(D_MGS, "Changing log %s\n", dirent->mde_name);
2153 /* Erase any old settings of this same parameter */
2154 rc = mgs_modify(env, mgs, fsdb, mti, dirent->mde_name,
2155 devname, comment, CM_SKIP);
2157 CERROR("%s: Can't modify llog %s: rc = %d\n",
2158 mgs->mgs_obd->obd_name, dirent->mde_name, rc);
2161 /* Write the new one */
2162 rc = mgs_write_log_direct(env, mgs, fsdb, dirent->mde_name,
2163 lcr, devname, comment);
2165 CERROR("%s: writing log %s: rc = %d\n",
2166 mgs->mgs_obd->obd_name, dirent->mde_name, rc);
2168 mgs_direntry_free(dirent);
/*
 * Replace an MDT name in an MDT configuration name.
 * mdtname should be formatted like this: <fsname>-MDT<mdtindex>
 */
/*
 * Rewrite references to the source MDT inside the string "name" so that
 * they refer to the new MDT.
 *
 * The visible code locates the "-MDT" suffix of both mdtsrc and mdtnew,
 * compares the first n bytes of "name" with mdtsrc, and then walks every
 * occurrence of the source suffix in "name", overwriting it with the new
 * suffix when it is followed by end-of-string, '-' or '_'.
 *
 * NOTE(review): the statements that compute n, validate src/new and return
 * are not visible in this view, so the return values and the role of "size"
 * are not documented here; confirm against the full source.
 */
2179 int mgs_replace_mdtname(char *name, size_t size, char *mdtsrc, char *mdtnew)
2181 char *ptr, *src, *new;
/* isolate the "-MDTxxxx" part of the source and of the new target name */
2187 src = strstr(mdtsrc, "-MDT");
2188 new = strstr(mdtnew, "-MDT");
2193 if (strncmp(name, mdtsrc, n) != 0)
2195 if (name[n] == '\0')
2201 ptr = strstr(name, src);
2203 char *end = ptr + n;
/* only replace whole name components, not a match inside a longer token */
2205 if (*end == '\0' || *end == '-' || *end == '_')
/* strncpy() writes n bytes over the match and adds no terminator here; */
/* presumably both suffixes have the same length, TODO confirm */
2206 strncpy(ptr, new, n);
2208 ptr = strstr(ptr, src);
2215 bool mgs_copy_skipped(struct cfg_marker *marker, struct mgs_target_info *mti,
2218 size_t fsname_len = strlen(mti->mti_fsname);
2219 char *comment = marker->cm_comment;
2220 char *tgtname = marker->cm_tgtname;
2222 if (marker->cm_flags & CM_SKIP)
2225 /* fsname does not match? */
2226 if (strncmp(tgtname, mti->mti_fsname, fsname_len) != 0)
2229 if (tgtname[fsname_len] != '\0' && tgtname[fsname_len] != '-')
2232 /* filter out existing sections */
2233 if (strncmp(comment, "add mdt", 7) == 0 ||
2234 strncmp(comment, "lov setup", 9) == 0 ||
2235 strncmp(comment, "add osp", 7) == 0 ||
2236 strncmp(comment, "add osc", 7) == 0 ||
2237 strncmp(comment, "add failnid", 11) == 0)
2240 /* exclude paramater for invalid osp devices (--writeconf case) */
2241 if (strncmp(tgtname, mti->mti_svname, strlen(mti->mti_svname)) == 0)
2248 * Update the marker->cm_tgtname with the new MDT target name:
2249 * marker ... lustre-MDT0000 'mdt.identity_upcall'
2250 * --> marker ... lustre-MDT0003 'mdt.identity_upcall'
2253 void mgs_copy_update_marker(char *fsname, struct cfg_marker *marker,
2254 char *mdtsrc, char *mdtnew)
2256 mgs_replace_mdtname(marker->cm_tgtname, sizeof(marker->cm_tgtname),
2260 struct mgs_copy_data {
2261 struct llog_handle *mcd_llh;
2262 struct mgs_target_info *mcd_mti;
2263 struct fs_db *mcd_fsdb;
/*
 * Walk through MDT config log records and convert the related records
 * for the new MDT.
 *
 * The osp sections (add osp/add osc) are excluded. Those sections are
 * generated from client configuration by mgs_steal_osp_rec_from_client()
 */
/*
 * llog callback run on every record of an existing MDT config log; "data" is
 * the struct mgs_copy_data prepared by the caller.
 *
 * Visible behaviour: records that are not OBD_CFG_REC or that fail
 * lustre_cfg_sanity_check() are reported with CERROR.  For marker records
 * the skip state is recomputed at CM_START with mgs_copy_skipped(), the
 * marker target name is rewritten for the new MDT and the marker is recorded
 * in the destination log; a CM_END marker switches skipping back on.  For
 * the other records up to five cfg buffers are collected, the device name in
 * buffer 0 is rewritten for the LOV add/del commands (and possibly for other
 * commands whose case labels are not visible here), and the record is
 * re-emitted with record_base_raw().
 *
 * NOTE(review): the return statements and several conditions are not visible
 * in this view; confirm error codes against the full source.
 */
2274 static int mgs_copy_mdt_llog_handler(const struct lu_env *env,
2275 struct llog_handle *llh,
2276 struct llog_rec_hdr *rec, void *data)
2278 struct mgs_copy_data *mcd = data;
2279 struct mgs_target_info *mti = mcd->mcd_mti;
2280 struct llog_handle *mdt_llh = mcd->mcd_llh;
2281 struct fs_db *fsdb = mcd->mcd_fsdb;
/* NOTE(review): lrh_len is the whole record length; mgs_clear_config_handler */
/* passes REC_DATA_LEN(rec) to the same sanity check instead - confirm */
2282 int cfg_len = rec->lrh_len;
2283 char *cfg_buf = (char *)(rec + 1);
2284 struct lustre_cfg *lcfg;
/* the name of the log being read is the name of the source MDT */
2285 char *mdtsrc = llh->lgh_name;
2292 if (rec->lrh_type != OBD_CFG_REC) {
2293 CERROR("unhandled lrh_type: %#x\n", rec->lrh_type);
2297 rc = lustre_cfg_sanity_check(cfg_buf, cfg_len);
2299 CERROR("Insane cfg\n");
2303 lcfg = (struct lustre_cfg *)cfg_buf;
2305 if (lcfg->lcfg_command == LCFG_MARKER) {
2306 struct cfg_marker *marker = lustre_cfg_buf(lcfg, 1);
/* a new section starts: decide whether it is copied or left out */
2308 if (marker->cm_flags & CM_START)
2309 mcd->mcd_skip = mgs_copy_skipped(marker, mti, mdtsrc);
2314 mgs_copy_update_marker(mti->mti_fsname, marker,
2315 mdtsrc, mti->mti_svname);
2316 rc = record_marker(env, mdt_llh, fsdb, marker->cm_flags,
2317 marker->cm_tgtname, marker->cm_comment);
/* records between sections are not copied */
2321 if (marker->cm_flags & CM_END)
2322 mcd->mcd_skip = true;
2330 /* init cfg strings with the client record values */
2331 for (i = 0; i < 5; i++)
2332 s[i] = lustre_cfg_buf(lcfg, i);
2334 /* convert records with the new target name */
2335 switch (lcfg->lcfg_command) {
2344 case LCFG_LOV_ADD_INA:
2345 case LCFG_LOV_ADD_OBD:
2346 case LCFG_LOV_DEL_OBD:
2350 rc = mgs_replace_mdtname(s[0], LUSTRE_CFG_BUFLEN(lcfg, 0),
2351 mdtsrc, mti->mti_svname);
/* re-emit the record, keeping num, flags, nid and command of the source */
2357 rc = record_base_raw(env, mdt_llh, s[0],
2358 lcfg->lcfg_num, lcfg->lcfg_flags, lcfg->lcfg_nid,
2359 lcfg->lcfg_command, s[1], s[2], s[3], s[4]);
2365 /* copy an existing MDT configuration records for a new MDT configuration */
/*
 * Visible behaviour: nothing is attempted when the fsdb knows fewer than two
 * MDTs.  The MDT index map is scanned for an MDT other than the one being
 * registered whose config log is not empty; that log is opened and every
 * record is passed to mgs_copy_mdt_llog_handler(), which writes into the log
 * named after mti->mti_svname.  A failure while processing the records is
 * reported with CWARN.
 *
 * NOTE(review): the return statements are not visible in this view.
 */
2366 static int mgs_copy_llog_from_mdt(const struct lu_env *env,
2367 struct mgs_device *mgs,
2368 struct mgs_target_info *mti,
2371 char *logname = NULL;
2372 struct llog_handle *loghandle;
2373 struct llog_ctxt *ctxt;
2374 struct mgs_copy_data mcd = { 0 };
2380 if (fsdb->fsdb_mdt_count < 2)
/* NOTE(review): the scan is bounded by INDEX_MAP_SIZE bits, while */
/* mgs_replace_nids() walks INDEX_MAP_SIZE * 8 bits of the same map - */
/* confirm which bound is intended */
2383 for_each_set_bit(i, fsdb->fsdb_mdt_index_map, INDEX_MAP_SIZE) {
/* the MDT being registered cannot be its own source */
2384 if (mti->mti_stripe_index == i)
2387 rc = name_create_mdt(&logname, fsdb->fsdb_name, i);
2391 if (mgs_log_is_empty(env, mgs, logname)) {
2392 name_destroy(&logname);
2399 /* is there an existing MDT configuration? */
2403 /* check if MDT source name is valid */
/* both names must share the "<fsname>-" prefix and have the same length */
2404 if (strncmp(mti->mti_svname, logname, strlen(mti->mti_fsname) + 1) != 0)
2405 GOTO(out_free, rc = -EINVAL);
2406 if (strlen(mti->mti_svname) != strlen(logname))
2407 GOTO(out_free, rc = -EINVAL);
2409 ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
2410 LASSERT(ctxt != NULL);
2412 rc = llog_open(env, ctxt, &loghandle, NULL, logname,
2417 rc = llog_init_handle(env, loghandle, LLOG_F_IS_PLAIN, NULL);
2419 GOTO(out_close, rc);
2422 mcd.mcd_fsdb = fsdb;
/* nothing is copied until a start marker says otherwise */
2423 mcd.mcd_skip = true;
2425 rc = record_start_log(env, mgs, &mcd.mcd_llh, mti->mti_svname);
2427 GOTO(out_close, rc);
2429 rc = llog_process_or_fork(env, loghandle, mgs_copy_mdt_llog_handler,
2432 CWARN("%s: Fail to copy records from %s configuration: rc = %d\n",
2433 mti->mti_svname, logname, rc);
2435 record_end_log(env, &mcd.mcd_llh);
2439 llog_close(env, loghandle);
2441 llog_ctxt_put(ctxt);
2443 name_destroy(&logname);
2448 struct mgs_steal_data {
2449 struct llog_handle *msd_llh;
2450 struct fs_db *msd_fsdb;
2451 struct mgs_target_info *msd_mti;
2460 int mgs_steal_update_names(char *tgtname, struct mgs_steal_data *msd)
2462 struct mgs_target_info *mti = msd->msd_mti;
2465 name_destroy(&msd->msd_ospname);
2466 name_destroy(&msd->msd_lovname);
2467 name_destroy(&msd->msd_lovuuid);
2469 rc = name_create_osp(&msd->msd_ospname, &msd->msd_devtype, tgtname,
2470 mti->mti_stripe_index);
2474 rc = name_create_lov(&msd->msd_lovname, mti->mti_svname);
2478 rc = name_create(&msd->msd_lovuuid, msd->msd_lovname, "_UUID");
2485 name_destroy(&msd->msd_lovname);
2487 name_destroy(&msd->msd_ospname);
2493 int mgs_steal_skipped(struct mgs_steal_data *msd,
2494 struct cfg_marker *marker)
2496 char *tgtname = marker->cm_tgtname;
2497 char *comment = marker->cm_comment;
2498 char *fsname = msd->msd_mti->mti_fsname;
2499 size_t fsname_len = strlen(fsname);
2501 if ((marker->cm_flags & CM_SKIP) ||
2502 strncmp(tgtname, fsname, fsname_len) != 0 ||
2503 tgtname[fsname_len] != '-')
2506 if (strncmp(comment, "add mdc", 7) != 0 &&
2507 strncmp(comment, "add osc", 7) != 0 &&
2508 strncmp(comment, "add failnid", 11) != 0)
2511 /* check for invalid osp devices (--writeconf case) */
2512 if (strcmp(tgtname, msd->msd_mti->mti_svname) == 0)
/*
 * Walk through the client config log for mdc/osc records and convert the
 * related records into osp records for the new MDT target.
 */
/*
 * llog callback run on every record of the client config log; "data" is the
 * struct mgs_steal_data prepared by the caller.
 *
 * Visible behaviour: records that are not OBD_CFG_REC or that fail
 * lustre_cfg_sanity_check() are reported with CERROR.  For marker records
 * the skip state is recomputed at CM_START with mgs_steal_skipped() and the
 * osp/lov names are rebuilt for the marker's target; an "add mdc" comment is
 * recorded as "add osp"; a CM_END marker switches skipping back on.  For the
 * other records up to five cfg buffers are collected, some of them are
 * replaced by the names kept in msd depending on the command, and the record
 * is re-emitted with record_base_raw().
 *
 * NOTE(review): most case labels and all return statements are not visible
 * in this view; confirm which commands each substitution applies to.
 */
2522 static int mgs_steal_client_llog_handler(const struct lu_env *env,
2523 struct llog_handle *llh,
2524 struct llog_rec_hdr *rec, void *data)
2526 struct mgs_steal_data *msd = data;
2527 struct fs_db *fsdb = msd->msd_fsdb;
/* NOTE(review): lrh_len is the whole record length, not REC_DATA_LEN(rec) */
2528 int cfg_len = rec->lrh_len;
2529 char *cfg_buf = (char *)(rec + 1);
2530 struct lustre_cfg *lcfg;
2537 if (rec->lrh_type != OBD_CFG_REC) {
2538 CERROR("%s: unhandled lrh_type %#x: rc = %d\n",
2539 llh->lgh_name, rec->lrh_type, -EINVAL);
2543 rc = lustre_cfg_sanity_check(cfg_buf, cfg_len);
2545 CERROR("%s: insane cfg: rc = %d\n", llh->lgh_name, rc);
2549 lcfg = (struct lustre_cfg *)cfg_buf;
2551 if (lcfg->lcfg_command == LCFG_MARKER) {
2552 struct cfg_marker *marker = lustre_cfg_buf(lcfg, 1);
2553 char *comment = marker->cm_comment;
/* a new section starts: decide whether it is converted or left out */
2555 if (marker->cm_flags & CM_START) {
2556 msd->msd_skip = mgs_steal_skipped(msd, marker);
2560 rc = mgs_steal_update_names(marker->cm_tgtname, msd);
/* on an MDT the client-side mdc section becomes an osp section */
2568 if (strncmp(comment, "add mdc", 7) == 0)
2569 comment = "add osp";
2571 rc = record_marker(env, msd->msd_llh, fsdb, marker->cm_flags,
2572 marker->cm_tgtname, comment);
/* records between sections are not converted */
2576 if (marker->cm_flags & CM_END)
2577 msd->msd_skip = true;
2585 /* init cfg strings with the client record values */
2586 for (i = 0; i < 5; i++)
2587 s[i] = lustre_cfg_buf(lcfg, i);
2589 /* convert the mdc records to osp */
2590 switch (lcfg->lcfg_command) {
2594 s[1] = msd->msd_devtype;
2595 s[2] = msd->msd_lovuuid;
2602 s[0] = msd->msd_ospname;
2606 case LCFG_LOV_ADD_INA:
2607 case LCFG_LOV_ADD_OBD:
2608 case LCFG_LOV_DEL_OBD:
/* LOV membership changes are redirected to the new MDT's lov */
2609 s[0] = msd->msd_lovname;
/* re-emit the record, keeping num, flags, nid and command of the source */
2615 rc = record_base_raw(env, msd->msd_llh, s[0],
2616 lcfg->lcfg_num, lcfg->lcfg_flags, lcfg->lcfg_nid,
2617 lcfg->lcfg_command, s[1], s[2], s[3], s[4]);
/*
 * Steal mdc/osc records from the client configuration and convert them to
 * create osp devices (OST and MDT) for a new MDT configuration.
 *
 * fsdb->fsdb_mutex is already held in mgs_write_log_target,
 * taken over from mgs_get_fsdb_from_llog.
 */
/*
 * Visible behaviour: when the client log is not empty it is opened and every
 * record is passed to mgs_steal_client_llog_handler(), which writes into the
 * log named after mti->mti_svname.  A failure while processing the records
 * is reported with CWARN.  The names accumulated in msd are released before
 * the client log is closed.
 *
 * NOTE(review): the return statements are not visible in this view, in
 * particular the value returned for an empty client log.
 */
2629 static int mgs_steal_osp_rec_from_client(const struct lu_env *env,
2630 struct mgs_device *mgs,
2632 struct mgs_target_info *mti,
2635 struct llog_handle *loghandle;
2636 struct llog_ctxt *ctxt;
2637 struct mgs_steal_data msd = { 0 };
2642 if (mgs_log_is_empty(env, mgs, client_name))
2645 ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
2646 LASSERT(ctxt != NULL);
2648 rc = llog_open(env, ctxt, &loghandle, NULL, client_name,
2653 rc = llog_init_handle(env, loghandle, LLOG_F_IS_PLAIN, NULL);
2655 GOTO(out_close, rc);
2657 msd.msd_fsdb = fsdb;
/* nothing is converted until a start marker says otherwise */
2658 msd.msd_skip = true;
2661 rc = record_start_log(env, mgs, &msd.msd_llh, mti->mti_svname);
2663 GOTO(out_close, rc);
2665 rc = llog_process_or_fork(env, loghandle, mgs_steal_client_llog_handler,
2668 CWARN("%s: Fail to generate osp devices from %s : rc = %d\n",
2669 mti->mti_svname, client_name, rc);
2671 record_end_log(env, &msd.msd_llh);
/* release the names left over from the last converted section */
2673 name_destroy(&msd.msd_ospname);
2674 name_destroy(&msd.msd_lovname);
2675 name_destroy(&msd.msd_lovuuid);
2678 llog_close(env, loghandle);
2680 llog_ctxt_put(ctxt);
2685 /* mount opt is the third thing in client logs */
2686 static int mgs_write_log_mount_opt(const struct lu_env *env,
2687 struct mgs_device *mgs, struct fs_db *fsdb,
2690 struct llog_handle *llh = NULL;
2695 CDEBUG(D_MGS, "Writing mount options log for %s\n", logname);
2697 rc = record_start_log(env, mgs, &llh, logname);
2701 rc = record_marker(env, llh, fsdb, CM_START, logname, "mount opts");
2704 rc = record_mount_opt(env, llh, logname, fsdb->fsdb_clilov,
2708 rc = record_marker(env, llh, fsdb, CM_END, logname, "mount opts");
2712 record_end_log(env, &llh);
2716 /* lmv is the second thing for client logs */
2717 /* copied from mgs_write_log_lov. Please refer to that. */
2718 static int mgs_write_log_lmv(const struct lu_env *env,
2719 struct mgs_device *mgs,
2721 struct mgs_target_info *mti,
2722 char *logname, char *lmvname)
2724 struct llog_handle *llh = NULL;
2725 struct lmv_desc *lmvdesc;
2730 CDEBUG(D_MGS, "Writing lmv(%s) log for %s\n", lmvname, logname);
2732 OBD_ALLOC_PTR(lmvdesc);
2733 if (lmvdesc == NULL)
2735 lmvdesc->ld_active_tgt_count = 0;
2736 lmvdesc->ld_tgt_count = 0;
2737 sprintf((char *)lmvdesc->ld_uuid.uuid, "%s_UUID", lmvname);
2738 uuid = (char *)lmvdesc->ld_uuid.uuid;
2740 rc = record_start_log(env, mgs, &llh, logname);
2743 rc = record_marker(env, llh, fsdb, CM_START, lmvname, "lmv setup");
2746 rc = record_attach(env, llh, lmvname, "lmv", uuid);
2749 rc = record_lmv_setup(env, llh, lmvname, lmvdesc);
2752 rc = record_marker(env, llh, fsdb, CM_END, lmvname, "lmv setup");
2756 record_end_log(env, &llh);
2758 OBD_FREE_PTR(lmvdesc);
2762 /* lov is the first thing in the mdt and client logs */
2763 static int mgs_write_log_lov(const struct lu_env *env, struct mgs_device *mgs,
2764 struct fs_db *fsdb, struct mgs_target_info *mti,
2765 char *logname, char *lovname)
2767 struct llog_handle *llh = NULL;
2768 struct lov_desc *lovdesc;
2773 CDEBUG(D_MGS, "Writing lov(%s) log for %s\n", lovname, logname);
2776 * #01 L attach 0:lov_mdsA 1:lov 2:71ccb_lov_mdsA_19f961a9e1
2777 * #02 L lov_setup 0:lov_mdsA 1:(struct lov_desc)
2778 * uuid=lov1_UUID, stripe count=1, size=1048576, offset=0, pattern=0
2781 /* FIXME just make lov_setup accept empty desc (put uuid in buf 2) */
2782 OBD_ALLOC_PTR(lovdesc);
2783 if (lovdesc == NULL)
2785 lovdesc->ld_magic = LOV_DESC_MAGIC;
2786 lovdesc->ld_tgt_count = 0;
2787 /* Defaults. Can be changed later by lcfg config_param */
2788 lovdesc->ld_default_stripe_count = 1;
2789 lovdesc->ld_pattern = LOV_PATTERN_RAID0;
2790 lovdesc->ld_default_stripe_size = LOV_DESC_STRIPE_SIZE_DEFAULT;
2791 lovdesc->ld_default_stripe_offset = -1;
2792 lovdesc->ld_qos_maxage = LOV_DESC_QOS_MAXAGE_DEFAULT;
2793 sprintf((char *)lovdesc->ld_uuid.uuid, "%s_UUID", lovname);
2794 /* can these be the same? */
2795 uuid = (char *)lovdesc->ld_uuid.uuid;
2797 /* This should always be the first entry in a log.
2798 * rc = mgs_clear_log(obd, logname);
2800 rc = record_start_log(env, mgs, &llh, logname);
2803 /* FIXME these should be a single journal transaction */
2804 rc = record_marker(env, llh, fsdb, CM_START, lovname, "lov setup");
2807 rc = record_attach(env, llh, lovname, "lov", uuid);
2810 rc = record_lov_setup(env, llh, lovname, lovdesc);
2813 rc = record_marker(env, llh, fsdb, CM_END, lovname, "lov setup");
2818 record_end_log(env, &llh);
2820 OBD_FREE_PTR(lovdesc);
2824 /* add failnids to open log */
2825 static int mgs_write_log_failnids(const struct lu_env *env,
2826 struct mgs_target_info *mti,
2827 struct llog_handle *llh,
2830 char *failnodeuuid = NULL;
2831 char *ptr = mti->mti_params;
2832 struct lnet_nid nid;
2836 * #03 L add_uuid nid=uml1@tcp(0x20000c0a80201) nal=90 0: 1:uml1_UUID
2837 * #04 L add_uuid nid=1@elan(0x1000000000001) nal=90 0: 1:uml1_UUID
2838 * #05 L setup 0:OSC_uml1_ost1_mdsA 1:ost1_UUID 2:uml1_UUID
2839 * #06 L add_uuid nid=uml2@tcp(0x20000c0a80202) nal=90 0: 1:uml2_UUID
2840 * #0x L add_uuid nid=2@elan(0x1000000000002) nal=90 0: 1:uml2_UUID
2841 * #07 L add_conn 0:OSC_uml1_ost1_mdsA 1:uml2_UUID
2845 * Pull failnid info out of params string, which may contain something
2846 * like "<nid1>,<nid2>:<nid3>,<nid4>". class_parse_nid() does not
2847 * complain about abnormal inputs like ",:<nid1>", "<nid1>:,<nid2>",
2848 * etc. However, convert_hostnames() should have caught those.
2850 while (class_find_param(ptr, PARAM_FAILNODE, &ptr) == 0) {
2851 while (class_parse_nid(ptr, &nid, &ptr) == 0) {
2852 char nidstr[LNET_NIDSTR_SIZE];
2854 if (failnodeuuid == NULL) {
2855 /* We don't know the failover node name,
2856 * so just use the first nid as the uuid */
2857 libcfs_nidstr_r(&nid, nidstr, sizeof(nidstr));
2858 rc = name_create(&failnodeuuid, nidstr, "");
2863 "add nid %s for failover uuid %s, client %s\n",
2864 libcfs_nidstr_r(&nid, nidstr, sizeof(nidstr)),
2865 failnodeuuid, cliname);
2866 rc = record_add_uuid(env, llh, &nid, failnodeuuid);
2868 * If *ptr is ':', we have added all NIDs for
2872 rc = record_add_conn(env, llh, cliname,
2874 name_destroy(&failnodeuuid);
2875 failnodeuuid = NULL;
2879 rc = record_add_conn(env, llh, cliname, failnodeuuid);
2880 name_destroy(&failnodeuuid);
2881 failnodeuuid = NULL;
/*
 * Append an "add mdc" stanza for the MDT described by @mti to the config
 * log @logname, attaching the new MDC device to the LMV named @lmvname.
 *
 * Visible sequence: complain if @logname is empty, derive the node UUID
 * from the target's first NID, build "<svname>-mdc" plus the "_UUID"
 * names, mark earlier "add mdc" stanzas of this target CM_SKIP through
 * mgs_modify(), then write marker, add_uuid (one per NID), attach, setup,
 * failover NIDs, record_mdc_add and the closing marker before ending the
 * log and releasing every name_create()d string.
 *
 * NOTE(review): the declaration of fsdb, the locals rc/i/index and the
 * error-path branches are not visible in this view; confirm the cleanup
 * ordering against the complete source before changing anything here.
 */
2888 static int mgs_write_log_mdc_to_lmv(const struct lu_env *env,
2889 struct mgs_device *mgs,
2891 struct mgs_target_info *mti,
2892 char *logname, char *lmvname)
2894 char tmp[LNET_NIDSTR_SIZE], *nidstr;
2895 struct llog_handle *llh = NULL;
2896 char *mdcname = NULL;
2897 char *nodeuuid = NULL;
2898 char *mdcuuid = NULL;
2899 char *lmvuuid = NULL;
2904 if (mgs_log_is_empty(env, mgs, logname)) {
2905 CERROR("log is empty! Logical error\n");
2909 CDEBUG(D_MGS, "adding mdc for %s to log %s:lmv(%s)\n",
2910 mti->mti_svname, logname, lmvname);
/*
 * The first NID names the node: targets without large-NID support have it
 * converted from mti_nids[0] into tmp, the others use mti_nidlist[0].
 */
2912 if (!target_supports_large_nid(mti)) {
2913 libcfs_nid2str_r(mti->mti_nids[0], tmp, sizeof(tmp));
2916 nidstr = mti->mti_nidlist[0];
2919 rc = name_create(&nodeuuid, nidstr, "");
2922 rc = name_create(&mdcname, mti->mti_svname, "-mdc");
2925 rc = name_create(&mdcuuid, mdcname, "_UUID");
2928 rc = name_create(&lmvuuid, lmvname, "_UUID");
2932 rc = mgs_modify(env, mgs, fsdb, mti, logname, mti->mti_svname,
2933 "add mdc", CM_SKIP);
2937 rc = record_start_log(env, mgs, &llh, logname);
2940 rc = record_marker(env, llh, fsdb, CM_START, mti->mti_svname,
2945 for (i = 0; i < mti->mti_nid_count; i++) {
2946 struct lnet_nid nid;
2948 if (target_supports_large_nid(mti)) {
2949 rc = libcfs_strnid(&nid, mti->mti_nidlist[i]);
2953 lnet_nid4_to_nid(mti->mti_nids[i], &nid);
2956 CDEBUG(D_MGS, "add nid %s for mdt\n", libcfs_nidstr(&nid));
2957 rc = record_add_uuid(env, llh, &nid, nodeuuid);
2962 rc = record_attach(env, llh, mdcname, LUSTRE_MDC_NAME, lmvuuid);
2965 rc = record_setup(env, llh, mdcname, mti->mti_uuid, nodeuuid,
2969 rc = mgs_write_log_failnids(env, mti, llh, mdcname);
/* The MDT's stripe index is passed to record_mdc_add() in decimal text. */
2972 snprintf(index, sizeof(index), "%d", mti->mti_stripe_index);
2973 rc = record_mdc_add(env, llh, lmvname, mdcuuid, mti->mti_uuid,
2977 rc = record_marker(env, llh, fsdb, CM_END, mti->mti_svname,
2982 record_end_log(env, &llh);
/* Release every string obtained from name_create() above. */
2984 name_destroy(&lmvuuid);
2985 name_destroy(&mdcuuid);
2986 name_destroy(&mdcname);
2987 name_destroy(&nodeuuid);
/*
 * Build the config log name of MDT index @i in *@logname and the matching
 * "-mdtlov" LOV name in *@lovname.
 *
 * For index 0 with FSDB_OSCNAME18 set, the LOV name is derived from the
 * filesystem name; in every other case it is derived from the MDT log
 * name that was just created.
 *
 * NOTE(review): the condition guarding name_destroy(logname) and the
 * return statement are not visible here; presumably the log name is
 * released when creating the LOV name fails -- confirm.
 */
2991 static int name_create_mdt_and_lov(char **logname, char **lovname,
2992 struct fs_db *fsdb, int i)
2996 rc = name_create_mdt(logname, fsdb->fsdb_name, i);
3000 if (i == 0 && test_bit(FSDB_OSCNAME18, &fsdb->fsdb_flags))
3001 rc = name_create(lovname, fsdb->fsdb_name, "-mdtlov");
3003 rc = name_create(lovname, *logname, "-mdtlov");
3005 name_destroy(logname);
/*
 * Append an "add osp" stanza for the MDT described by @mti to @logname,
 * the config log of another MDT (index @mdt_index).
 *
 * Names built here: the MDT name from fsdb_name + mti_stripe_index, the
 * node UUID from the target's first NID, the OSP device name via
 * name_create_osp(), the LOV name via name_create_lov(@logname) and the
 * "_UUID" variants. Earlier "add osp" stanzas of the target are marked
 * CM_SKIP, then marker / add_uuid per NID / attach / setup / failover
 * NIDs / LCFG_ADD_MDC on the LOV / closing marker are recorded.
 *
 * Failures after the MDT name exists jump to the out_destory label, whose
 * position among the name_destroy() calls below is not visible here.
 *
 * NOTE(review): the emptiness check uses mti->mti_svname rather than
 * @logname -- confirm that this is intended.
 */
3011 /* add new mdc to already existent MDS */
3012 static int mgs_write_log_osp_to_mdt(const struct lu_env *env,
3013 struct mgs_device *mgs,
3015 struct mgs_target_info *mti,
3016 int mdt_index, char *logname)
3018 char tmp[LNET_NIDSTR_SIZE], *nidstr;
3019 struct llog_handle *llh = NULL;
3020 char *nodeuuid = NULL;
3021 char *ospname = NULL;
3022 char *lovuuid = NULL;
3023 char *mdtuuid = NULL;
3024 char *mdtname = NULL;
3025 char *lovname = NULL;
3030 if (mgs_log_is_empty(env, mgs, mti->mti_svname)) {
3031 CERROR("log is empty! Logical error\n");
3035 CDEBUG(D_MGS, "adding osp index %d to %s\n", mti->mti_stripe_index,
3038 rc = name_create_mdt(&mdtname, fsdb->fsdb_name, mti->mti_stripe_index);
/*
 * The first NID names the node: targets without large-NID support have it
 * converted from mti_nids[0] into tmp, the others use mti_nidlist[0].
 */
3042 if (!target_supports_large_nid(mti)) {
3043 libcfs_nid2str_r(mti->mti_nids[0], tmp, sizeof(tmp));
3046 nidstr = mti->mti_nidlist[0];
3049 rc = name_create(&nodeuuid, nidstr, "");
3051 GOTO(out_destory, rc);
3053 rc = name_create_osp(&ospname, NULL, mdtname, mdt_index);
3055 GOTO(out_destory, rc);
3057 rc = name_create_lov(&lovname, logname);
3059 GOTO(out_destory, rc);
3061 rc = name_create(&lovuuid, lovname, "_UUID");
3063 GOTO(out_destory, rc);
3065 rc = name_create(&mdtuuid, mdtname, "_UUID");
3067 GOTO(out_destory, rc);
3069 rc = mgs_modify(env, mgs, fsdb, mti, logname, mti->mti_svname,
3070 "add osp", CM_SKIP);
3072 GOTO(out_destory, rc);
3074 rc = record_start_log(env, mgs, &llh, logname);
3076 GOTO(out_destory, rc);
3078 rc = record_marker(env, llh, fsdb, CM_START, mti->mti_svname,
3081 GOTO(out_destory, rc);
3083 for (i = 0; i < mti->mti_nid_count; i++) {
3084 struct lnet_nid nid;
3086 if (target_supports_large_nid(mti)) {
3087 rc = libcfs_strnid(&nid, mti->mti_nidlist[i]);
3091 lnet_nid4_to_nid(mti->mti_nids[i], &nid);
3094 CDEBUG(D_MGS, "add nid %s for mdt\n", libcfs_nidstr(&nid));
3095 rc = record_add_uuid(env, llh, &nid, nodeuuid);
3100 rc = record_attach(env, llh, ospname, LUSTRE_OSP_NAME, lovuuid);
3104 rc = record_setup(env, llh, ospname, mti->mti_uuid, nodeuuid,
3109 rc = mgs_write_log_failnids(env, mti, llh, ospname);
3113 /* Add mdc(osp) to lod */
3114 snprintf(index_str, sizeof(index_str), "%d", mti->mti_stripe_index);
3115 rc = record_base(env, llh, lovname, 0, LCFG_ADD_MDC, mti->mti_uuid,
3116 index_str, "1", NULL);
3120 rc = record_marker(env, llh, fsdb, CM_END, mti->mti_svname, "add osp");
3125 record_end_log(env, &llh);
/* Release every string obtained from the name_create*() helpers above. */
3128 name_destroy(&mdtuuid);
3129 name_destroy(&lovuuid);
3130 name_destroy(&lovname);
3131 name_destroy(&ospname);
3132 name_destroy(&nodeuuid);
3133 name_destroy(&mdtname);
/*
 * Write the MDT's own startup log; the log is named after mti_svname.
 *
 * The failover mode comes from PARAM_FAILMODE in mti_params: a value
 * starting with "failout" selects "n" as the last setup argument,
 * anything else "f". If the log is still empty, its LOV section
 * ("<svname>-mdtlov") is written first through mgs_write_log_lov(). The
 * "add mdt" stanza then consists of marker, attach (LUSTRE_MDT_NAME),
 * mount_opt, setup and the closing marker.
 *
 * NOTE(review): the declarations of lovname and mdt_index, the check of
 * the OBD_ALLOC_PTR() result and the release of uuid are not visible in
 * this view.
 */
3137 static int mgs_write_log_mdt0(const struct lu_env *env,
3138 struct mgs_device *mgs,
3140 struct mgs_target_info *mti)
3142 char *log = mti->mti_svname;
3143 struct llog_handle *llh = NULL;
3144 struct obd_uuid *uuid;
3147 char *ptr = mti->mti_params;
3148 int rc = 0, failout = 0;
3151 OBD_ALLOC_PTR(uuid);
3155 if (class_find_param(ptr, PARAM_FAILMODE, &ptr) == 0)
3156 failout = (strncmp(ptr, "failout", 7) == 0);
3158 rc = name_create(&lovname, log, "-mdtlov");
3161 if (mgs_log_is_empty(env, mgs, log)) {
3162 rc = mgs_write_log_lov(env, mgs, fsdb, mti, log, lovname);
/*
 * NOTE(review): unbounded sprintf(); the size of mdt_index is not visible
 * here -- consider snprintf(mdt_index, sizeof(mdt_index), ...) once the
 * declaration has been checked.
 */
3167 sprintf(mdt_index, "%d", mti->mti_stripe_index);
3169 rc = record_start_log(env, mgs, &llh, log);
3173 /* add MDT itself */
3175 /* FIXME this whole fn should be a single journal transaction */
/*
 * NOTE(review): also unbounded; relies on "<svname>_UUID" fitting into
 * uuid->uuid -- confirm against the size of struct obd_uuid.
 */
3176 sprintf(uuid->uuid, "%s_UUID", log);
3177 rc = record_marker(env, llh, fsdb, CM_START, log, "add mdt");
3180 rc = record_attach(env, llh, log, LUSTRE_MDT_NAME, uuid->uuid);
3183 rc = record_mount_opt(env, llh, log, lovname, NULL);
3186 rc = record_setup(env, llh, log, uuid->uuid, mdt_index, lovname,
3187 failout ? "n" : "f");
3190 rc = record_marker(env, llh, fsdb, CM_END, log, "add mdt");
3194 record_end_log(env, &llh);
3196 name_destroy(&lovname);
/*
 * Register a new MDT in every config log that has to know about it.
 *
 * Visible steps, in order:
 *  - make up "<svname>_UUID" when the target carries no UUID;
 *  - write the MDT's own log with mgs_write_log_mdt0();
 *  - if the "<fsname>-client" log is empty, start it with its LOV, LMV
 *    and mount-option sections;
 *  - call mgs_steal_osp_rec_from_client() on the client log;
 *  - copy remaining configuration from an existing MDT with
 *    mgs_copy_llog_from_mdt();
 *  - add this MDT's MDC to the client log with
 *    mgs_write_log_mdc_to_lmv();
 *  - for every other index set in fsdb_mdt_index_map whose log is not
 *    empty, add an OSP for this MDT with mgs_write_log_osp_to_mdt().
 *
 * NOTE(review): error handling between these steps and the declarations
 * of cliname/logname/i/rc are not visible in this view.
 */
3202 /* envelope method for all layers log */
3203 static int mgs_write_log_mdt(const struct lu_env *env,
3204 struct mgs_device *mgs,
3206 struct mgs_target_info *mti)
3208 struct llog_handle *llh = NULL;
3213 CDEBUG(D_MGS, "writing new mdt %s\n", mti->mti_svname);
3215 if (mti->mti_uuid[0] == '\0') {
3216 /* Make up our own uuid */
3217 snprintf(mti->mti_uuid, sizeof(mti->mti_uuid),
3218 "%s_UUID", mti->mti_svname);
3222 rc = mgs_write_log_mdt0(env, mgs, fsdb, mti);
3226 /* Append the mdt info to the client log */
3227 rc = name_create(&cliname, mti->mti_fsname, "-client");
3231 if (mgs_log_is_empty(env, mgs, cliname)) {
3232 /* Start client log */
3233 rc = mgs_write_log_lov(env, mgs, fsdb, mti, cliname,
3237 rc = mgs_write_log_lmv(env, mgs, fsdb, mti, cliname,
3241 rc = mgs_write_log_mount_opt(env, mgs, fsdb, cliname);
3247 * #09 L add_uuid nid=uml1@tcp(0x20000c0a80201) 0: 1:uml1_UUID
3248 * #10 L attach 0:MDC_uml1_mdsA_MNT_client 1:mdc 2:1d834_MNT_client_03f
3249 * #11 L setup 0:MDC_uml1_mdsA_MNT_client 1:mdsA_UUID 2:uml1_UUID
3250 * #12 L add_uuid nid=uml2@tcp(0x20000c0a80202) 0: 1:uml2_UUID
3251 * #13 L add_conn 0:MDC_uml1_mdsA_MNT_client 1:uml2_UUID
3253 rc = mgs_steal_osp_rec_from_client(env, mgs, cliname, mti, fsdb);
3257 /* Try to copy remaining configurations from an existing MDT target */
3258 rc = mgs_copy_llog_from_mdt(env, mgs, mti, fsdb);
3262 rc = mgs_write_log_mdc_to_lmv(env, mgs, fsdb, mti, cliname,
3267 rc = record_start_log(env, mgs, &llh, cliname);
3271 /* for_all_existing_mdt except current one */
3272 for_each_set_bit(i, fsdb->fsdb_mdt_index_map, INDEX_MAP_SIZE) {
3275 if (mti->mti_stripe_index == i)
3278 rc = name_create_mdt(&logname, fsdb->fsdb_name, i);
3283 * NB: If the log for the MDT is empty, it means
3284 * the MDT is only added to the index
3285 * map, and not being process yet, i.e. this
3286 * is an unregistered MDT, see mgs_write_log_target().
3287 * so we should skip it. Otherwise
3289 * 1. MGS get register request for MDT1 and MDT2.
3291 * 2. Then both MDT1 and MDT2 are added into
3292 * fsdb_mdt_index_map. (see mgs_set_index()).
3294 * 3. Then MDT1 get the lock of fsdb_mutex, then
3295 * generate the config log, here, it will regard MDT2
3296 * as an existent MDT, and generate "add osp" for
3297 * lustre-MDT0001-osp-MDT0002. Note: at the moment
3298 * MDT0002 config log is still empty, so it will
3299 * add "add osp" even before "lov setup", which
3300 * will definitly cause trouble.
3302 * 4. MDT1 registeration finished, fsdb_mutex is
3303 * released, then MDT2 get in, then in above
3304 * mgs_steal_osp_rec_from_client(), it will
3305 * add another osp log for lustre-MDT0001-osp-MDT0002,
3306 * which will cause another trouble.
3308 if (!mgs_log_is_empty(env, mgs, logname))
3309 rc = mgs_write_log_osp_to_mdt(env, mgs, fsdb, mti, i,
3311 name_destroy(&logname);
3316 record_end_log(env, &llh);
3318 name_destroy(&cliname);
/*
 * Append an "add osc"-style stanza for the OST described by @mti to
 * @logname and register the OST with the LOV named @lovname.
 *
 * The OSC device is named "<svname>-osc" followed by @suffix; when
 * FSDB_OSCNAME18 is set the suffix is left out (old 1.8 naming, see the
 * LU-1257 comment below). @flags is OR-ed into both the CM_START and the
 * CM_END marker. The node UUID is the string form of the target's first
 * NID.
 *
 * Record order matters (see the NB comment before the NID loop): all
 * add_uuid records for the target's own NIDs are written before
 * attach/setup, failover NIDs after.
 *
 * NOTE(review): how @sec_part is used, the locals rc/i/index and the
 * error-path branches are not visible in this view.
 */
3322 /* Add the ost info to the client/mdt lov */
3323 static int mgs_write_log_osc_to_lov(const struct lu_env *env,
3324 struct mgs_device *mgs, struct fs_db *fsdb,
3325 struct mgs_target_info *mti,
3326 char *logname, char *suffix, char *lovname,
3327 enum lustre_sec_part sec_part, int flags)
3329 char tmp[LNET_NIDSTR_SIZE], *nidstr;
3330 struct llog_handle *llh = NULL;
3331 char *nodeuuid = NULL;
3332 char *oscname = NULL;
3333 char *oscuuid = NULL;
3334 char *lovuuid = NULL;
3335 char *svname = NULL;
3340 CDEBUG(D_INFO, "adding osc for %s to log %s\n",
3341 mti->mti_svname, logname);
3343 if (mgs_log_is_empty(env, mgs, logname)) {
3344 CERROR("log is empty! Logical error\n");
3348 if (!target_supports_large_nid(mti)) {
3349 libcfs_nid2str_r(mti->mti_nids[0], tmp, sizeof(tmp));
3352 nidstr = mti->mti_nidlist[0];
3355 rc = name_create(&nodeuuid, nidstr, "");
3358 rc = name_create(&svname, mti->mti_svname, "-osc");
3362 /* for the system upgraded from old 1.8, keep using the old osc naming
3363 * style for mdt, see name_create_mdt_osc(). LU-1257 */
3364 if (test_bit(FSDB_OSCNAME18, &fsdb->fsdb_flags))
3365 rc = name_create(&oscname, svname, "");
3367 rc = name_create(&oscname, svname, suffix);
3371 rc = name_create(&oscuuid, oscname, "_UUID");
3374 rc = name_create(&lovuuid, lovname, "_UUID");
3379 * #03 L add_uuid nid=uml1@tcp(0x20000c0a80201) 0: 1:uml1_UUID
3381 * #04 L add_uuid nid=1@elan(0x1000000000001) nal=90 0: 1:uml1_UUID
3382 * #04 L attach 0:OSC_uml1_ost1_MNT_client 1:osc 2:89070_lov1_a41dff51a
3383 * #05 L setup 0:OSC_uml1_ost1_MNT_client 1:ost1_UUID 2:uml1_UUID
3385 * #06 L add_uuid nid=uml2@tcp(0x20000c0a80202) 0: 1:uml2_UUID
3386 * #07 L add_conn 0:OSC_uml1_ost1_MNT_client 1:uml2_UUID
3387 * #08 L lov_modify_tgts add 0:lov1 1:ost1_UUID 2(index):0 3(gen):1
3390 rc = record_start_log(env, mgs, &llh, logname);
3394 /* FIXME these should be a single journal transaction */
3395 rc = record_marker(env, llh, fsdb, CM_START | flags, mti->mti_svname,
3400 /* NB: don't change record order, because upon MDT steal OSC config
3401 * from client, it treats all nids before LCFG_SETUP as target nids
3402 * (multiple interfaces), while nids after as failover node nids.
3403 * See mgs_steal_client_llog_handler() LCFG_ADD_UUID.
3405 for (i = 0; i < mti->mti_nid_count; i++) {
3406 struct lnet_nid nid;
3408 if (target_supports_large_nid(mti)) {
3409 rc = libcfs_strnid(&nid, mti->mti_nidlist[i]);
3413 lnet_nid4_to_nid(mti->mti_nids[i], &nid);
3416 CDEBUG(D_MGS, "add nid %s\n", libcfs_nidstr(&nid));
3417 rc = record_add_uuid(env, llh, &nid, nodeuuid);
3422 rc = record_attach(env, llh, oscname, LUSTRE_OSC_NAME, lovuuid);
3425 rc = record_setup(env, llh, oscname, mti->mti_uuid, nodeuuid,
3429 rc = mgs_write_log_failnids(env, mti, llh, oscname);
/* The OST's stripe index is handed to record_lov_add() in decimal text. */
3433 snprintf(index, sizeof(index), "%d", mti->mti_stripe_index);
3435 rc = record_lov_add(env, llh, lovname, mti->mti_uuid, index, "1");
3438 rc = record_marker(env, llh, fsdb, CM_END | flags, mti->mti_svname,
3443 record_end_log(env, &llh);
/* Release every string obtained from name_create() above. */
3445 name_destroy(&lovuuid);
3446 name_destroy(&oscuuid);
3447 name_destroy(&oscname);
3448 name_destroy(&svname);
3449 name_destroy(&nodeuuid);
/*
 * Register a new OST in every config log that has to know about it.
 *
 * Visible steps:
 *  - report a console error if a non-empty config log already exists for
 *    the target (what is returned on that path is not visible here);
 *  - write the OST's own startup log: marker "add ost", attach as
 *    "obdfilter", setup with failover mode "n" (PARAM_FAILMODE value
 *    starts with "failout") or "f", closing marker;
 *  - pick the marker flags for the client/MDT records: CM_SKIP |
 *    CM_UPGRADE146 when FSDB_OLDLOG14 is set; when LDD_F_UPDATE is not
 *    set a console warning is emitted (rest of that branch not visible);
 *  - add an OSC for the OST to the LOV of every MDT present in
 *    fsdb_mdt_index_map, using "-MDT%04x" of the MDT index;
 *  - start the "<fsname>-client" log if it is empty and add the OSC to
 *    the client LOV (fsdb_clilov).
 *
 * A made-up "<svname>_UUID" is used when the target carries no UUID.
 */
3453 static int mgs_write_log_ost(const struct lu_env *env,
3454 struct mgs_device *mgs, struct fs_db *fsdb,
3455 struct mgs_target_info *mti)
3457 struct llog_handle *llh = NULL;
3458 char *logname, *lovname;
3459 char *ptr = mti->mti_params;
3460 int rc, flags = 0, failout = 0, i;
3463 CDEBUG(D_MGS, "writing new ost %s\n", mti->mti_svname);
3465 /* The ost startup log */
3467 /* If the ost log already exists, that means that someone reformatted
3468 * the ost and it called target_add again.
3470 if (!mgs_log_is_empty(env, mgs, mti->mti_svname)) {
3471 LCONSOLE_ERROR_MSG(0x141,
3472 "The config log for %s already exists, yet the server claims it never registered. It may have been reformatted, or the index changed. writeconf the MDT to regenerate all logs.\n",
3478 * attach obdfilter ost1 ost1_UUID
3479 * setup /dev/loop2 ldiskfs f|n errors=remount-ro,user_xattr
3481 if (class_find_param(ptr, PARAM_FAILMODE, &ptr) == 0)
3482 failout = (strncmp(ptr, "failout", 7) == 0);
3483 rc = record_start_log(env, mgs, &llh, mti->mti_svname);
3486 /* FIXME these should be a single journal transaction */
3487 rc = record_marker(env, llh, fsdb, CM_START, mti->mti_svname,"add ost");
3490 if (*mti->mti_uuid == '\0')
3491 snprintf(mti->mti_uuid, sizeof(mti->mti_uuid),
3492 "%s_UUID", mti->mti_svname);
3493 rc = record_attach(env, llh, mti->mti_svname,
3494 "obdfilter"/*LUSTRE_OST_NAME*/, mti->mti_uuid);
3497 rc = record_setup(env, llh, mti->mti_svname,
3498 "dev"/*ignored*/, "type"/*ignored*/,
3499 failout ? "n" : "f", NULL/*options*/);
3502 rc = record_marker(env, llh, fsdb, CM_END, mti->mti_svname, "add ost");
3506 record_end_log(env, &llh);
3509 /* We also have to update the other logs where this osc is part of
3513 if (test_bit(FSDB_OLDLOG14, &fsdb->fsdb_flags)) {
3514 /* If we're upgrading, the old mdt log already has our
3515 * entry. Let's do a fake one for fun.
3517 /* Note that we can't add any new failnids, since we don't
3518 * know the old osc names.
3520 flags = CM_SKIP | CM_UPGRADE146;
3521 } else if ((mti->mti_flags & LDD_F_UPDATE) != LDD_F_UPDATE) {
3522 /* If the update flag isn't set, don't update client/mdt
3526 LCONSOLE_WARN("Client log for %s was not updated; writeconf the MDT first to regenerate it.\n",
3530 /* Add ost to all MDT lov defs */
3531 for (i = 0; i < INDEX_MAP_SIZE * 8; i++) {
3532 if (test_bit(i, fsdb->fsdb_mdt_index_map)) {
3535 rc = name_create_mdt_and_lov(&logname, &lovname, fsdb,
3540 snprintf(mdt_index, sizeof(mdt_index), "-MDT%04x", i);
3541 rc = mgs_write_log_osc_to_lov(env, mgs, fsdb, mti,
3543 lovname, LUSTRE_SP_MDT,
3545 name_destroy(&logname);
3546 name_destroy(&lovname);
3552 /* Append ost info to the client log */
3553 rc = name_create(&logname, mti->mti_fsname, "-client");
3556 if (mgs_log_is_empty(env, mgs, logname)) {
3557 /* Start client log */
3558 rc = mgs_write_log_lov(env, mgs, fsdb, mti, logname,
3562 rc = mgs_write_log_lmv(env, mgs, fsdb, mti, logname,
3566 rc = mgs_write_log_mount_opt(env, mgs, fsdb, logname);
3570 rc = mgs_write_log_osc_to_lov(env, mgs, fsdb, mti, logname, "",
3571 fsdb->fsdb_clilov, LUSTRE_SP_CLI, flags);
3573 name_destroy(&logname);
/*
 * Test whether the parameter string @ptr has an empty value, i.e. whether
 * its first '=' is immediately followed by the string terminator
 * ("name="). A string without any '=' does not match the test.
 *
 * NOTE(review): the return statements are not visible in this view;
 * callers in this file treat a non-zero result as "value is empty".
 */
3577 static __inline__ int mgs_param_empty(char *ptr)
3579 char *tmp = strchr(ptr, '=');
3581 if (tmp && tmp[1] == '\0')
/*
 * Apply a failover-NID change for the target described by @mti to the
 * config log @logname, where @cliname is the client-side device
 * (mdc/osc/osp) that connects to the target.
 *
 * If mti_params has an empty value, all existing "add failnid" stanzas of
 * the target are marked CM_SKIP; a negative mgs_modify() result is
 * returned as is, any other result becomes 0. Otherwise failover NIDs are
 * additive: a new marker-delimited stanza with the NIDs parsed by
 * mgs_write_log_failnids() is appended.
 */
3586 static int mgs_write_log_failnid_internal(const struct lu_env *env,
3587 struct mgs_device *mgs,
3589 struct mgs_target_info *mti,
3590 char *logname, char *cliname)
3593 struct llog_handle *llh = NULL;
3595 if (mgs_param_empty(mti->mti_params)) {
3596 /* Remove _all_ failnids */
3597 rc = mgs_modify(env, mgs, fsdb, mti, logname,
3598 mti->mti_svname, "add failnid", CM_SKIP);
3599 return rc < 0 ? rc : 0;
3602 /* Otherwise failover nids are additive */
3603 rc = record_start_log(env, mgs, &llh, logname);
3606 /* FIXME this should be a single journal transaction */
3607 rc = record_marker(env, llh, fsdb, CM_START, mti->mti_svname,
3611 rc = mgs_write_log_failnids(env, mti, llh, cliname);
3614 rc = record_marker(env, llh, fsdb, CM_END,
3615 mti->mti_svname, "add failnid");
3617 record_end_log(env, &llh);
/*
 * Add (or, for an empty value, retire) failover NIDs of an already
 * registered target in every log that connects to it.
 *
 * The target must have a non-empty config log of its own. The client-side
 * device name is "<svname>-mdc" for an MDT and "<svname>-osc" for an OST.
 * The change is applied to the "<fsname>-client" log first, then to the
 * log of every MDT in fsdb_mdt_index_map whose log is not empty, using
 * the OSP name from name_create_osp(). An MDT is skipped when it is the
 * target itself, as no osp device points back to its own MDT.
 *
 * NOTE(review): the handling of targets that are neither MDT nor OST and
 * the checks of rc between the steps are not visible in this view.
 */
3621 /* Add additional failnids to an existing log.
3622 The mdc/osc must have been added to logs first */
3623 /* tcp nids must be in dotted-quad ascii -
3624 we can't resolve hostnames from the kernel. */
3625 static int mgs_write_log_add_failnid(const struct lu_env *env,
3626 struct mgs_device *mgs,
3628 struct mgs_target_info *mti)
3630 char *logname, *cliname;
3635 /* FIXME we currently can't erase the failnids
3636 * given when a target first registers, since they aren't part of
3637 * an "add uuid" stanza
3640 /* Verify that we know about this target */
3641 if (mgs_log_is_empty(env, mgs, mti->mti_svname)) {
3642 LCONSOLE_ERROR_MSG(0x142,
3643 "The target %s has not registered yet. It must be started before failnids can be added.\n",
3648 /* Create mdc/osc client name (e.g. lustre-OST0001-osc) */
3649 if (mti->mti_flags & LDD_F_SV_TYPE_MDT)
3650 rc = name_create(&cliname, mti->mti_svname, "-mdc");
3651 else if (mti->mti_flags & LDD_F_SV_TYPE_OST)
3652 rc = name_create(&cliname, mti->mti_svname, "-osc");
3659 /* Add failover nids to the client log */
3660 rc = name_create(&logname, mti->mti_fsname, "-client");
3662 name_destroy(&cliname);
3666 rc = mgs_write_log_failnid_internal(env, mgs, fsdb,mti,logname,cliname);
3667 name_destroy(&logname);
3668 name_destroy(&cliname);
3673 /* Add OST/MDT failover nids to the MDT logs as well */
3674 for_each_set_bit(i, fsdb->fsdb_mdt_index_map, INDEX_MAP_SIZE) {
3675 /* No osp device fsname-MDTXXXX-osp-MDTXXXX in conf*/
3676 if (mti->mti_flags & LDD_F_SV_TYPE_MDT
3677 && mti->mti_stripe_index == i)
3680 rc = name_create_mdt(&logname, mti->mti_fsname, i);
3684 if (mgs_log_is_empty(env, mgs, logname)) {
3685 name_destroy(&logname);
3689 rc = name_create_osp(&cliname, NULL, mti->mti_svname, i);
3691 name_destroy(&logname);
3694 rc = mgs_write_log_failnid_internal(env, mgs, fsdb, mti,
3696 name_destroy(&cliname);
3697 name_destroy(&logname);
/*
 * Write one parameter setting @ptr ("name=value") for @tgtname into the
 * config log @logname.
 *
 * Earlier settings of the same parameter are first marked CM_SKIP through
 * mgs_modify(); the marker comment used for matching is a bounded copy of
 * @ptr, and the value is excluded from the match (the statement that cuts
 * the copy at '=' is not visible here). An empty value means the
 * parameter is being disabled (see the console message). A new record is
 * then built from @bufs: LCFG_SET_PARAM with LCTL_UPCALL in buffer 2 when
 * LDD_F_PARAM2 is set, plain LCFG_PARAM otherwise. It is written with
 * mgs_write_log_direct() and freed.
 *
 * NOTE(review): the code between the "mgs_modify() will return 1" comment
 * and lustre_cfg_bufs_reset() is not visible; it presumably returns early
 * when deleting -- confirm.
 */
3705 static int mgs_wlp_lcfg(const struct lu_env *env,
3706 struct mgs_device *mgs, struct fs_db *fsdb,
3707 struct mgs_target_info *mti,
3708 char *logname, struct lustre_cfg_bufs *bufs,
3709 char *tgtname, char *ptr)
3711 char comment[MTI_NAME_MAXLEN];
3713 struct llog_cfg_rec *lcr;
3716 /* Erase any old settings of this same parameter */
3717 strscpy(comment, ptr, sizeof(comment));
3718 /* But don't try to match the value. */
3719 tmp = strchr(comment, '=');
3722 /* FIXME we should skip settings that are the same as old values */
3723 rc = mgs_modify(env, mgs, fsdb, mti, logname, tgtname, comment,CM_SKIP);
3726 del = mgs_param_empty(ptr);
3728 LCONSOLE_INFO("%s parameter %s.%s in log %s\n", del ? "Disabling" : rc ?
3729 "Setting" : "Modifying", tgtname, comment, logname);
3731 /* mgs_modify() will return 1 if nothing had to be done */
3737 lustre_cfg_bufs_reset(bufs, tgtname);
3738 lustre_cfg_bufs_set_string(bufs, 1, ptr);
3739 if (mti->mti_flags & LDD_F_PARAM2)
3740 lustre_cfg_bufs_set_string(bufs, 2, LCTL_UPCALL);
3742 lcr = lustre_cfg_rec_new((mti->mti_flags & LDD_F_PARAM2) ?
3743 LCFG_SET_PARAM : LCFG_PARAM, bufs);
3747 rc = mgs_write_log_direct(env, mgs, fsdb, logname, lcr, tgtname,
3749 lustre_cfg_rec_free(lcr);
/*
 * Write a filesystem-wide ("sys.") setting into the logs of all servers
 * and clients.
 *
 * @sys is the full parameter string and @ptr the part that is matched
 * against the known names: timeout (LCFG_SET_TIMEOUT), ldlm_timeout
 * (LCFG_SET_LDLM_TIMEOUT), the adaptive-timeout tunables and jobid_var,
 * whose value is kept as a string instead of being converted to an
 * integer. Numeric values are parsed with kstrtouint() into lcfg_num. The
 * record is written with mgs_write_log_direct_all(); an empty value
 * passes NULL instead of a record, i.e. the setting is removed.
 *
 * After a successful write of a non-empty timeout or ldlm_timeout, the
 * value is also applied on this node when no value was set before or the
 * new one is larger than the current one.
 *
 * NOTE(review): the command chosen for the adaptive-timeout and jobid_var
 * branches, the handling of unknown names and the declaration of tmp are
 * not visible in this view.
 */
3753 /* write global variable settings into log */
3754 static int mgs_write_log_sys(const struct lu_env *env,
3755 struct mgs_device *mgs, struct fs_db *fsdb,
3756 struct mgs_target_info *mti, char *sys, char *ptr)
3758 struct mgs_thread_info *mgi = mgs_env_info(env);
3759 struct lustre_cfg *lcfg;
3760 struct llog_cfg_rec *lcr;
3762 int rc, cmd, convert = 1;
3764 if (class_match_param(ptr, PARAM_TIMEOUT, &tmp) == 0) {
3765 cmd = LCFG_SET_TIMEOUT;
3766 } else if (class_match_param(ptr, PARAM_LDLM_TIMEOUT, &tmp) == 0) {
3767 cmd = LCFG_SET_LDLM_TIMEOUT;
3768 /* Check for known params here so we can return error to lctl */
3769 } else if ((class_match_param(ptr, PARAM_AT_MIN, &tmp) == 0) ||
3770 (class_match_param(ptr, PARAM_AT_MAX, &tmp) == 0) ||
3771 (class_match_param(ptr, PARAM_AT_EXTRA, &tmp) == 0) ||
3772 (class_match_param(ptr, PARAM_AT_EARLY_MARGIN, &tmp) == 0) ||
3773 (class_match_param(ptr, PARAM_AT_HISTORY, &tmp) == 0)) {
3775 } else if (class_match_param(ptr, PARAM_JOBID_VAR, &tmp) == 0) {
3776 convert = 0; /* Don't convert string value to integer */
3782 if (mgs_param_empty(ptr))
3783 CDEBUG(D_MGS, "global '%s' removed\n", sys);
3785 CDEBUG(D_MGS, "global '%s' val=%s\n", sys, tmp);
3787 lustre_cfg_bufs_reset(&mgi->mgi_bufs, NULL);
3788 lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 1, sys);
3789 if (!convert && *tmp != '\0')
3790 lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 2, tmp);
3791 lcr = lustre_cfg_rec_new(cmd, &mgi->mgi_bufs);
3795 lcfg = &lcr->lcr_cfg;
3797 rc = kstrtouint(tmp, 0, &lcfg->lcfg_num);
3799 GOTO(out_rec_free, rc);
3804 /* truncate the comment to the parameter name */
3808 /* modify all servers and clients */
3809 rc = mgs_write_log_direct_all(env, mgs, fsdb, mti,
3810 *tmp == '\0' ? NULL : lcr,
3811 mti->mti_fsname, sys, 0);
3812 if (rc == 0 && *tmp != '\0') {
3814 case LCFG_SET_TIMEOUT:
3815 if (!obd_timeout_set || lcfg->lcfg_num > obd_timeout)
3816 class_process_config(lcfg);
3818 case LCFG_SET_LDLM_TIMEOUT:
3819 if (!ldlm_timeout_set || lcfg->lcfg_num > ldlm_timeout)
3820 class_process_config(lcfg);
3828 lustre_cfg_rec_free(lcr);
/*
 * Write a quota enable setting into the logs of all servers.
 *
 * @quota is the full parameter string and @ptr the part matched against
 * the two supported pool names (QUOTA_METAPOOL_NAME, QUOTA_DATAPOOL_NAME);
 * anything else is rejected with an error message. A value is accepted
 * when it contains at least one of 'u', 'g' or 'p', or equals "none". The
 * LCFG_PARAM record is written with mgs_write_log_direct_all(); an empty
 * value passes NULL instead of a record. Positive results of that call
 * are folded to 0 on return.
 *
 * NOTE(review): the conditions around the two CDEBUG() calls and the
 * error codes returned on rejection are not visible in this view.
 */
3832 /* write quota settings into log */
3833 static int mgs_write_log_quota(const struct lu_env *env, struct mgs_device *mgs,
3834 struct fs_db *fsdb, struct mgs_target_info *mti,
3835 char *quota, char *ptr)
3837 struct mgs_thread_info *mgi = mgs_env_info(env);
3838 struct llog_cfg_rec *lcr;
3841 int rc, cmd = LCFG_PARAM;
3843 /* support only 'meta' and 'data' pools so far */
3844 if (class_match_param(ptr, QUOTA_METAPOOL_NAME, &tmp) != 0 &&
3845 class_match_param(ptr, QUOTA_DATAPOOL_NAME, &tmp) != 0) {
3846 CERROR("parameter quota.%s isn't supported (only quota.mdt "
3847 "& quota.ost are)\n", ptr);
3852 CDEBUG(D_MGS, "global '%s' removed\n", quota);
3854 CDEBUG(D_MGS, "global '%s'\n", quota);
3856 if (strchr(tmp, 'u') == NULL && strchr(tmp, 'g') == NULL &&
3857 strchr(tmp, 'p') == NULL &&
3858 strcmp(tmp, "none") != 0) {
3859 CERROR("enable option(%s) isn't supported\n", tmp);
3864 lustre_cfg_bufs_reset(&mgi->mgi_bufs, mti->mti_fsname);
3865 lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 1, quota);
3866 lcr = lustre_cfg_rec_new(cmd, &mgi->mgi_bufs);
3870 /* truncate the comment to the parameter name */
3875 /* XXX we duplicated quota enable information in all server
3876 * config logs, it should be moved to a separate config
3877 * log once we cleanup the config log for global param. */
3878 /* modify all servers */
3879 rc = mgs_write_log_direct_all(env, mgs, fsdb, mti,
3880 *tmp == '\0' ? NULL : lcr,
3881 mti->mti_fsname, quota, 1);
3883 lustre_cfg_rec_free(lcr);
3884 return rc < 0 ? rc : 0;
/*
 * Persist one sptlrpc parameter in the "<fsname>-sptlrpc" config log.
 *
 * The parameter must contain '=' (asserted). A separately allocated
 * comment of len bytes of the parameter is used to find and obsolete the
 * previous record through mgs_modify(). The new LCFG_SPTLRPC_CONF record
 * is then written with mgs_write_log_direct(). If the log is empty it is
 * first created by starting and ending it once.
 *
 * NOTE(review): the computation of len is not visible here; presumably it
 * is the length of the parameter name in front of '=' -- confirm. The
 * trailing parameter of the signature is also hidden.
 */
3887 static int mgs_srpc_set_param_disk(const struct lu_env *env,
3888 struct mgs_device *mgs,
3890 struct mgs_target_info *mti,
3893 struct mgs_thread_info *mgi = mgs_env_info(env);
3894 struct llog_cfg_rec *lcr;
3895 struct llog_handle *llh = NULL;
3897 char *comment, *ptr;
3903 ptr = strchr(param, '=');
3904 LASSERT(ptr != NULL);
3907 OBD_ALLOC(comment, len + 1);
3908 if (comment == NULL)
/* strncpy() does not terminate here; the next statement does so. */
3910 strncpy(comment, param, len);
3911 comment[len] = '\0';
3914 lustre_cfg_bufs_reset(&mgi->mgi_bufs, mti->mti_svname);
3915 lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 1, param);
3916 lcr = lustre_cfg_rec_new(LCFG_SPTLRPC_CONF, &mgi->mgi_bufs);
3918 GOTO(out_comment, rc = -ENOMEM);
3920 /* construct log name */
3921 rc = name_create(&logname, mti->mti_fsname, "-sptlrpc");
3925 if (mgs_log_is_empty(env, mgs, logname)) {
3926 rc = record_start_log(env, mgs, &llh, logname);
3929 record_end_log(env, &llh);
3932 /* obsolete old one */
3933 rc = mgs_modify(env, mgs, fsdb, mti, logname, mti->mti_svname,
3937 /* write the new one */
3938 rc = mgs_write_log_direct(env, mgs, fsdb, logname, lcr,
3939 mti->mti_svname, comment);
3941 CERROR("%s: error writing log %s: rc = %d\n",
3942 mgs->mgs_obd->obd_name, logname, rc);
3944 name_destroy(&logname);
3946 lustre_cfg_rec_free(lcr);
3948 OBD_FREE(comment, len + 1);
/*
 * Apply the user-descriptor ("udesc") sptlrpc parameter to the in-memory
 * fsdb: a value of "yes" sets FSDB_UDESC in fsdb_flags, "no" clears it.
 * Parameters that do not compare equal to PARAM_SRPC_UDESC, or carry any
 * other value, end at the "Invalid param" error message.
 *
 * According to the comment below the adjustable parameter is currently
 * disabled by a statement near the top of the function; that statement
 * and the return values are not visible in this view.
 */
3952 static int mgs_srpc_set_param_udesc_mem(struct fs_db *fsdb,
3957 /* disable the adjustable udesc parameter for now, i.e. use default
3958 * setting that client always ship udesc to MDT if possible. to enable
3959 * it simply remove the following line
3963 ptr = strchr(param, '=');
3968 if (strcmp(param, PARAM_SRPC_UDESC))
3971 if (strcmp(ptr, "yes") == 0) {
3972 set_bit(FSDB_UDESC, &fsdb->fsdb_flags);
3973 CWARN("Enable user descriptor shipping from client to MDT\n");
3974 } else if (strcmp(ptr, "no") == 0) {
3975 clear_bit(FSDB_UDESC, &fsdb->fsdb_flags);
3976 CWARN("Disable user descriptor shipping from client to MDT\n");
3984 CERROR("Invalid param: %s\n", param);
/*
 * Apply one sptlrpc parameter to the in-memory rule sets of @fsdb.
 *
 * The parameter must start with PARAM_SRPC. A udesc parameter is handed
 * to mgs_srpc_set_param_udesc_mem(); anything else must be a flavor rule
 * (PARAM_SRPC_FLVR), which is parsed with sptlrpc_parse_rule(). For the
 * MGS's own fsdb (FSDB_MGS_SELF) only rules from MGC/any to MGS/any pass
 * the visible check.
 *
 * The rule set that receives the rule depends on the service name:
 *  - a name containing '-' is target specific: the matching entry of the
 *    fsdb_srpc_tgt list is used, and a new one is allocated and pushed on
 *    the list head when none matches;
 *  - MGSSELF_NAME goes straight into the MGS ruleset;
 *  - everything else goes into the general set fsdb_srpc_gen.
 *
 * NOTE(review): the svname and param parameter declarations and the
 * error codes returned on the rejection paths are not visible here.
 */
3988 static int mgs_srpc_set_param_mem(struct fs_db *fsdb,
3992 struct sptlrpc_rule rule;
3993 struct sptlrpc_rule_set *rset;
3997 if (strncmp(param, PARAM_SRPC, sizeof(PARAM_SRPC) - 1) != 0) {
3998 CERROR("Invalid sptlrpc parameter: %s\n", param);
4002 if (strncmp(param, PARAM_SRPC_UDESC,
4003 sizeof(PARAM_SRPC_UDESC) - 1) == 0) {
4004 RETURN(mgs_srpc_set_param_udesc_mem(fsdb, param));
4007 if (strncmp(param, PARAM_SRPC_FLVR, sizeof(PARAM_SRPC_FLVR) - 1) != 0) {
4008 CERROR("Invalid sptlrpc flavor parameter: %s\n", param);
4012 param += sizeof(PARAM_SRPC_FLVR) - 1;
4014 rc = sptlrpc_parse_rule(param, &rule);
4018 /* mgs rules implies must be mgc->mgs */
4019 if (test_bit(FSDB_MGS_SELF, &fsdb->fsdb_flags)) {
4020 if ((rule.sr_from != LUSTRE_SP_MGC &&
4021 rule.sr_from != LUSTRE_SP_ANY) ||
4022 (rule.sr_to != LUSTRE_SP_MGS &&
4023 rule.sr_to != LUSTRE_SP_ANY))
4027 /* prepare room for this coming rule. svcname format should be:
4028 * - fsname: general rule
4029 * - fsname-tgtname: target-specific rule
4031 if (strchr(svname, '-')) {
4032 struct mgs_tgt_srpc_conf *tgtconf;
4035 for (tgtconf = fsdb->fsdb_srpc_tgt; tgtconf != NULL;
4036 tgtconf = tgtconf->mtsc_next) {
4037 if (!strcmp(tgtconf->mtsc_tgt, svname)) {
4046 OBD_ALLOC_PTR(tgtconf);
4047 if (tgtconf == NULL)
4050 name_len = strlen(svname);
4052 OBD_ALLOC(tgtconf->mtsc_tgt, name_len + 1);
4053 if (tgtconf->mtsc_tgt == NULL) {
4054 OBD_FREE_PTR(tgtconf);
/*
 * NOTE(review): only name_len bytes are copied into the name_len + 1
 * buffer; the terminator presumably comes from OBD_ALLOC() zero-filling
 * the allocation -- confirm.
 */
4057 memcpy(tgtconf->mtsc_tgt, svname, name_len);
4059 tgtconf->mtsc_next = fsdb->fsdb_srpc_tgt;
4060 fsdb->fsdb_srpc_tgt = tgtconf;
4063 rset = &tgtconf->mtsc_rset;
4064 } else if (strcmp(svname, MGSSELF_NAME) == 0) {
4065 /* put _mgs related srpc rule directly in mgs ruleset */
4066 rset = &fsdb->fsdb_mgs->mgs_lut.lut_sptlrpc_rset;
4068 rset = &fsdb->fsdb_srpc_gen;
4071 rc = sptlrpc_rule_set_merge(rset, &rule);
/*
 * Set one sptlrpc parameter: validate and apply it in memory with
 * mgs_srpc_set_param_mem(), then persist it with
 * mgs_srpc_set_param_disk().
 *
 * A private copy of the parameter string is taken first and it is that
 * copy which is written to disk (see the comment at the allocation). For
 * the MGS's own fsdb (FSDB_MGS_SELF) the general rule set is pushed to
 * the existing exports right away through
 * sptlrpc_target_update_exp_flavor().
 *
 * NOTE(review): the param parameter declaration, the check of the
 * OBD_ALLOC() result and the rc checks between the steps are not visible.
 */
4076 static int mgs_srpc_set_param(const struct lu_env *env,
4077 struct mgs_device *mgs,
4079 struct mgs_target_info *mti,
4089 /* keep a copy of original param, which could be destroyed
4092 copy_size = strlen(param) + 1;
4093 OBD_ALLOC(copy, copy_size);
4096 memcpy(copy, param, copy_size);
4098 rc = mgs_srpc_set_param_mem(fsdb, mti->mti_svname, param);
4102 /* previous steps guaranteed the syntax is correct */
4103 rc = mgs_srpc_set_param_disk(env, mgs, fsdb, mti, copy);
4107 if (test_bit(FSDB_MGS_SELF, &fsdb->fsdb_flags)) {
4109 * for mgs rules, make them effective immediately.
4111 LASSERT(fsdb->fsdb_srpc_tgt == NULL);
4112 sptlrpc_target_update_exp_flavor(mgs->mgs_obd,
4113 &fsdb->fsdb_srpc_gen);
4117 OBD_FREE(copy, copy_size);
/*
 * Context handed to mgs_srpc_read_handler() while the sptlrpc log is
 * replayed: msrd_fsdb is the fsdb that receives the rules. The handler
 * also uses a field msrd_skip, whose declaration is not visible in this
 * view.
 */
4121 struct mgs_srpc_read_data {
4122 struct fs_db *msrd_fsdb;
/*
 * llog_process() callback that replays one record of the sptlrpc config
 * log into the in-memory rule sets.
 *
 * @data is a struct mgs_srpc_read_data. Only OBD_CFG_REC records that
 * pass lustre_cfg_sanity_check() are accepted. Marker records drive the
 * skip state: a CM_START marker that also has CM_SKIP turns skipping on,
 * any CM_END marker turns it off. While skipping, records are ignored.
 * Every other record must be LCFG_SPTLRPC_CONF with a service name in
 * buffer 0 and a parameter in buffer 1, which are passed to
 * mgs_srpc_set_param_mem().
 *
 * NOTE(review): the return values of the individual paths are not visible
 * in this view.
 */
4126 static int mgs_srpc_read_handler(const struct lu_env *env,
4127 struct llog_handle *llh,
4128 struct llog_rec_hdr *rec, void *data)
4130 struct mgs_srpc_read_data *msrd = data;
4131 struct cfg_marker *marker;
4132 struct lustre_cfg *lcfg = REC_DATA(rec);
4133 char *svname, *param;
4137 if (rec->lrh_type != OBD_CFG_REC) {
4138 CERROR("unhandled lrh_type: %#x\n", rec->lrh_type);
4142 cfg_len = REC_DATA_LEN(rec);
4144 rc = lustre_cfg_sanity_check(lcfg, cfg_len);
4146 CERROR("Insane cfg\n");
4150 if (lcfg->lcfg_command == LCFG_MARKER) {
4151 marker = lustre_cfg_buf(lcfg, 1);
4153 if (marker->cm_flags & CM_START &&
4154 marker->cm_flags & CM_SKIP)
4155 msrd->msrd_skip = 1;
4156 if (marker->cm_flags & CM_END)
4157 msrd->msrd_skip = 0;
4162 if (msrd->msrd_skip)
4165 if (lcfg->lcfg_command != LCFG_SPTLRPC_CONF) {
4166 CERROR("invalid command (%x)\n", lcfg->lcfg_command);
4170 svname = lustre_cfg_string(lcfg, 0);
4171 if (svname == NULL) {
4172 CERROR("svname is empty\n");
4176 param = lustre_cfg_string(lcfg, 1);
4177 if (param == NULL) {
4178 CERROR("param is empty\n");
4182 rc = mgs_srpc_set_param_mem(msrd->msrd_fsdb, svname, param);
4184 CERROR("read sptlrpc record error (%d): %s\n", rc, param);
/*
 * Load the sptlrpc rules of a filesystem from its "<fsname>-sptlrpc"
 * config log into the in-memory fsdb.
 *
 * The log is opened in the LLOG_CONFIG_ORIG_CTXT context, initialised as
 * a plain llog and replayed with mgs_srpc_read_handler(). A log with
 * llog_get_size() <= 1 is treated as success without processing. The
 * handle is closed, and the context reference and the log name are
 * released before returning.
 *
 * NOTE(review): the fsdb parameter declaration, the branch taken for an
 * empty log, the remaining llog_open() arguments and the condition that
 * guards the final CERROR() are not visible in this view.
 */
4189 int mgs_get_fsdb_srpc_from_llog(const struct lu_env *env,
4190 struct mgs_device *mgs,
4193 struct llog_handle *llh = NULL;
4194 struct llog_ctxt *ctxt;
4196 struct mgs_srpc_read_data msrd;
4200 /* construct log name */
4201 rc = name_create(&logname, fsdb->fsdb_name, "-sptlrpc");
4205 ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
4206 LASSERT(ctxt != NULL);
4208 if (mgs_log_is_empty(env, mgs, logname))
4211 rc = llog_open(env, ctxt, &llh, NULL, logname,
4219 rc = llog_init_handle(env, llh, LLOG_F_IS_PLAIN, NULL);
4221 GOTO(out_close, rc);
4223 if (llog_get_size(llh) <= 1)
4224 GOTO(out_close, rc = 0);
4226 msrd.msrd_fsdb = fsdb;
4229 rc = llog_process(env, llh, mgs_srpc_read_handler, (void *)&msrd,
4233 llog_close(env, llh);
4235 llog_ctxt_put(ctxt);
4236 name_destroy(&logname);
4239 CERROR("failed to read sptlrpc config database: %d\n", rc);
/*
 * Handle one parameter set with "lctl set_param -P" and store it in the
 * PARAMS_FILENAME log.
 *
 * Special cases checked before the generic path, in order:
 *  - PARAM_MGSNODE and PARAM_NETWORK are only set at format or initial
 *    mount time (the action taken here is not visible);
 *  - PARAM_FAILMODE cannot be changed this way and produces a console
 *    error;
 *  - PARAM_SRPC parameters go to mgs_srpc_set_param();
 *  - a parameter containing PARAM_FAILNODE is copied into mti_params from
 *    the PARAM_FAILNODE substring onwards and handed to
 *    mgs_write_log_add_failnid(); wildcards ('*') are not allowed;
 *  - root-squash / nosquash-nids parameters addressed to "llite." are
 *    refused with a warning.
 * Everything else is written with mgs_wlp_lcfg() for mti_svname.
 *
 * NOTE(review): the return codes of the special cases and the rc checks
 * are not visible in this view.
 */
4243 static int mgs_write_log_param2(const struct lu_env *env,
4244 struct mgs_device *mgs,
4246 struct mgs_target_info *mti, char *ptr)
4248 struct lustre_cfg_bufs bufs;
4252 CDEBUG(D_MGS, "next param '%s'\n", ptr);
4254 /* PARAM_MGSNODE and PARAM_NETWORK are set only when formating
4255 * or during the inital mount. It can never change after that.
4257 if (!class_match_param(ptr, PARAM_MGSNODE, NULL) ||
4258 !class_match_param(ptr, PARAM_NETWORK, NULL)) {
4263 /* Processed in mgs_write_log_ost. Another value that can't
4264 * be changed by lctl set_param -P.
4266 if (!class_match_param(ptr, PARAM_FAILMODE, NULL)) {
4267 LCONSOLE_ERROR_MSG(0x169,
4268 "%s can only be changed with tunefs.lustre and --writeconf\n",
4274 /* FIXME !!! Support for sptlrpc is incomplete. Currently the change
4275 * doesn't transmit to the client. See LU-7183.
4277 if (!class_match_param(ptr, PARAM_SRPC, NULL)) {
4278 rc = mgs_srpc_set_param(env, mgs, fsdb, mti, ptr);
4282 /* Can't use class_match_param since ptr doesn't start with
4283 * PARAM_FAILNODE. So we look for PARAM_FAILNODE contained in ptr.
4285 if (strstr(ptr, PARAM_FAILNODE)) {
4286 /* Add a failover nidlist. We already processed failovers
4287 * params for new targets in mgs_write_log_target.
4291 /* can't use wildcards with failover.node */
4292 if (strchr(ptr, '*')) {
4297 param = strstr(ptr, PARAM_FAILNODE);
4298 rc = strscpy(mti->mti_params, param, sizeof(mti->mti_params));
4302 CDEBUG(D_MGS, "Adding failnode with param %s\n",
4304 rc = mgs_write_log_add_failnid(env, mgs, fsdb, mti);
4308 /* root squash parameters must not be set on llite subsystem, this can
4309 * lead to inconsistencies between client and server values
4311 if ((strstr(ptr, PARAM_NOSQUASHNIDS) ||
4312 strstr(ptr, PARAM_ROOTSQUASH)) &&
4313 strncmp(ptr, "llite.", strlen("llite.")) == 0) {
4315 CWARN("%s: cannot add %s param to llite subsystem, use mdt instead: rc=%d\n",
4316 mgs->mgs_obd->obd_name,
4317 strstr(ptr, PARAM_ROOTSQUASH) ?
4318 PARAM_ROOTSQUASH : PARAM_NOSQUASHNIDS,
4323 rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, PARAMS_FILENAME, &bufs,
4324 mti->mti_svname, ptr);
4329 /* Permanent settings of all parameters by writing into the appropriate
4330 * configuration logs.
4331 * A parameter with null value ("<param>='\0'") means to erase it out of
/*
 * Apply one permanent parameter @ptr for the target described by @mti,
 * dispatching on the parameter's prefix.  As far as the code below shows:
 *  - PARAM_SRPC is handed to mgs_srpc_set_param(), PARAM_SYS to
 *    mgs_write_log_sys() and PARAM_QUOTA to mgs_write_log_quota();
 *  - PARAM_FAILNODE calls mgs_write_log_add_failnid(), but only when
 *    LDD_F_PARAM is set in mti->mti_flags;
 *  - osc/mdc "active" settings go through mgs_modify() for the client log
 *    and for the log of every MDT set in fsdb->fsdb_mdt_index_map, and then
 *    fall through to the generic osc/mdc handling;
 *  - lov., osc., mdc., llite., mdt., mdd., lod., ost. and osd. settings are
 *    written with mgs_wlp_lcfg() into the logs that consume them;
 *  - anything else is reported with "Ignoring unrecognized param".
 * llite root squash settings are refused here and are instead derived from
 * the mdt. root squash settings, which are mirrored into the client log.
 *
 * Both call sites visible in this file take fsdb->fsdb_mutex around the call.
 */
4334 static int mgs_write_log_param(const struct lu_env *env,
4335 struct mgs_device *mgs, struct fs_db *fsdb,
4336 struct mgs_target_info *mti, char *ptr)
4338 struct mgs_thread_info *mgi = mgs_env_info(env);
4344 /* For various parameter settings, we have to figure out which logs
4345 * care about them (e.g. both mdt and client for lov settings)
4347 CDEBUG(D_MGS, "next param '%s'\n", ptr);
4349 /* The params are stored in MOUNT_DATA_FILE and modified via
4350 * tunefs.lustre, or set using lctl conf_param
4353 /* Processed in lustre_start_mgc */
4354 if (class_match_param(ptr, PARAM_MGSNODE, NULL) == 0)
4357 /* Processed in ost/mdt */
4358 if (class_match_param(ptr, PARAM_NETWORK, NULL) == 0)
4361 /* Processed in mgs_write_log_ost */
4362 if (class_match_param(ptr, PARAM_FAILMODE, NULL) == 0) {
4363 if (mti->mti_flags & LDD_F_PARAM) {
4364 LCONSOLE_ERROR_MSG(0x169,
4365 "%s can only be changed with tunefs.lustre and --writeconf\n",
4372 if (class_match_param(ptr, PARAM_SRPC, NULL) == 0) {
4373 rc = mgs_srpc_set_param(env, mgs, fsdb, mti, ptr);
4377 if (class_match_param(ptr, PARAM_FAILNODE, NULL) == 0) {
4378 /* Add a failover nidlist */
4380 /* We already processed failovers params for new
4381 * targets in mgs_write_log_target
4383 if (mti->mti_flags & LDD_F_PARAM) {
4384 CDEBUG(D_MGS, "Adding failnode\n");
4385 rc = mgs_write_log_add_failnid(env, mgs, fsdb, mti);
4390 if (class_match_param(ptr, PARAM_SYS, &tmp) == 0) {
4391 rc = mgs_write_log_sys(env, mgs, fsdb, mti, ptr, tmp);
4395 if (class_match_param(ptr, PARAM_QUOTA, &tmp) == 0) {
4396 rc = mgs_write_log_quota(env, mgs, fsdb, mti, ptr, tmp);
4400 if (class_match_param(ptr, PARAM_OSC PARAM_ACTIVE, &tmp) == 0 ||
4401 class_match_param(ptr, PARAM_MDC PARAM_ACTIVE, &tmp) == 0) {
4402 /* active=0 means off, anything else means on */
4403 int flag = (*tmp == '0') ? CM_EXCLUDE : 0;
4404 bool deactive_osc = memcmp(ptr, PARAM_OSC PARAM_ACTIVE,
4405 strlen(PARAM_OSC PARAM_ACTIVE)) == 0;
4408 if (!deactive_osc) {
4411 rc = server_name2index(mti->mti_svname, &index, NULL);
4416 LCONSOLE_ERROR_MSG(0x144, "%s: MDC0 can not be"
4417 " (de)activated.\n",
4419 GOTO(end, rc = -EPERM);
4423 LCONSOLE_WARN("Permanently %sactivating %s\n",
4424 flag ? "de" : "re", mti->mti_svname);
4426 rc = name_create(&logname, mti->mti_fsname, "-client");
4429 rc = mgs_modify(env, mgs, fsdb, mti, logname,
4431 deactive_osc ? "add osc" : "add mdc", flag);
4432 name_destroy(&logname);
4437 /* Add to all MDT logs for DNE */
4438 for (i = 0; i < INDEX_MAP_SIZE * 8; i++) {
4439 if (!test_bit(i, fsdb->fsdb_mdt_index_map))
4441 rc = name_create_mdt(&logname, mti->mti_fsname, i);
4444 rc = mgs_modify(env, mgs, fsdb, mti, logname,
4446 deactive_osc ? "add osc" : "add osp",
4448 name_destroy(&logname);
4454 LCONSOLE_ERROR_MSG(0x145,
4455 "Couldn't find %s in log (%d). No permanent changes were made to the config log.\n",
4456 mti->mti_svname, rc);
4457 if (test_bit(FSDB_OLDLOG14, &fsdb->fsdb_flags))
4458 LCONSOLE_ERROR_MSG(0x146,
4459 "This may be because the log is in the old 1.4 style. Consider --writeconf to update the logs.\n");
4462 /* Fall through to osc/mdc proc for deactivating live
4463 * OSC/OSP on running MDT / clients.
4466 /* Below here, let obd's XXX_process_config methods handle it */
4468 /* All lov. in proc */
4469 if (class_match_param(ptr, PARAM_LOV, NULL) == 0) {
4472 CDEBUG(D_MGS, "lov param %s\n", ptr);
4473 if (!(mti->mti_flags & LDD_F_SV_TYPE_MDT)) {
4474 LCONSOLE_ERROR_MSG(0x147,
4475 "LOV params must be set on the MDT, not %s. Ignoring.\n",
4481 if (mgs_log_is_empty(env, mgs, mti->mti_svname))
4482 GOTO(end, rc = -ENODEV);
4484 rc = name_create_mdt_and_lov(&logname, &mdtlovname, fsdb,
4485 mti->mti_stripe_index);
4488 rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, mti->mti_svname,
4489 &mgi->mgi_bufs, mdtlovname, ptr);
4490 name_destroy(&logname);
4491 name_destroy(&mdtlovname);
4496 rc = name_create(&logname, mti->mti_fsname, "-client");
4499 rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, logname, &mgi->mgi_bufs,
4500 fsdb->fsdb_clilov, ptr);
4501 name_destroy(&logname);
4505 /* All osc., mdc., llite. params in proc */
4506 if ((class_match_param(ptr, PARAM_OSC, NULL) == 0) ||
4507 (class_match_param(ptr, PARAM_MDC, NULL) == 0) ||
4508 (class_match_param(ptr, PARAM_LLITE, NULL) == 0)) {
4511 if (test_bit(FSDB_OLDLOG14, &fsdb->fsdb_flags)) {
4512 LCONSOLE_ERROR_MSG(0x148, "Upgraded client logs for %s"
4513 " cannot be modified. Consider"
4514 " updating the configuration with"
4517 GOTO(end, rc = -EINVAL);
4519 if (memcmp(ptr, PARAM_LLITE, strlen(PARAM_LLITE)) == 0) {
4520 rc = name_create(&cname, mti->mti_fsname, "-client");
4521 /* Add the client type to match the obdname in
4522 * class_config_llog_handler
4524 } else if (mti->mti_flags & LDD_F_SV_TYPE_MDT) {
4525 rc = name_create(&cname, mti->mti_svname, "-mdc");
4526 } else if (mti->mti_flags & LDD_F_SV_TYPE_OST) {
4527 rc = name_create(&cname, mti->mti_svname, "-osc");
4529 GOTO(end, rc = -EINVAL);
4534 /* Forbid direct update of llite root squash parameters.
4535 * These parameters are indirectly set via the MDT settings.
4537 if ((class_match_param(ptr, PARAM_LLITE, &tmp) == 0) &&
4538 ((memcmp(tmp, "root_squash=", 12) == 0) ||
4539 (memcmp(tmp, "nosquash_nids=", 14) == 0))) {
4540 LCONSOLE_ERROR("%s: root squash parameters can only "
4541 "be updated through MDT component\n",
4543 name_destroy(&cname);
4544 GOTO(end, rc = -EINVAL);
4547 CDEBUG(D_MGS, "%.3s param %s\n", ptr, ptr + 4);
4550 rc = name_create(&logname, mti->mti_fsname, "-client");
4552 name_destroy(&cname);
4555 rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, logname, &mgi->mgi_bufs,
4558 /* osc params affect the MDT as well */
4559 if (!rc && (mti->mti_flags & LDD_F_SV_TYPE_OST)) {
4562 for (i = 0; i < INDEX_MAP_SIZE * 8; i++) {
4563 if (!test_bit(i, fsdb->fsdb_mdt_index_map))
4565 name_destroy(&cname);
4566 rc = name_create_osp(&cname, NULL,
4567 mti->mti_svname, i);
4568 name_destroy(&logname);
4571 rc = name_create_mdt(&logname,
4572 mti->mti_fsname, i);
4575 if (!mgs_log_is_empty(env, mgs, logname)) {
4576 rc = mgs_wlp_lcfg(env, mgs, fsdb,
4586 /* For mdc activate/deactivate, it affects OSP on MDT as well */
4587 if (class_match_param(ptr, PARAM_MDC PARAM_ACTIVE, &tmp) == 0 &&
4590 char *lodname = NULL;
4591 char *param_str = NULL;
4595 /* replace mdc with osp */
4596 memcpy(ptr, PARAM_OSP, strlen(PARAM_OSP));
4597 rc = server_name2index(mti->mti_svname, &index, NULL);
4599 memcpy(ptr, PARAM_MDC, strlen(PARAM_MDC));
4603 for (i = 0; i < INDEX_MAP_SIZE * 8; i++) {
4604 if (!test_bit(i, fsdb->fsdb_mdt_index_map))
4610 name_destroy(&logname);
4611 rc = name_create_mdt(&logname, mti->mti_fsname,
4616 if (mgs_log_is_empty(env, mgs, logname))
4619 snprintf(suffix, sizeof(suffix), "-osp-MDT%04x",
4621 name_destroy(&cname);
4622 rc = name_create(&cname, mti->mti_svname,
4627 rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, logname,
4628 &mgi->mgi_bufs, cname, ptr);
4632 /* Add configuration log for notifying LOD
4633 * to activate/deactivate the OSP. */
4634 name_destroy(¶m_str);
4635 rc = name_create(¶m_str, cname,
4636 (*tmp == '0') ? ".active=0" :
4641 name_destroy(&lodname);
4642 rc = name_create(&lodname, logname, "-mdtlov");
4646 rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, logname,
4647 &mgi->mgi_bufs, lodname,
4652 memcpy(ptr, PARAM_MDC, strlen(PARAM_MDC));
4653 name_destroy(&lodname);
4654 name_destroy(¶m_str);
4657 name_destroy(&logname);
4658 name_destroy(&cname);
4662 /* All mdt. params in proc */
4663 if (class_match_param(ptr, PARAM_MDT, &tmp) == 0) {
4667 CDEBUG(D_MGS, "%.3s param %s\n", ptr, ptr + 4);
4668 if (strncmp(mti->mti_svname, mti->mti_fsname,
4669 MTI_NAME_MAXLEN) == 0)
4670 /* device is unspecified completely? */
4671 rc = LDD_F_SV_TYPE_MDT | LDD_F_SV_ALL;
4673 rc = server_name2index(mti->mti_svname, &idx, NULL);
4676 if ((rc & LDD_F_SV_TYPE_MDT) == 0)
4678 if (rc & LDD_F_SV_ALL) {
4679 for (i = 0; i < INDEX_MAP_SIZE * 8; i++) {
4681 fsdb->fsdb_mdt_index_map))
4683 rc = name_create_mdt(&logname,
4684 mti->mti_fsname, i);
4687 rc = mgs_wlp_lcfg(env, mgs, fsdb, mti,
4688 logname, &mgi->mgi_bufs,
4690 name_destroy(&logname);
4695 if ((memcmp(tmp, "root_squash=", 12) == 0) ||
4696 (memcmp(tmp, "nosquash_nids=", 14) == 0)) {
4697 LCONSOLE_ERROR("%s: root squash parameters "
4698 "cannot be applied to a single MDT\n",
4700 GOTO(end, rc = -EINVAL);
4702 rc = mgs_wlp_lcfg(env, mgs, fsdb, mti,
4703 mti->mti_svname, &mgi->mgi_bufs,
4704 mti->mti_svname, ptr);
4709 /* root squash settings are also applied to llite
4710 * config log (see LU-1778) */
4712 ((memcmp(tmp, "root_squash=", 12) == 0) ||
4713 (memcmp(tmp, "nosquash_nids=", 14) == 0))) {
4717 rc = name_create(&cname, mti->mti_fsname, "-client");
4720 rc = name_create(&logname, mti->mti_fsname, "-client");
4722 name_destroy(&cname);
4725 rc = name_create(&ptr2, PARAM_LLITE, tmp);
4727 name_destroy(&cname);
4728 name_destroy(&logname);
4731 rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, logname,
4732 &mgi->mgi_bufs, cname, ptr2);
4733 name_destroy(&ptr2);
4734 name_destroy(&logname);
4735 name_destroy(&cname);
4740 /* All mdd., ost. and osd. params in proc */
4741 if ((class_match_param(ptr, PARAM_MDD, NULL) == 0) ||
4742 (class_match_param(ptr, PARAM_LOD, NULL) == 0) ||
4743 (class_match_param(ptr, PARAM_OST, NULL) == 0) ||
4744 (class_match_param(ptr, PARAM_OSD, NULL) == 0)) {
4745 CDEBUG(D_MGS, "%.3s param %s\n", ptr, ptr + 4);
4746 if (mgs_log_is_empty(env, mgs, mti->mti_svname))
4747 GOTO(end, rc = -ENODEV);
4749 rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, mti->mti_svname,
4750 &mgi->mgi_bufs, mti->mti_svname, ptr);
4754 /* For handling degraded zfs OST */
4755 if (class_match_param(ptr, PARAM_AUTODEGRADE, NULL) == 0)
4758 LCONSOLE_WARN("Ignoring unrecognized param '%s'\n", ptr);
4762 CERROR("err %d on param '%s'\n", rc, ptr);
/*
 * Write or update the configuration logs for the target described by @mti.
 *
 * The target index is set/checked first with mgs_set_index(); when that
 * reports EALREADY, LDD_F_UPDATE is cleared so the client/MDT logs are not
 * rewritten.  With fsdb->fsdb_mutex held, a target flagged LDD_F_VIRGIN or
 * LDD_F_WRITECONF gets its log generated from scratch by
 * mgs_write_log_mdt() or mgs_write_log_ost(); otherwise LDD_F_PARAM is set
 * so that only parameters are updated.  Each parameter in mti->mti_params
 * is then copied into a temporary buffer by class_get_next_param() and
 * passed to mgs_write_log_param().
 */
4767 int mgs_write_log_target(const struct lu_env *env, struct mgs_device *mgs,
4768 struct mgs_target_info *mti, struct fs_db *fsdb)
4775 /* set/check the new target index */
4776 rc = mgs_set_index(env, mgs, mti);
4780 if (rc == EALREADY) {
4781 LCONSOLE_WARN("Found index %d for %s, updating log\n",
4782 mti->mti_stripe_index, mti->mti_svname);
4783 /* We would like to mark old log sections as invalid
4784 and add new log sections in the client and mdt logs.
4785 But if we add new sections, then live clients will
4786 get repeat setup instructions for already running
4787 osc's. So don't update the client/mdt logs. */
4788 mti->mti_flags &= ~LDD_F_UPDATE;
4792 CFS_FAIL_TIMEOUT(OBD_FAIL_MGS_WRITE_TARGET_DELAY, cfs_fail_val > 0 ?
4795 mutex_lock(&fsdb->fsdb_mutex);
4797 if (mti->mti_flags & (LDD_F_VIRGIN | LDD_F_WRITECONF)) {
4798 /* Generate a log from scratch */
4799 if (mti->mti_flags & LDD_F_SV_TYPE_MDT) {
4800 rc = mgs_write_log_mdt(env, mgs, fsdb, mti);
4801 } else if (mti->mti_flags & LDD_F_SV_TYPE_OST) {
4802 rc = mgs_write_log_ost(env, mgs, fsdb, mti);
4804 CERROR("Unknown target type %#x, can't create log for %s\n",
4805 mti->mti_flags, mti->mti_svname);
4808 CERROR("Can't write logs for %s (%d)\n",
4809 mti->mti_svname, rc);
4813 /* Just update the params from tunefs in mgs_write_log_params */
4814 CDEBUG(D_MGS, "Update params for %s\n", mti->mti_svname);
4815 mti->mti_flags |= LDD_F_PARAM;
4818 /* allocate temporary buffer, where class_get_next_param will
4819 * make copy of a current parameter
4821 OBD_ALLOC(buf, strlen(mti->mti_params) + 1);
4823 GOTO(out_up, rc = -ENOMEM);
4824 params = mti->mti_params;
4825 while (params != NULL) {
4826 rc = class_get_next_param(¶ms, buf);
4829 /* there is no next parameter, that is
4835 CDEBUG(D_MGS, "remaining string: '%s', param: '%s'\n",
4837 rc = mgs_write_log_param(env, mgs, fsdb, mti, buf);
4842 OBD_FREE(buf, strlen(mti->mti_params) + 1);
4845 mutex_unlock(&fsdb->fsdb_mutex);
/*
 * Erase the single config log @name.
 *
 * The log is removed with llog_erase() through the MGS obd's
 * LLOG_CONFIG_ORIG_CTXT context; the context reference is dropped again
 * with llog_ctxt_put().  A failure is reported with CERROR.
 */
4849 int mgs_erase_log(const struct lu_env *env, struct mgs_device *mgs, char *name)
4851 struct llog_ctxt *ctxt;
4854 ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
4856 CERROR("%s: MGS config context doesn't exist\n",
4857 mgs->mgs_obd->obd_name);
4860 rc = llog_erase(env, ctxt, NULL, name);
4861 /* llog may not exist */
4864 llog_ctxt_put(ctxt);
4868 CERROR("%s: failed to clear log %s: %d\n",
4869 mgs->mgs_obd->obd_name, name, rc);
4874 /* Erase all config logs that belong to the given fs.
 *
 * With mgs->mgs_mutex held, the CONFIGS directory is listed and the fsdb
 * entries for "<fsname>-<BARRIER_FILENAME>" and for the fs itself are
 * removed.  After the mutex is dropped, every listed log whose name is
 * exactly <fsname> followed by a last '-' and a suffix is erased with
 * mgs_erase_log(); the prefix length is compared as well as its bytes, so
 * a longer fs name sharing the same prefix is not touched.  All directory
 * entries are freed whether or not they matched.
 */
4875 int mgs_erase_logs(const struct lu_env *env, struct mgs_device *mgs,
4878 struct list_head log_list;
4879 struct mgs_direntry *dirent, *n;
4880 char barrier_name[20] = {};
4883 int rc, len = strlen(fsname);
4886 mutex_lock(&mgs->mgs_mutex);
4888 /* Find all the logs in the CONFIGS directory */
4889 rc = class_dentry_readdir(env, mgs, &log_list);
4891 mutex_unlock(&mgs->mgs_mutex);
4895 if (list_empty(&log_list)) {
4896 mutex_unlock(&mgs->mgs_mutex);
4900 snprintf(barrier_name, sizeof(barrier_name) - 1, "%s-%s",
4901 fsname, BARRIER_FILENAME);
4902 /* Delete the barrier fsdb */
4903 mgs_remove_fsdb_by_name(mgs, barrier_name);
4904 /* Delete the fs db */
4905 mgs_remove_fsdb_by_name(mgs, fsname);
4906 mutex_unlock(&mgs->mgs_mutex);
4908 list_for_each_entry_safe(dirent, n, &log_list, mde_list) {
4909 list_del_init(&dirent->mde_list);
4910 suffix = strrchr(dirent->mde_name, '-');
4911 if (suffix != NULL) {
4912 if ((len == suffix - dirent->mde_name) &&
4913 (strncmp(fsname, dirent->mde_name, len) == 0)) {
4914 CDEBUG(D_MGS, "Removing log %s\n",
4916 mgs_erase_log(env, mgs, dirent->mde_name);
4920 mgs_direntry_free(dirent);
4929 /* List the config logs found in the CONFIGS directory.
 *
 * Each log whose name contains a '-' is formatted as "config_log: <name>\n"
 * into data->ioc_bulk, which has room for data->ioc_inllen1 bytes.
 * data->ioc_count is read as the number of leading records to skip and is
 * overwritten with the value of @start on the way out, presumably so the
 * caller can continue a listing that did not fit - TODO confirm against
 * the ioctl caller.
 */
4930 int mgs_list_logs(const struct lu_env *env, struct mgs_device *mgs,
4931 struct obd_ioctl_data *data)
4933 struct list_head log_list;
4934 struct mgs_direntry *dirent, *n;
4935 char *out, *suffix, prefix[] = "config_log: ";
4936 int prefix_len = strlen(prefix);
4937 int len, remains, start = 0, rc;
4941 /* Find all the logs in the CONFIGS directory */
4942 rc = class_dentry_readdir(env, mgs, &log_list);
4946 out = data->ioc_bulk;
4947 remains = data->ioc_inllen1;
4948 /* OBD_FAIL: fetch the config_log records from the specified one */
4949 if (CFS_FAIL_CHECK(OBD_FAIL_CATLIST))
4950 data->ioc_count = cfs_fail_val;
4952 list_for_each_entry_safe(dirent, n, &log_list, mde_list) {
4953 list_del_init(&dirent->mde_list);
4954 suffix = strrchr(dirent->mde_name, '-');
4955 if (suffix != NULL) {
4956 len = prefix_len + dirent->mde_len + 1;
4957 if (remains - len < 0) {
4958 /* Not enough space for this record */
4959 mgs_direntry_free(dirent);
4963 if (start < data->ioc_count) {
4964 mgs_direntry_free(dirent);
4967 len = scnprintf(out, remains, "%s%s\n", prefix,
4972 mgs_direntry_free(dirent);
4980 data->ioc_count = start;
/*
 * Per-fork state handed to mgs_lcfg_fork_handler() through llog_process().
 * mgs_lcfg_fork_one() allocates it with LLOG_MIN_CHUNK_SIZE bytes and fills
 * in the MGS device, the destination log handle and the two fs names;
 * mlfd_bufs is the buffer set each rewritten record is assembled in.
 */
4984 struct mgs_lcfg_fork_data {
4985 struct lustre_cfg_bufs mlfd_bufs;
4986 struct mgs_device *mlfd_mgs;
4987 struct llog_handle *mlfd_llh;
4988 const char *mlfd_oldname;
4989 const char *mlfd_newname;
/*
 * Check whether @buf begins with the file system name @fsname.
 *
 * @buf      candidate buffer; it need not be NUL-terminated
 * @fsname   file system name to look for
 * @buflen   number of valid bytes in @buf
 * @namelen  length of @fsname, not counting a terminating NUL
 *
 * The name only counts when it is a complete leading component: it must be
 * followed by the end of the buffer, by a NUL, or by the '-' that separates
 * the fs name from the rest of a target name ("lustre-MDT0000").
 *
 * Return true on a match, false otherwise.
 */
static bool contain_valid_fsname(char *buf, const char *fsname,
				 int buflen, int namelen)
{
	if (namelen < 0 || buflen < namelen)
		return false;

	if (memcmp(buf, fsname, namelen) != 0)
		return false;

	/* The name fills the buffer exactly: buf[namelen] lies past the
	 * last valid byte and must not be inspected.
	 */
	if (buflen == namelen)
		return true;

	return buf[namelen] == '\0' || buf[namelen] == '-';
}
/*
 * llog_process() callback used while forking a config log: rebuild one
 * record of the old log with the old fs name replaced by the new one and
 * append it to the new log.
 *
 * @o_llh  handle of the log being read
 * @o_rec  record header; the lustre_cfg payload follows it directly
 * @data   the struct mgs_lcfg_fork_data set up by mgs_lcfg_fork_one()
 *
 * Buffer 0 and, depending on lcfg_command, the marker target name, the pool
 * fsname/ostname buffers, the uuid inside a lov_desc/lmv_desc sized buffer
 * or any other buffer that starts with the old name are copied into the
 * scratch area (mlfd_data) with the new name substituted; buffers that do
 * not start with the old name are reused in place.  The new record is built
 * with lustre_cfg_rec_new(), the old lustre_cfg header is then assigned over
 * lcr->lcr_cfg, and the record is written with llog_write().
 */
5008 static int mgs_lcfg_fork_handler(const struct lu_env *env,
5009 struct llog_handle *o_llh,
5010 struct llog_rec_hdr *o_rec, void *data)
5012 struct mgs_lcfg_fork_data *mlfd = data;
5013 struct lustre_cfg_bufs *n_bufs = &mlfd->mlfd_bufs;
5014 struct lustre_cfg *o_lcfg = (struct lustre_cfg *)(o_rec + 1);
5015 struct llog_cfg_rec *lcr;
5017 char *n_buf = mlfd->mlfd_data;
5019 int o_namelen = strlen(mlfd->mlfd_oldname);
5020 int n_namelen = strlen(mlfd->mlfd_newname);
5021 int diff = n_namelen - o_namelen;
5022 __u32 cmd = o_lcfg->lcfg_command;
5023 __u32 cnt = o_lcfg->lcfg_bufcount;
5029 o_buf = lustre_cfg_buf(o_lcfg, 0);
5030 o_buflen = o_lcfg->lcfg_buflens[0];
5031 if (contain_valid_fsname(o_buf, mlfd->mlfd_oldname, o_buflen,
5033 memcpy(n_buf, mlfd->mlfd_newname, n_namelen);
5034 memcpy(n_buf + n_namelen, o_buf + o_namelen,
5035 o_buflen - o_namelen);
5036 lustre_cfg_bufs_reset(n_bufs, n_buf);
5037 n_buf += round_up(o_buflen + diff, 8);
5039 lustre_cfg_bufs_reset(n_bufs, o_buflen != 0 ? o_buf : NULL);
5044 struct cfg_marker *o_marker;
5045 struct cfg_marker *n_marker;
5049 CDEBUG(D_MGS, "Unknown cfg marker entry with %d "
5054 /* buf[1] is marker */
5055 o_buf = lustre_cfg_buf(o_lcfg, 1);
5056 o_buflen = o_lcfg->lcfg_buflens[1];
5057 o_marker = (struct cfg_marker *)o_buf;
5058 if (!contain_valid_fsname(o_marker->cm_tgtname,
5060 sizeof(o_marker->cm_tgtname),
5062 lustre_cfg_bufs_set(n_bufs, 1, o_marker,
5067 n_marker = (struct cfg_marker *)n_buf;
5068 *n_marker = *o_marker;
5069 memcpy(n_marker->cm_tgtname, mlfd->mlfd_newname, n_namelen);
5070 tgt_namelen = strlen(o_marker->cm_tgtname);
5071 if (tgt_namelen > o_namelen)
5072 memcpy(n_marker->cm_tgtname + n_namelen,
5073 o_marker->cm_tgtname + o_namelen,
5074 tgt_namelen - o_namelen);
5075 n_marker->cm_tgtname[tgt_namelen + diff] = '\0';
5076 lustre_cfg_bufs_set(n_bufs, 1, n_marker, sizeof(*n_marker));
5080 case LCFG_SET_PARAM: {
5081 for (i = 1; i < cnt; i++)
5082 /* buf[i] is the param value, reuse it directly */
5083 lustre_cfg_bufs_set(n_bufs, i,
5084 lustre_cfg_buf(o_lcfg, i),
5085 o_lcfg->lcfg_buflens[i]);
5091 case LCFG_POOL_DEL: {
5092 if (cnt < 3 || cnt > 4) {
5093 CDEBUG(D_MGS, "Unknown cfg pool (%x) entry with %d "
5094 "buffers\n", cmd, cnt);
5098 /* buf[1] is fsname */
5099 o_buf = lustre_cfg_buf(o_lcfg, 1);
5100 o_buflen = o_lcfg->lcfg_buflens[1];
5101 memcpy(n_buf, mlfd->mlfd_newname, n_namelen);
5102 memcpy(n_buf + n_namelen, o_buf + o_namelen,
5103 o_buflen - o_namelen);
5104 lustre_cfg_bufs_set(n_bufs, 1, n_buf, o_buflen + diff);
5105 n_buf += round_up(o_buflen + diff, 8);
5107 /* buf[2] is the pool name, reuse it directly */
5108 lustre_cfg_bufs_set(n_bufs, 2, lustre_cfg_buf(o_lcfg, 2),
5109 o_lcfg->lcfg_buflens[2]);
5114 /* buf[3] is ostname */
5115 o_buf = lustre_cfg_buf(o_lcfg, 3);
5116 o_buflen = o_lcfg->lcfg_buflens[3];
5117 memcpy(n_buf, mlfd->mlfd_newname, n_namelen);
5118 memcpy(n_buf + n_namelen, o_buf + o_namelen,
5119 o_buflen - o_namelen);
5120 lustre_cfg_bufs_set(n_bufs, 3, n_buf, o_buflen + diff);
5125 o_buflen = o_lcfg->lcfg_buflens[1];
5126 if (o_buflen == sizeof(struct lov_desc) ||
5127 o_buflen == sizeof(struct lmv_desc)) {
5133 o_buf = lustre_cfg_buf(o_lcfg, 1);
5134 if (o_buflen == sizeof(struct lov_desc)) {
5135 struct lov_desc *o_desc =
5136 (struct lov_desc *)o_buf;
5137 struct lov_desc *n_desc =
5138 (struct lov_desc *)n_buf;
5141 o_uuid = o_desc->ld_uuid.uuid;
5142 n_uuid = n_desc->ld_uuid.uuid;
5143 uuid_len = sizeof(o_desc->ld_uuid.uuid);
5145 struct lmv_desc *o_desc =
5146 (struct lmv_desc *)o_buf;
5147 struct lmv_desc *n_desc =
5148 (struct lmv_desc *)n_buf;
5151 o_uuid = o_desc->ld_uuid.uuid;
5152 n_uuid = n_desc->ld_uuid.uuid;
5153 uuid_len = sizeof(o_desc->ld_uuid.uuid);
5156 if (unlikely(!contain_valid_fsname(o_uuid,
5157 mlfd->mlfd_oldname, uuid_len,
5159 lustre_cfg_bufs_set(n_bufs, 1, o_buf,
5164 memcpy(n_uuid, mlfd->mlfd_newname, n_namelen);
5165 uuid_len = strlen(o_uuid);
5166 if (uuid_len > o_namelen)
5167 memcpy(n_uuid + n_namelen,
5169 uuid_len - o_namelen);
5170 n_uuid[uuid_len + diff] = '\0';
5171 lustre_cfg_bufs_set(n_bufs, 1, n_buf, o_buflen);
5173 } /* else case fall through */
5174 } /* else case fall through */
5178 for (i = 1; i < cnt; i++) {
5179 o_buflen = o_lcfg->lcfg_buflens[i];
5183 o_buf = lustre_cfg_buf(o_lcfg, i);
5184 if (!contain_valid_fsname(o_buf, mlfd->mlfd_oldname,
5185 o_buflen, o_namelen)) {
5186 lustre_cfg_bufs_set(n_bufs, i, o_buf, o_buflen);
5190 memcpy(n_buf, mlfd->mlfd_newname, n_namelen);
5191 if (o_buflen == o_namelen) {
5192 lustre_cfg_bufs_set(n_bufs, i, n_buf,
5194 n_buf += round_up(n_namelen, 8);
5198 memcpy(n_buf + n_namelen, o_buf + o_namelen,
5199 o_buflen - o_namelen);
5200 lustre_cfg_bufs_set(n_bufs, i, n_buf, o_buflen + diff);
5201 n_buf += round_up(o_buflen + diff, 8);
5207 lcr = lustre_cfg_rec_new(cmd, n_bufs);
5211 lcr->lcr_cfg = *o_lcfg;
5212 rc = llog_write(env, mlfd->mlfd_llh, &lcr->lcr_hdr, LLOG_NEXT_IDX);
5213 lustre_cfg_rec_free(lcr);
/*
 * Fork the single config log described by @mde from fs @oldname to fs
 * @newname.
 *
 * The new log name is @newname followed by whatever follows @oldname in
 * mde->mde_name.  The new log is created with llog_open_create(), the old
 * one opened with llog_open(); the target uuid of the old log header is
 * copied to the new header and every record is then rewritten through
 * mgs_lcfg_fork_handler() via llog_process().  The handler's state block is
 * allocated with LLOG_MIN_CHUNK_SIZE bytes and freed after processing.
 */
5218 static int mgs_lcfg_fork_one(const struct lu_env *env, struct mgs_device *mgs,
5219 struct mgs_direntry *mde, const char *oldname,
5220 const char *newname)
5222 struct llog_handle *old_llh = NULL;
5223 struct llog_handle *new_llh = NULL;
5224 struct llog_ctxt *ctxt = NULL;
5225 struct mgs_lcfg_fork_data *mlfd = NULL;
5226 char *name_buf = NULL;
5228 int old_namelen = strlen(oldname);
5229 int new_namelen = strlen(newname);
5233 name_buflen = mde->mde_len + new_namelen - old_namelen;
5234 OBD_ALLOC(name_buf, name_buflen);
5238 memcpy(name_buf, newname, new_namelen);
5239 memcpy(name_buf + new_namelen, mde->mde_name + old_namelen,
5240 mde->mde_len - old_namelen);
5242 CDEBUG(D_MGS, "Fork the config-log from %s to %s\n",
5243 mde->mde_name, name_buf);
5245 ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
5248 rc = llog_open_create(env, ctxt, &new_llh, NULL, name_buf);
5252 rc = llog_init_handle(env, new_llh, LLOG_F_IS_PLAIN, NULL);
5256 if (unlikely(mgs_log_is_empty(env, mgs, mde->mde_name)))
5259 rc = llog_open(env, ctxt, &old_llh, NULL, mde->mde_name,
5264 rc = llog_init_handle(env, old_llh, LLOG_F_IS_PLAIN, NULL);
5268 new_llh->lgh_hdr->llh_tgtuuid = old_llh->lgh_hdr->llh_tgtuuid;
5270 OBD_ALLOC(mlfd, LLOG_MIN_CHUNK_SIZE);
5272 GOTO(out, rc = -ENOMEM);
5274 mlfd->mlfd_mgs = mgs;
5275 mlfd->mlfd_llh = new_llh;
5276 mlfd->mlfd_oldname = oldname;
5277 mlfd->mlfd_newname = newname;
5279 rc = llog_process(env, old_llh, mgs_lcfg_fork_handler, mlfd, NULL);
5280 OBD_FREE(mlfd, LLOG_MIN_CHUNK_SIZE);
5286 llog_close(env, old_llh);
5288 llog_close(env, new_llh);
5290 OBD_FREE(name_buf, name_buflen);
5292 llog_ctxt_put(ctxt);
5297 int mgs_lcfg_fork(const struct lu_env *env, struct mgs_device *mgs,
5298 const char *oldname, const char *newname)
5300 struct list_head log_list;
5301 struct mgs_direntry *dirent, *n;
5302 int olen = strlen(oldname);
5303 int nlen = strlen(newname);
5308 if (unlikely(!oldname || oldname[0] == '\0' ||
5309 !newname || newname[0] == '\0'))
5312 if (strcmp(oldname, newname) == 0)
5315 /* lock it to prevent fork/erase/register in parallel. */
5316 mutex_lock(&mgs->mgs_mutex);
5318 rc = class_dentry_readdir(env, mgs, &log_list);
5320 mutex_unlock(&mgs->mgs_mutex);
5324 if (list_empty(&log_list)) {
5325 mutex_unlock(&mgs->mgs_mutex);
5329 list_for_each_entry_safe(dirent, n, &log_list, mde_list) {
5332 ptr = strrchr(dirent->mde_name, '-');
5334 int tlen = ptr - dirent->mde_name;
5337 strncmp(newname, dirent->mde_name, tlen) == 0)
5338 GOTO(out, rc = -EEXIST);
5341 strncmp(oldname, dirent->mde_name, tlen) == 0)
5345 list_del_init(&dirent->mde_list);
5346 mgs_direntry_free(dirent);
5349 if (list_empty(&log_list)) {
5350 mutex_unlock(&mgs->mgs_mutex);
5354 list_for_each_entry(dirent, &log_list, mde_list) {
5355 rc = mgs_lcfg_fork_one(env, mgs, dirent, oldname, newname);
5363 mutex_unlock(&mgs->mgs_mutex);
5365 list_for_each_entry_safe(dirent, n, &log_list, mde_list) {
5366 list_del_init(&dirent->mde_list);
5367 mgs_direntry_free(dirent);
5370 if (rc && count > 0)
5371 mgs_erase_logs(env, mgs, newname);
/*
 * Erase all config logs of file system @fsname.  A NULL or empty name is
 * checked for before the work is delegated to mgs_erase_logs().
 */
5376 int mgs_lcfg_erase(const struct lu_env *env, struct mgs_device *mgs,
5382 if (unlikely(!fsname || fsname[0] == '\0'))
5385 rc = mgs_erase_logs(env, mgs, fsname);
/*
 * Delete the XATTR_TARGET_RENAME extended attribute from @obj.
 *
 * A transaction is created on the dt_device that owns @obj, the xattr
 * deletion is declared, the transaction is started locally, and the xattr
 * is removed while @obj is write-locked.  The transaction is stopped before
 * returning.
 */
5390 static int mgs_xattr_del(const struct lu_env *env, struct dt_object *obj)
5392 struct dt_device *dev;
5393 struct thandle *th = NULL;
5398 dev = container_of(obj->do_lu.lo_dev, struct dt_device, dd_lu_dev);
5399 th = dt_trans_create(env, dev);
5401 RETURN(PTR_ERR(th));
5403 rc = dt_declare_xattr_del(env, obj, XATTR_TARGET_RENAME, th);
5407 rc = dt_trans_start_local(env, dev, th);
5411 dt_write_lock(env, obj, 0);
5412 rc = dt_xattr_del(env, obj, XATTR_TARGET_RENAME, th);
5417 dt_write_unlock(env, obj);
5420 dt_trans_stop(env, dev, th);
/*
 * Walk the CONFIGS directory and apply any pending fs-name change recorded
 * in the XATTR_TARGET_RENAME extended attribute of a config log.
 *
 * For each log whose name contains a '-', the object is looked up and the
 * xattr is read into the local fsname buffer.  If the stored name equals
 * the log's current fs-name prefix, the xattr is simply deleted with
 * mgs_xattr_del().  Otherwise the log is forked to the stored name with
 * mgs_lcfg_fork_one() (an -EEXIST result is tolerated) and the old log is
 * erased with mgs_erase_log().
 */
5425 int mgs_lcfg_rename(const struct lu_env *env, struct mgs_device *mgs)
5427 struct list_head log_list;
5428 struct mgs_direntry *dirent, *n;
5430 struct lu_buf buf = {
5432 .lb_len = sizeof(fsname)
5438 rc = class_dentry_readdir(env, mgs, &log_list);
5442 if (list_empty(&log_list))
5445 list_for_each_entry_safe(dirent, n, &log_list, mde_list) {
5446 struct dt_object *o = NULL;
5451 list_del_init(&dirent->mde_list);
5452 ptr = strrchr(dirent->mde_name, '-');
5456 len = ptr - dirent->mde_name;
5457 if (unlikely(len >= sizeof(oldname))) {
5458 CDEBUG(D_MGS, "Skip invalid configuration file %s\n",
5463 o = local_file_find(env, mgs->mgs_los, mgs->mgs_configs_dir,
5467 CDEBUG(D_MGS, "Fail to locate file %s: rc = %d\n",
5468 dirent->mde_name, rc);
5472 rc = dt_xattr_get(env, o, &buf, XATTR_TARGET_RENAME);
5478 "Fail to get EA for %s: rc = %d\n",
5479 dirent->mde_name, rc);
5483 if (unlikely(rc == len &&
5484 memcmp(fsname, dirent->mde_name, len) == 0)) {
5485 /* The new fsname is the same as the old one. */
5486 rc = mgs_xattr_del(env, o);
5490 memcpy(oldname, dirent->mde_name, len);
5491 oldname[len] = '\0';
5493 rc = mgs_lcfg_fork_one(env, mgs, dirent, oldname, fsname);
5494 if (rc && rc != -EEXIST) {
5495 CDEBUG(D_MGS, "Fail to fork %s: rc = %d\n",
5496 dirent->mde_name, rc);
5500 rc = mgs_erase_log(env, mgs, dirent->mde_name);
5502 CDEBUG(D_MGS, "Fail to erase old %s: rc = %d\n",
5503 dirent->mde_name, rc);
5504 /* keep it there if failed to remove it. */
5509 if (o && !IS_ERR(o))
5510 lu_object_put(env, &o->do_lu);
5512 mgs_direntry_free(dirent);
5517 list_for_each_entry_safe(dirent, n, &log_list, mde_list) {
5518 list_del_init(&dirent->mde_list);
5519 mgs_direntry_free(dirent);
5525 /* Set up the _mgs fsdb and its log. */
/*
 * Look up, or create, the fsdb named MGSSELF_NAME with
 * mgs_find_or_make_fsdb() and drop the reference again with mgs_put_fsdb().
 */
5527 int mgs__mgs_fsdb_setup(const struct lu_env *env, struct mgs_device *mgs)
5529 struct fs_db *fsdb = NULL;
5533 rc = mgs_find_or_make_fsdb(env, mgs, MGSSELF_NAME, &fsdb);
5535 mgs_put_fsdb(mgs, fsdb);
5540 /* Set up the params fsdb and its log. */
/*
 * Look up, or create, the fsdb named PARAMS_FILENAME and, with its
 * fsdb_mutex held, open and close the PARAMS_FILENAME log once via
 * record_start_log()/record_end_log().  The fsdb reference is dropped
 * before returning.
 */
5542 int mgs_params_fsdb_setup(const struct lu_env *env, struct mgs_device *mgs)
5544 struct fs_db *fsdb = NULL;
5545 struct llog_handle *params_llh = NULL;
5549 rc = mgs_find_or_make_fsdb(env, mgs, PARAMS_FILENAME, &fsdb);
5551 mutex_lock(&fsdb->fsdb_mutex);
5552 rc = record_start_log(env, mgs, ¶ms_llh, PARAMS_FILENAME);
5554 rc = record_end_log(env, ¶ms_llh);
5555 mutex_unlock(&fsdb->fsdb_mutex);
5556 mgs_put_fsdb(mgs, fsdb);
5562 /* Clean up the params fsdb and its log. */
/*
 * Erase the logs named after PARAMS_FILENAME through mgs_erase_logs().
 * An -ENOENT result is reported to the caller as success.
 */
5564 int mgs_params_fsdb_cleanup(const struct lu_env *env, struct mgs_device *mgs)
5568 rc = mgs_erase_logs(env, mgs, PARAMS_FILENAME);
5569 return rc == -ENOENT ? 0 : rc;
/**
5573 * Fill in the mgs_target_info based on the devname and param provided.
5575 * @env thread context
5577 * @mti mgs target info. We want to set this based on the other parameters
5578 * passed to this function. Once set up, we write it to the config
 * logs.
5580 * @devname optional OBD device name
5581 * @param string that contains both what tunable to set and the value to
 * set it to.
5584 * RETURN 0 for success
5585 * negative error number on failure
 */
/*
 * Handle the LCFG_PARAM path of mgs_set_param(), which the caller describes
 * as the "lctl conf_param" case.
 *
 * mti->mti_svname is taken from @devname, or from the part of @param that
 * precedes the first '.', and parsed by mgs_parse_devname() into an fs name
 * and stripe index.  llite parameters are refused for OST/MDT device names.
 * The parameter is written with mgs_write_log_param() under
 * fsdb->fsdb_mutex, after which the MGS_CFG_T_CONFIG lock is revoked.
 */
5587 static int mgs_set_conf_param(const struct lu_env *env, struct mgs_device *mgs,
5588 struct mgs_target_info *mti, const char *devname,
5591 struct fs_db *fsdb = NULL;
5596 /* lustre, lustre-mdtlov, lustre-client, lustre-MDT0000 */
5600 /* We have two possible cases here:
5602 * 1) the device name embedded in the param:
5603 * lustre-OST0000.osc.max_dirty_mb=32
5605 * 2) the file system name is embedded in
5606 * the param: lustre.sys.at.min=0
5608 len = strcspn(param, ".=");
5609 if (!len || param[len] == '=')
5612 if (len >= sizeof(mti->mti_svname))
5615 snprintf(mti->mti_svname, sizeof(mti->mti_svname),
5616 "%.*s", (int)len, param);
5619 rc = strscpy(mti->mti_svname, devname, sizeof(mti->mti_svname));
5624 if (!strlen(mti->mti_svname)) {
5625 LCONSOLE_ERROR_MSG(0x14d, "No target specified: %s\n", param);
5629 dev_type = mgs_parse_devname(mti->mti_svname, mti->mti_fsname,
5630 &mti->mti_stripe_index);
5632 /* For this case we have an invalid obd device name */
5634 CDEBUG(D_MGS, "%s don't contain an index\n", mti->mti_svname);
5635 strscpy(mti->mti_fsname, mti->mti_svname, MTI_NAME_MAXLEN);
5638 /* Not an obd device, assume devname is the fsname.
5639 * User might have only provided fsname and not obd device
5642 CDEBUG(D_MGS, "%s is seen as a file system name\n", mti->mti_svname);
5643 strscpy(mti->mti_fsname, mti->mti_svname, MTI_NAME_MAXLEN);
5648 GOTO(out, rc = dev_type);
5650 /* param related to llite isn't allowed to set by OST or MDT */
5651 if (dev_type & LDD_F_SV_TYPE_OST ||
5652 dev_type & LDD_F_SV_TYPE_MDT) {
5653 /* param related to llite isn't allowed to set by OST
5656 if (!strncmp(param, PARAM_LLITE,
5657 sizeof(PARAM_LLITE) - 1))
5658 GOTO(out, rc = -EINVAL);
5660 /* Strip -osc or -mdc suffix from svname */
5661 if (server_make_name(dev_type, mti->mti_stripe_index,
5662 mti->mti_fsname, mti->mti_svname,
5663 sizeof(mti->mti_svname)))
5664 GOTO(out, rc = -EINVAL);
5668 rc = strscpy(mti->mti_params, param, sizeof(mti->mti_params));
5672 CDEBUG(D_MGS, "set_conf_param fs='%s' device='%s' param='%s'\n",
5673 mti->mti_fsname, mti->mti_svname, mti->mti_params);
5675 rc = mgs_find_or_make_fsdb(env, mgs, mti->mti_fsname, &fsdb);
5679 if (!test_bit(FSDB_MGS_SELF, &fsdb->fsdb_flags) &&
5680 test_bit(FSDB_LOG_EMPTY, &fsdb->fsdb_flags)) {
5681 CERROR("No filesystem targets for %s. cfg_device from lctl "
5682 "is '%s'\n", mti->mti_fsname, mti->mti_svname);
5683 mgs_unlink_fsdb(mgs, fsdb);
5684 GOTO(out, rc = -EINVAL);
5688 * Revoke lock so everyone updates. Should be alright if
5689 * someone was already reading while we were updating the logs,
5690 * so we don't really need to hold the lock while we're
5693 mti->mti_flags = dev_type | LDD_F_PARAM;
5694 mutex_lock(&fsdb->fsdb_mutex);
5695 rc = mgs_write_log_param(env, mgs, fsdb, mti, mti->mti_params);
5696 mutex_unlock(&fsdb->fsdb_mutex);
5697 mgs_revoke_lock(mgs, fsdb, MGS_CFG_T_CONFIG);
5701 mgs_put_fsdb(mgs, fsdb);
/*
 * Handle the non-LCFG_PARAM path of mgs_set_param(), which the caller
 * describes as the "lctl set_param -P" case.
 *
 * @param is copied into mti->mti_params.  mti->mti_svname is derived from
 * the text before the first '.' in @param, from the name of a registered
 * fsdb (other than "params") that occurs in @param, or falls back to
 * "general".  The parameter is recorded in the PARAMS_FILENAME fsdb with
 * mgs_write_log_param2() under fsdb->fsdb_mutex, after which the
 * MGS_CFG_T_PARAMS lock is revoked.
 */
5706 static int mgs_set_param2(const struct lu_env *env, struct mgs_device *mgs,
5707 struct mgs_target_info *mti, const char *param)
5709 struct fs_db *fsdb = NULL;
5714 rc = strscpy(mti->mti_params, param, sizeof(mti->mti_params));
5718 len = strcspn(param, ".=");
5719 if (len && param[len] != '=') {
5720 struct list_head *tmp;
5724 ptr = strchr(param, '.');
5726 len = strlen(param);
5729 if (len >= sizeof(mti->mti_svname))
5730 GOTO(out, rc = -E2BIG);
5732 snprintf(mti->mti_svname, sizeof(mti->mti_svname), "%.*s",
5735 mutex_lock(&mgs->mgs_mutex);
5736 if (unlikely(list_empty(&mgs->mgs_fs_db_list))) {
5737 mutex_unlock(&mgs->mgs_mutex);
5738 GOTO(out, rc = -ENODEV);
5741 list_for_each(tmp, &mgs->mgs_fs_db_list) {
5742 fsdb = list_entry(tmp, struct fs_db, fsdb_list);
5743 if (fsdb->fsdb_has_lproc_entry &&
5744 strcmp(fsdb->fsdb_name, "params") != 0 &&
5745 strstr(param, fsdb->fsdb_name)) {
5746 snprintf(mti->mti_svname,
5747 sizeof(mti->mti_svname), "%s",
5755 snprintf(mti->mti_svname, sizeof(mti->mti_svname),
5758 mutex_unlock(&mgs->mgs_mutex);
5760 snprintf(mti->mti_svname, sizeof(mti->mti_svname), "general");
5763 CDEBUG(D_MGS, "set_param2 fs='%s' device='%s' param='%s'\n",
5764 mti->mti_fsname, mti->mti_svname, mti->mti_params);
5766 /* The return value should be the device type i.e LDD_F_SV_TYPE_XXX.
5767 * A returned error tells us we don't have a target obd device.
5769 dev_type = server_name2index(mti->mti_svname, &mti->mti_stripe_index,
5774 /* the return value should be the device type i.e LDD_F_SV_TYPE_XXX.
5775 * Strip -osc or -mdc suffix from svname
5777 if ((dev_type & LDD_F_SV_TYPE_OST || dev_type & LDD_F_SV_TYPE_MDT) &&
5778 server_make_name(dev_type, mti->mti_stripe_index,
5779 mti->mti_fsname, mti->mti_svname,
5780 sizeof(mti->mti_svname)))
5781 GOTO(out, rc = -EINVAL);
5783 rc = mgs_find_or_make_fsdb(env, mgs, PARAMS_FILENAME, &fsdb);
5787 * Revoke lock so everyone updates. Should be alright if
5788 * someone was already reading while we were updating the logs,
5789 * so we don't really need to hold the lock while we're
5792 mti->mti_flags = dev_type | LDD_F_PARAM2;
5793 mutex_lock(&fsdb->fsdb_mutex);
5794 rc = mgs_write_log_param2(env, mgs, fsdb, mti, mti->mti_params);
5795 mutex_unlock(&fsdb->fsdb_mutex);
5796 mgs_revoke_lock(mgs, fsdb, MGS_CFG_T_PARAMS);
5797 mgs_put_fsdb(mgs, fsdb);
5802 /* Set a permanent (config log) param for a target or fs
5804 * @lcfg buf0 may contain the device (testfs-MDT0000) name
5805 * buf1 contains the single parameter
 */
/*
 * Entry point for setting a permanent parameter from a lustre_cfg record.
 * The parameter string is buffer 1 of @lcfg.  An LCFG_PARAM command is
 * routed to mgs_set_conf_param() together with the device name from
 * buffer 0; any other command is routed to mgs_set_param2().
 */
5807 int mgs_set_param(const struct lu_env *env, struct mgs_device *mgs,
5808 struct lustre_cfg *lcfg)
5810 const char *param = lustre_cfg_string(lcfg, 1);
5811 struct mgs_target_info *mti;
5814 /* Create a fake mti to hold everything */
5819 print_lustre_cfg(lcfg);
5821 if (lcfg->lcfg_command == LCFG_PARAM) {
5822 /* For the case of lctl conf_param devname can be
5823 * lustre, lustre-mdtlov, lustre-client, lustre-MDT0000
5825 const char *devname = lustre_cfg_string(lcfg, 0);
5827 rc = mgs_set_conf_param(env, mgs, mti, devname, param);
5829 /* In the case of lctl set_param -P lcfg[0] will always
5830 * be 'general'. At least for now.
5832 rc = mgs_set_param2(env, mgs, mti, param);
/**
 * Write one pool command, bracketed by a start and an end marker, through
 * the log handle that record_start_log() sets up for \a logname.
 *
 * Visible call order: record_start_log(), record_marker(CM_START),
 * record_base() for \a cmd, record_marker(CM_END), record_end_log().
 *
 * \param[in] env	pointer to the thread context
 * \param[in] mgs	pointer to the mgs device
 * \param[in] logname	name of the log passed to record_start_log()
 * \param[in] fsdb	filesystem database, passed to record_marker()
 * \param[in] tgtname	target name used for both markers and the record
 * \param[in] cmd	pool command stored by record_base()
 * \param[in] fsname	first string argument of the record
 * \param[in] poolname	second string argument of the record
 * \param[in] ostname	third string argument of the record
 * \param[in] comment	text attached to both markers
 *
 * NOTE(review): the rc checks between the steps and the final return are
 * not visible in this listing; confirm the error path in the full source.
 */
5840 static int mgs_write_log_pool(const struct lu_env *env,
5841 struct mgs_device *mgs, char *logname,
5842 struct fs_db *fsdb, char *tgtname,
5843 enum lcfg_command_type cmd,
5844 char *fsname, char *poolname,
5845 char *ostname, char *comment)
5847 struct llog_handle *llh = NULL;
5850 rc = record_start_log(env, mgs, &llh, logname);
/* Opening marker, labelled with tgtname and comment. */
5853 rc = record_marker(env, llh, fsdb, CM_START, tgtname, comment);
/* The pool command itself; the fourth string slot is left NULL. */
5856 rc = record_base(env, llh, tgtname, 0, cmd,
5857 fsname, poolname, ostname, NULL);
/* Closing marker, using the same tgtname and comment as the opening one. */
5860 rc = record_marker(env, llh, fsdb, CM_END, tgtname, comment);
5862 record_end_log(env, &llh);
/**
 * Validate an OST name against a filesystem name and the OST index map.
 *
 * Checks visible in this body, in order:
 *  - \a ostname must contain a '-', and the bytes before its last '-' must
 *    equal the same number of leading bytes of \a fsname;
 *  - one hexadecimal index must parse with the "OST%04x_UUID" pattern;
 *  - that index must not exceed INDEX_MAP_MAX_VALUE;
 *  - the index bit must be set in fsdb->fsdb_ost_index_map.
 *
 * \param[in] fsdb	filesystem database holding the OST index map
 * \param[in] fsname	filesystem name the OST is expected to belong to
 * \param[in] ostname	OST name to validate
 *
 * NOTE(review): the value returned for each failed check, and the success
 * return, are not visible in this listing; confirm in the full source.
 */
5867 int mgs_pool_check_ostname(struct fs_db *fsdb, char *fsname, char *ostname)
5872 /* check if ostname match fsname */
5873 ptr = strrchr(ostname, '-');
/*
 * NOTE(review): the comparison length is the ostname prefix length, so an
 * fsname that merely starts with that prefix also passes this test;
 * confirm that this is intended.
 */
5874 if (!ptr || (strncmp(fsname, ostname, ptr - ostname) != 0))
/*
 * NOTE(review): presumably ptr is advanced past the '-' before parsing;
 * that statement is not visible here.
 */
5878 if (sscanf(ptr, "OST%04x_UUID", &index) != 1)
5880 if (index > INDEX_MAP_MAX_VALUE)
/* The parsed index must be one recorded in the OST index map. */
5882 if (!test_bit(index, fsdb->fsdb_ost_index_map))
/**
 * Compare the pool state reported by mgs_search_pool() with what a pool
 * command expects.
 *
 * The state is looked up with fsdb->fsdb_clilov as the lov name and is
 * then tested against POOL_STATUS_NONE, POOL_STATUS_EXIST and
 * POOL_STATUS_OST_EXIST in four groups of comparisons.
 *
 * \param[in] env	pointer to the thread context
 * \param[in] mgs	pointer to the mgs device
 * \param[in] fsdb	filesystem database; supplies the lov name
 * \param[in] mti	target info passed through to mgs_search_pool()
 * \param[in] logname	log searched by mgs_search_pool()
 * \param[in] devname	not referenced by any line visible in this listing
 * \param[in] cmd	pool command being checked
 * \param[in] fsname	filesystem name
 * \param[in] poolname	pool name
 * \param[in] ostname	OST name
 *
 * NOTE(review): the code that selects a comparison group for \a cmd, the
 * result assigned by each comparison and the return statement are not
 * visible here; presumably there is one group per pool command. Confirm
 * against the full source.
 */
5889 int mgs_pool_sanity(const struct lu_env *env, struct mgs_device *mgs,
5890 struct fs_db *fsdb, struct mgs_target_info *mti,
5891 char *logname, char *devname, enum lcfg_command_type cmd,
5892 char *fsname, char *poolname, char *ostname)
/* The search always uses the lov name stored in fsdb. */
5894 char *lov = fsdb->fsdb_clilov;
5898 status = mgs_search_pool(env, mgs, fsdb, mti, logname, lov,
5899 fsname, poolname, ostname);
/* First group: a single test for "status at or above EXIST". */
5905 if (status >= POOL_STATUS_EXIST)
/* Second group: NONE, otherwise OST_EXIST (chained with else). */
5909 if (status == POOL_STATUS_NONE)
5911 else if (status == POOL_STATUS_OST_EXIST)
/* Third group: NONE, then a separate test for "not OST_EXIST". */
5915 if (status == POOL_STATUS_NONE)
5917 if (status != POOL_STATUS_OST_EXIST)
/* Fourth group: NONE, then a separate test for OST_EXIST. */
5921 if (status == POOL_STATUS_NONE)
5923 if (status == POOL_STATUS_OST_EXIST)
/**
 * Apply a pool command to the client log and to every MDT log of a
 * filesystem, then request a configuration update.
 *
 * Visible flow:
 *  - check \a ostname presence against \a cmd;
 *  - look up the filesystem database and bail out with -EINVAL if its
 *    FSDB_LOG_EMPTY flag is set;
 *  - build a marker label ("new", "add", "rem" or "del" followed by the
 *    names) and, for "rem" and "del", the label of the earlier "add" or
 *    "new" record that is handed to mgs_modify() with CM_SKIP;
 *  - under fsdb->fsdb_mutex, run mgs_pool_sanity(), then mgs_modify() and
 *    mgs_write_log_pool() on the "<fsname>-client" log and on the log of
 *    each index set in fsdb->fsdb_mdt_index_map;
 *  - call mgs_revoke_lock() with MGS_CFG_T_CONFIG.
 *
 * \param[in] env	pointer to the thread context
 * \param[in] mgs	pointer to the mgs device
 * \param[in] cmd	pool command (LCFG_POOL_NEW/ADD/REM/DEL are tested)
 * \param[in] fsname	filesystem name
 * \param[in] poolname	pool name
 * \param[in] ostname	OST name; tested for NULL against \a cmd
 *
 * NOTE(review): several declarations, the per-command selection, the
 * conditions guarding some calls, the goto labels and the return are not
 * visible in this listing; confirm them against the full source.
 */
5935 int mgs_pool_cmd(const struct lu_env *env, struct mgs_device *mgs,
5936 enum lcfg_command_type cmd, char *fsname,
5937 char *poolname, char *ostname)
5943 char *canceled_label = NULL;
5945 struct mgs_target_info *mti = NULL;
/* REM and ADD are tested for a missing ostname, DEL and NEW for a given one. */
5949 if ((cmd == LCFG_POOL_REM || cmd == LCFG_POOL_ADD) && !ostname)
5951 if ((cmd == LCFG_POOL_DEL || cmd == LCFG_POOL_NEW) && ostname)
5954 rc = mgs_find_or_make_fsdb(env, mgs, fsname, &fsdb);
5956 CERROR("Can't get db for %s\n", fsname);
5959 if (test_bit(FSDB_LOG_EMPTY, &fsdb->fsdb_flags)) {
5960 CERROR("%s is not defined\n", fsname);
5961 mgs_unlink_fsdb(mgs, fsdb);
5962 GOTO(out_fsdb, rc = -EINVAL);
/*
 * The longest label format below adds a 4-byte verb prefix, two '.'
 * separators and the terminating NUL to the names, which fits in the
 * 10 spare bytes.
 */
5965 label_sz = 10 + strlen(fsname) + strlen(poolname);
5967 rc = mgs_pool_check_ostname(fsdb, fsname, ostname);
5970 label_sz += strlen(ostname);
5973 OBD_ALLOC(label, label_sz);
5975 GOTO(out_fsdb, rc = -ENOMEM);
5979 sprintf(label, "new %s.%s", fsname, poolname);
5982 sprintf(label, "add %s.%s.%s", fsname, poolname, ostname);
/* "rem" is paired with the label of the matching earlier "add" record. */
5985 OBD_ALLOC(canceled_label, label_sz);
5986 if (canceled_label == NULL)
5987 GOTO(out_label, rc = -ENOMEM);
5988 sprintf(label, "rem %s.%s.%s", fsname, poolname, ostname);
5989 sprintf(canceled_label, "add %s.%s.%s",
5990 fsname, poolname, ostname);
/* "del" is paired with the label of the matching earlier "new" record. */
5993 OBD_ALLOC(canceled_label, label_sz);
5994 if (canceled_label == NULL)
5995 GOTO(out_label, rc = -ENOMEM);
5997 sprintf(label, "del %s.%s", fsname, poolname);
5998 sprintf(canceled_label, "new %s.%s", fsname, poolname);
6006 GOTO(out_cancel, rc = -ENOMEM);
6007 strscpy(mti->mti_svname, "lov pool", sizeof(mti->mti_svname));
/* Log checks and updates below run with the fsdb mutex held. */
6009 mutex_lock(&fsdb->fsdb_mutex);
/* The client log name is fsname with "-client" appended. */
6011 rc = name_create(&logname, fsname, "-client");
6013 GOTO(out_unlock, rc);
6015 rc = mgs_pool_sanity(env, mgs, fsdb, mti, logname, fsdb->fsdb_clilov,
6016 cmd, fsname, poolname, ostname);
6018 GOTO(out_logname, rc);
/*
 * NOTE(review): presumably this marks the earlier record named by
 * canceled_label as skipped; mgs_modify() is defined elsewhere.
 */
6021 rc = mgs_modify(env, mgs, fsdb, mti, logname,
6022 fsdb->fsdb_clilov, canceled_label, CM_SKIP);
6025 GOTO(out_logname, rc);
6027 rc = mgs_write_log_pool(env, mgs, logname, fsdb, fsdb->fsdb_clilov,
6028 cmd, fsname, poolname, ostname, label);
6030 GOTO(out_logname, rc);
6032 name_destroy(&logname);
6034 /* write pool def to all MDT logs */
6035 for_each_set_bit(i, fsdb->fsdb_mdt_index_map, INDEX_MAP_SIZE) {
6036 rc = name_create_mdt_and_lov(&logname, &lovname, fsdb, i);
6038 GOTO(out_unlock, rc);
6041 rc = mgs_modify(env, mgs, fsdb, mti, logname, lovname,
6042 canceled_label, CM_SKIP);
6045 GOTO(out_names, rc);
6047 rc = mgs_write_log_pool(env, mgs, logname, fsdb, lovname, cmd,
6048 fsname, poolname, ostname, label);
6050 GOTO(out_names, rc);
/* Both per-MDT names are released before the next index is handled. */
6052 name_destroy(&logname);
6053 name_destroy(&lovname);
6055 mutex_unlock(&fsdb->fsdb_mutex);
6057 /* request for update */
6058 mgs_revoke_lock(mgs, fsdb, MGS_CFG_T_CONFIG);
/* Cleanup tail: names, mutex, labels, then the fsdb reference. */
6063 name_destroy(&lovname);
6065 name_destroy(&logname);
6067 mutex_unlock(&fsdb->fsdb_mutex);
6072 OBD_FREE(canceled_label, label_sz);
6074 OBD_FREE(label, label_sz);
6076 mgs_put_fsdb(mgs, fsdb);