4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.gnu.org/licenses/gpl-2.0.html
23 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Use is subject to license terms.
26 * Copyright (c) 2011, 2016, Intel Corporation.
29 * This file is part of Lustre, http://www.lustre.org/
30 * Lustre is a trademark of Sun Microsystems, Inc.
32 * lustre/mgs/mgs_llog.c
34 * Lustre Management Server (mgs) config llog creation
36 * Author: Nathan Rutman <nathan@clusterfs.com>
37 * Author: Alex Zhuravlev <bzzz@whamcloud.com>
38 * Author: Mikhail Pershin <tappro@whamcloud.com>
41 #define DEBUG_SUBSYSTEM S_MGS
42 #define D_MGS D_CONFIG
45 #include <lustre_ioctl.h>
46 #include <lustre_param.h>
47 #include <lustre_sec.h>
48 #include <lustre_quota.h>
50 #include "mgs_internal.h"
52 /********************** Class functions ********************/
54 /* Find all logs in the CONFIGS directory and link them into a list */
/**
 * Collect the entries of the MGS configs directory into \a log_list.
 *
 * \a log_list is initialised as an empty list, then the directory object
 * mgs->mgs_configs_dir is walked through its index iterator operations.
 * Each accepted key is copied into a freshly allocated mgs_direntry
 * (NUL-terminated in mde_name) and linked onto \a log_list.
 *
 * \param[in]  env       environment handed to every iterator operation
 * \param[in]  mgs       MGS device whose configs directory is listed
 * \param[out] log_list  list head that receives the mgs_direntry items
 */
55 int class_dentry_readdir(const struct lu_env *env,
56 struct mgs_device *mgs, struct list_head *log_list)
58 struct dt_object *dir = mgs->mgs_configs_dir;
59 const struct dt_it_ops *iops;
61 struct mgs_direntry *de;
65 INIT_LIST_HEAD(log_list);
/* The directory object must expose index operations to be iterated. */
68 LASSERT(dir->do_index_ops);
70 iops = &dir->do_index_ops->dio_it;
71 it = iops->init(env, dir, LUDA_64BITHASH);
75 rc = iops->load(env, it, 0);
81 key = (void *)iops->key(env, it);
83 CERROR("%s: key failed when listing %s: rc = %d\n",
84 mgs->mgs_obd->obd_name, MOUNT_CONFIGS_DIR,
88 key_sz = iops->key_size(env, it);
91 /* filter out "." and ".." entries */
95 if (key_sz == 2 && key[1] == '.')
/*
 * NOTE(review): the memcmp below compares only the first 3 bytes of
 * ".bak" (i.e. ".ba") with the last 3 bytes of the key, so a name
 * ending in ".bak" (whose last 3 bytes are "bak") would not match.
 * sizeof(".bak") - 1 is 4, not 3 -- confirm and fix the length/offset.
 */
99 /* filter out ".bak" files */
100 /* sizeof(".bak") - 1 == 3 */
102 !memcmp(".bak", key + key_sz - 3, 3)) {
103 CDEBUG(D_MGS, "Skipping backup file %.*s\n",
/* One extra byte so the copied key can be NUL-terminated below. */
108 de = mgs_direntry_alloc(key_sz + 1);
114 memcpy(de->mde_name, key, key_sz);
115 de->mde_name[key_sz] = 0;
117 list_add(&de->mde_list, log_list);
120 rc = iops->next(env, it);
130 CERROR("%s: key failed when listing %s: rc = %d\n",
131 mgs->mgs_obd->obd_name, MOUNT_CONFIGS_DIR, rc);
135 /******************** DB functions *********************/
/**
 * Build the string "<prefix><suffix>" in newly allocated memory.
 *
 * The buffer is sized strlen(prefix) + strlen(suffix) + 1 and stored in
 * \a *newname; it is released with name_destroy().
 *
 * \param[out] newname  receives the allocated, concatenated string
 * \param[in]  prefix   first part of the name (must be a valid string)
 * \param[in]  suffix   second part of the name (must be a valid string)
 */
137 static inline int name_create(char **newname, char *prefix, char *suffix)
140 OBD_ALLOC(*newname, strlen(prefix) + strlen(suffix) + 1);
143 sprintf(*newname, "%s%s", prefix, suffix);
/**
 * Free a string produced by name_create().
 *
 * The freed size is recomputed as strlen(*name) + 1, so the string must
 * not have been shortened or lengthened since it was allocated.
 * NOTE(review): strlen() needs a non-NULL \a *name -- confirm the
 * caller or a guard in this function ensures that.
 */
147 static inline void name_destroy(char **name)
150 OBD_FREE(*name, strlen(*name) + 1);
/*
 * Private data handed to mgs_fsdb_handler() through llog_process(); the
 * handler reads and updates its fsdb and ver members.
 */
154 struct mgs_fsdb_handler_data
160 /* from the (client) config log, figure out:
161 1. which ost's/mdt's are configured (by index)
162 2. what the last config step is
163 3. COMPAT_18 osc name
165 /* It might be better to have a separate db file, instead of parsing the info
166 out of the client log. This is slow and potentially error-prone. */
/**
 * llog_process() callback that rebuilds fs_db state from one record of
 * the client config log.
 *
 * For an OBD_CFG_REC record that passes lustre_cfg_sanity_check() it:
 *  - sets the OST index bit for LCFG_LOV_ADD_OBD / LCFG_LOV_DEL_OBD,
 *  - sets the MDT index bit (bumping fsdb_mdt_count the first time an
 *    index is seen) for an LCFG_ATTACH of an MDC device,
 *  - sets FSDB_OSCNAME18 when an OSC attach is seen while the last marker
 *    version has major 1 and minor <= 8,
 *  - on LCFG_MARKER, stores the marker version in d->ver and keeps
 *    fsdb_gen at the highest marker step seen so far.
 *
 * \param[in] env   execution environment
 * \param[in] llh   handle of the log being processed
 * \param[in] rec   record header; the lustre_cfg payload follows it
 * \param[in] data  struct mgs_fsdb_handler_data for this pass
 */
167 static int mgs_fsdb_handler(const struct lu_env *env, struct llog_handle *llh,
168 struct llog_rec_hdr *rec, void *data)
170 struct mgs_fsdb_handler_data *d = data;
171 struct fs_db *fsdb = d->fsdb;
172 int cfg_len = rec->lrh_len;
/* The config payload starts immediately after the record header. */
173 char *cfg_buf = (char*) (rec + 1);
174 struct lustre_cfg *lcfg;
179 if (rec->lrh_type != OBD_CFG_REC) {
180 CERROR("unhandled lrh_type: %#x\n", rec->lrh_type);
184 rc = lustre_cfg_sanity_check(cfg_buf, cfg_len);
186 CERROR("Insane cfg\n");
190 lcfg = (struct lustre_cfg *)cfg_buf;
192 CDEBUG(D_INFO, "cmd %x %s %s\n", lcfg->lcfg_command,
193 lustre_cfg_string(lcfg, 0), lustre_cfg_string(lcfg, 1));
195 /* Figure out OST indices */
196 /* lov_modify_tgts add 0:lov1 1:ost1_UUID 2(index):0 3(gen):1 */
197 if (lcfg->lcfg_command == LCFG_LOV_ADD_OBD ||
198 lcfg->lcfg_command == LCFG_LOV_DEL_OBD) {
199 index = simple_strtoul(lustre_cfg_string(lcfg, 2),
201 CDEBUG(D_MGS, "OST index for %s is %u (%s)\n",
202 lustre_cfg_string(lcfg, 1), index,
203 lustre_cfg_string(lcfg, 2));
204 set_bit(index, fsdb->fsdb_ost_index_map);
207 /* Figure out MDT indices */
208 /* attach 0:MDC_uml1_mdsA_MNT_client 1:mdc 2:1d834_MNT_client_03f */
209 if ((lcfg->lcfg_command == LCFG_ATTACH) &&
210 (strcmp(lustre_cfg_string(lcfg, 1), LUSTRE_MDC_NAME) == 0)) {
211 rc = server_name2index(lustre_cfg_string(lcfg, 0),
213 if (rc != LDD_F_SV_TYPE_MDT) {
214 CWARN("Unparsable MDC name %s, assuming index 0\n",
215 lustre_cfg_string(lcfg, 0));
219 CDEBUG(D_MGS, "MDT index is %u\n", index);
/* Count each MDT index only once, even if it is attached repeatedly. */
220 if (!test_bit(index, fsdb->fsdb_mdt_index_map)) {
221 set_bit(index, fsdb->fsdb_mdt_index_map);
222 fsdb->fsdb_mdt_count++;
227 * figure out the old config. fsdb_gen = 0 means old log
228 * It is obsoleted and not supported anymore
230 if (fsdb->fsdb_gen == 0) {
231 CERROR("Old config format is not supported\n");
236 * compat to 1.8, check osc name used by MDT0 to OSTs, bz18548.
238 if (!test_bit(FSDB_OSCNAME18, &fsdb->fsdb_flags) &&
239 lcfg->lcfg_command == LCFG_ATTACH &&
240 strcmp(lustre_cfg_string(lcfg, 1), LUSTRE_OSC_NAME) == 0) {
241 if (OBD_OCD_VERSION_MAJOR(d->ver) == 1 &&
242 OBD_OCD_VERSION_MINOR(d->ver) <= 8) {
243 CWARN("MDT using 1.8 OSC name scheme\n");
244 set_bit(FSDB_OSCNAME18, &fsdb->fsdb_flags);
248 if (lcfg->lcfg_command == LCFG_MARKER) {
249 struct cfg_marker *marker;
250 marker = lustre_cfg_buf(lcfg, 1);
/* Remember the version of the most recent marker for the 1.8 check. */
252 d->ver = marker->cm_vers;
254 /* Keep track of the latest marker step */
255 fsdb->fsdb_gen = max(fsdb->fsdb_gen, marker->cm_step);
261 /* fsdb->fsdb_mutex is already held in mgs_find_or_make_fsdb*/
/**
 * Populate \a fsdb by replaying the "<fsname>-client" config log.
 *
 * The log is opened (created if needed) through the
 * LLOG_CONFIG_ORIG_CTXT context, initialised as a plain log and fed
 * record by record to mgs_fsdb_handler(). FSDB_LOG_EMPTY is set when
 * llog_get_size() reports at most one record.
 */
262 static int mgs_get_fsdb_from_llog(const struct lu_env *env,
263 struct mgs_device *mgs,
267 struct llog_handle *loghandle;
268 struct llog_ctxt *ctxt;
269 struct mgs_fsdb_handler_data d = {
276 ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
277 LASSERT(ctxt != NULL);
278 rc = name_create(&logname, fsdb->fsdb_name, "-client");
281 rc = llog_open_create(env, ctxt, &loghandle, NULL, logname);
285 rc = llog_init_handle(env, loghandle, LLOG_F_IS_PLAIN, NULL);
/* A size of 0 or 1 is treated as "no config records yet". */
289 if (llog_get_size(loghandle) <= 1)
290 set_bit(FSDB_LOG_EMPTY, &fsdb->fsdb_flags);
292 rc = llog_process(env, loghandle, mgs_fsdb_handler, (void *)&d, NULL);
293 CDEBUG(D_INFO, "get_db = %d\n", rc);
295 llog_close(env, loghandle);
297 name_destroy(&logname);
/**
 * Release all sptlrpc rule state attached to \a fsdb.
 *
 * Pops every entry off the singly linked fsdb_srpc_tgt list (linked via
 * mtsc_next), freeing its rule set, its target name and the entry
 * itself, then frees the general rule set fsdb_srpc_gen.
 */
304 static void mgs_free_fsdb_srpc(struct fs_db *fsdb)
306 struct mgs_tgt_srpc_conf *tgtconf;
308 /* free target-specific rules */
309 while (fsdb->fsdb_srpc_tgt) {
310 tgtconf = fsdb->fsdb_srpc_tgt;
311 fsdb->fsdb_srpc_tgt = tgtconf->mtsc_next;
/* The name is needed below to compute the size passed to OBD_FREE. */
313 LASSERT(tgtconf->mtsc_tgt);
315 sptlrpc_rule_set_free(&tgtconf->mtsc_rset);
316 OBD_FREE(tgtconf->mtsc_tgt, strlen(tgtconf->mtsc_tgt) + 1);
317 OBD_FREE_PTR(tgtconf);
320 /* free general rules */
321 sptlrpc_rule_set_free(&fsdb->fsdb_srpc_gen);
/**
 * Remove \a fsdb from the MGS fsdb list and drop the list's reference.
 *
 * Takes mgs->mgs_mutex itself. If the fsdb is still linked, it asserts
 * that at least two references exist (the list's and the caller's),
 * unlinks it and calls mgs_put_fsdb() once.
 */
324 static void mgs_unlink_fsdb(struct mgs_device *mgs, struct fs_db *fsdb)
326 mutex_lock(&mgs->mgs_mutex);
327 if (likely(!list_empty(&fsdb->fsdb_list))) {
328 LASSERTF(atomic_read(&fsdb->fsdb_ref) >= 2,
329 "Invalid ref %d on %s\n",
330 atomic_read(&fsdb->fsdb_ref),
333 list_del_init(&fsdb->fsdb_list);
334 /* Drop the reference on the list.*/
335 mgs_put_fsdb(mgs, fsdb);
337 mutex_unlock(&mgs->mgs_mutex);
340 /* The caller must hold mgs->mgs_mutex. */
/**
 * Look up an fsdb by file system name without taking a reference.
 *
 * Walks mgs->mgs_fs_db_list and compares each entry's fsdb_name with
 * \a fsname using strcmp().
 */
341 static inline struct fs_db *
342 mgs_find_fsdb_noref(struct mgs_device *mgs, const char *fsname)
345 struct list_head *tmp;
347 list_for_each(tmp, &mgs->mgs_fs_db_list) {
348 fsdb = list_entry(tmp, struct fs_db, fsdb_list);
349 if (strcmp(fsdb->fsdb_name, fsname) == 0)
356 /* The caller must hold mgs->mgs_mutex. */
/**
 * Unlink the fsdb named \a name from the list and drop the list's
 * reference on it. The lookup itself takes no extra reference.
 */
357 static void mgs_remove_fsdb_by_name(struct mgs_device *mgs, const char *name)
361 fsdb = mgs_find_fsdb_noref(mgs, name);
363 list_del_init(&fsdb->fsdb_list);
364 /* Drop the reference on the list.*/
365 mgs_put_fsdb(mgs, fsdb);
369 /* The caller must hold mgs->mgs_mutex. */
/**
 * Look up an fsdb by file system name and take a reference on it.
 *
 * The reference (fsdb_ref increment) is released with mgs_put_fsdb().
 */
370 struct fs_db *mgs_find_fsdb(struct mgs_device *mgs, const char *fsname)
374 fsdb = mgs_find_fsdb_noref(mgs, fsname);
376 atomic_inc(&fsdb->fsdb_ref);
381 /* The caller must hold mgs->mgs_mutex. */
/**
 * Allocate and initialise a new fsdb for \a fsname and link it onto
 * mgs->mgs_fs_db_list.
 *
 * For MGSSELF_NAME the fsdb is flagged FSDB_MGS_SELF; otherwise the
 * MDT/OST index maps and the "-clilov"/"-clilmv" names are allocated
 * and the database is populated from the client and params logs.
 * On success the reference count is 2 (one for the list, one for the
 * caller). On failure the count is set to 1 and dropped through
 * mgs_put_fsdb().
 *
 * \retval ERR_PTR(-EINVAL) when \a fsname does not fit in fsdb_name
 * \retval ERR_PTR(-ENOMEM) when the fsdb cannot be allocated
 */
382 static struct fs_db *mgs_new_fsdb(const struct lu_env *env,
383 struct mgs_device *mgs, char *fsname)
389 if (strlen(fsname) >= sizeof(fsdb->fsdb_name)) {
390 CERROR("fsname %s is too long\n", fsname);
392 RETURN(ERR_PTR(-EINVAL));
397 RETURN(ERR_PTR(-ENOMEM));
/*
 * The length check above guarantees fsname is shorter than the
 * destination, so strncpy() copies the terminating NUL as well.
 */
399 strncpy(fsdb->fsdb_name, fsname, sizeof(fsdb->fsdb_name));
400 mutex_init(&fsdb->fsdb_mutex);
401 INIT_LIST_HEAD(&fsdb->fsdb_list);
402 set_bit(FSDB_UDESC, &fsdb->fsdb_flags);
404 INIT_LIST_HEAD(&fsdb->fsdb_clients);
405 atomic_set(&fsdb->fsdb_notify_phase, 0);
406 init_waitqueue_head(&fsdb->fsdb_notify_waitq);
407 init_completion(&fsdb->fsdb_notify_comp);
409 if (strcmp(fsname, MGSSELF_NAME) == 0) {
410 set_bit(FSDB_MGS_SELF, &fsdb->fsdb_flags);
411 fsdb->fsdb_mgs = mgs;
412 if (logname_is_barrier(fsname))
415 OBD_ALLOC(fsdb->fsdb_mdt_index_map, INDEX_MAP_SIZE);
416 if (!fsdb->fsdb_mdt_index_map) {
417 CERROR("No memory for MDT index maps\n");
419 GOTO(err, rc = -ENOMEM);
422 OBD_ALLOC(fsdb->fsdb_ost_index_map, INDEX_MAP_SIZE);
423 if (!fsdb->fsdb_ost_index_map) {
424 CERROR("No memory for OST index maps\n");
426 GOTO(err, rc = -ENOMEM);
429 if (logname_is_barrier(fsname))
432 rc = name_create(&fsdb->fsdb_clilov, fsname, "-clilov");
436 rc = name_create(&fsdb->fsdb_clilmv, fsname, "-clilmv");
440 /* initialise data for NID table */
441 mgs_ir_init_fs(env, mgs, fsdb);
442 lproc_mgs_add_live(mgs, fsdb);
445 if (!test_bit(FSDB_MGS_SELF, &fsdb->fsdb_flags)) {
446 /* populate the db from the client llog */
447 rc = mgs_get_fsdb_from_llog(env, mgs, fsdb);
449 CERROR("Can't get db from client log %d\n", rc);
455 /* populate srpc rules from params llog */
456 rc = mgs_get_fsdb_srpc_from_llog(env, mgs, fsdb);
458 CERROR("Can't get db from params log %d\n", rc);
464 /* One ref is for the fsdb on the list.
465 * The other ref is for the caller. */
466 atomic_set(&fsdb->fsdb_ref, 2);
467 list_add(&fsdb->fsdb_list, &mgs->mgs_fs_db_list);
/* Error path: a single reference is set so that the put frees the fsdb. */
472 atomic_set(&fsdb->fsdb_ref, 1);
473 mgs_put_fsdb(mgs, fsdb);
/**
 * Tear down an fsdb that is no longer on the MGS list.
 *
 * Removes the lprocfs "live" entry, finalises the imperative-recovery
 * state, frees both index maps (when allocated), destroys the
 * clilov/clilmv names and releases the sptlrpc rules.
 */
478 static void mgs_free_fsdb(struct mgs_device *mgs, struct fs_db *fsdb)
/* The fsdb must already have been unlinked from mgs_fs_db_list. */
480 LASSERT(list_empty(&fsdb->fsdb_list));
482 lproc_mgs_del_live(mgs, fsdb);
484 /* deinitialize fsr */
486 mgs_ir_fini_fs(mgs, fsdb);
488 if (fsdb->fsdb_ost_index_map)
489 OBD_FREE(fsdb->fsdb_ost_index_map, INDEX_MAP_SIZE);
490 if (fsdb->fsdb_mdt_index_map)
491 OBD_FREE(fsdb->fsdb_mdt_index_map, INDEX_MAP_SIZE);
492 name_destroy(&fsdb->fsdb_clilov);
493 name_destroy(&fsdb->fsdb_clilmv);
494 mgs_free_fsdb_srpc(fsdb);
/**
 * Drop one reference on \a fsdb; the fsdb is freed through
 * mgs_free_fsdb() when the count reaches zero.
 */
498 void mgs_put_fsdb(struct mgs_device *mgs, struct fs_db *fsdb)
500 if (atomic_dec_and_test(&fsdb->fsdb_ref))
501 mgs_free_fsdb(mgs, fsdb);
/** Initialise the (empty) list of file system databases of \a mgs. */
504 int mgs_init_fsdb_list(struct mgs_device *mgs)
506 INIT_LIST_HEAD(&mgs->mgs_fs_db_list);
/**
 * Unlink every fsdb from mgs->mgs_fs_db_list under mgs_mutex and drop
 * the list's reference on each. The _safe list iterator is used because
 * entries are removed while walking.
 */
510 int mgs_cleanup_fsdb_list(struct mgs_device *mgs)
513 struct list_head *tmp, *tmp2;
515 mutex_lock(&mgs->mgs_mutex);
516 list_for_each_safe(tmp, tmp2, &mgs->mgs_fs_db_list) {
517 fsdb = list_entry(tmp, struct fs_db, fsdb_list);
518 list_del_init(&fsdb->fsdb_list);
519 mgs_put_fsdb(mgs, fsdb);
521 mutex_unlock(&mgs->mgs_mutex);
/**
 * Return a referenced fsdb for \a name, creating it when necessary.
 *
 * Both the lookup (mgs_find_fsdb) and the creation (mgs_new_fsdb) run
 * under mgs->mgs_mutex.
 *
 * \param[in]  name  file system name
 * \param[out] dbh   presumably receives the fsdb on success -- the
 *                   assignment is not visible here, confirm
 */
525 int mgs_find_or_make_fsdb(const struct lu_env *env, struct mgs_device *mgs,
526 char *name, struct fs_db **dbh)
532 mutex_lock(&mgs->mgs_mutex);
533 fsdb = mgs_find_fsdb(mgs, name);
535 fsdb = mgs_new_fsdb(env, mgs, name);
539 CDEBUG(D_MGS, "Created new db: rc = %d\n", rc);
541 mutex_unlock(&mgs->mgs_mutex);
551 -1= empty client log */
/**
 * Check whether the stripe index in \a mti is already recorded in the
 * fsdb index map of its target type (OST or MDT).
 *
 * The caller must not have LDD_F_NEED_INDEX set in mti_flags. The fsdb
 * reference obtained here is dropped before returning.
 *
 * \retval -EINVAL when \a mti is neither an OST nor an MDT
 */
552 int mgs_check_index(const struct lu_env *env,
553 struct mgs_device *mgs,
554 struct mgs_target_info *mti)
561 LASSERT(!(mti->mti_flags & LDD_F_NEED_INDEX));
563 rc = mgs_find_or_make_fsdb(env, mgs, mti->mti_fsname, &fsdb);
565 CERROR("Can't get db for %s\n", mti->mti_fsname);
569 if (test_bit(FSDB_LOG_EMPTY, &fsdb->fsdb_flags))
/* Pick the index bitmap that matches the target type. */
572 if (mti->mti_flags & LDD_F_SV_TYPE_OST)
573 imap = fsdb->fsdb_ost_index_map;
574 else if (mti->mti_flags & LDD_F_SV_TYPE_MDT)
575 imap = fsdb->fsdb_mdt_index_map;
577 GOTO(out, rc = -EINVAL);
579 if (test_bit(mti->mti_stripe_index, imap))
585 mgs_put_fsdb(mgs, fsdb);
/**
 * Scan \a index_map for the first clear bit.
 *
 * \param[in] index_map  bitmap to scan
 * \param[in] map_len    size of the bitmap in bytes (map_len * 8 bits
 *                       are examined)
 *
 * An error is logged when every bit in the map is already set.
 */
589 static __inline__ int next_index(void *index_map, int map_len)
592 for (i = 0; i < map_len * 8; i++)
593 if (!test_bit(i, index_map)) {
596 CERROR("max index %d exceeded.\n", i);
601 0 newly marked as in use
603 +EALREADY for update of an old index */
/**
 * Reserve the stripe index of \a mti in the fsdb index map of its type,
 * allocating the next free index when LDD_F_NEED_INDEX is set, and
 * regenerate the server name from the resulting index.
 *
 * Runs under fsdb->fsdb_mutex; the fsdb reference taken here is dropped
 * before returning.
 *
 * \retval 0            index newly marked as in use
 * \retval EALREADY     (positive) index was already set; this is an update
 * \retval -EADDRINUSE  index in use and the target is VIRGIN without
 *                      WRITECONF
 * \retval -ERANGE      no free index, or requested index too large
 * \retval -EINVAL      unknown target type
 */
604 static int mgs_set_index(const struct lu_env *env,
605 struct mgs_device *mgs,
606 struct mgs_target_info *mti)
613 rc = mgs_find_or_make_fsdb(env, mgs, mti->mti_fsname, &fsdb);
615 CERROR("Can't get db for %s\n", mti->mti_fsname);
619 mutex_lock(&fsdb->fsdb_mutex);
620 if (mti->mti_flags & LDD_F_SV_TYPE_OST) {
621 imap = fsdb->fsdb_ost_index_map;
622 } else if (mti->mti_flags & LDD_F_SV_TYPE_MDT) {
623 imap = fsdb->fsdb_mdt_index_map;
625 GOTO(out_up, rc = -EINVAL);
628 if (mti->mti_flags & LDD_F_NEED_INDEX) {
629 rc = next_index(imap, INDEX_MAP_SIZE);
631 GOTO(out_up, rc = -ERANGE);
632 mti->mti_stripe_index = rc;
635 /* the last index(0xffff) is reserved for default value. */
636 if (mti->mti_stripe_index >= INDEX_MAP_SIZE * 8 - 1) {
637 LCONSOLE_ERROR_MSG(0x13f, "Server %s requested index %u, "
638 "but index must be less than %u.\n",
639 mti->mti_svname, mti->mti_stripe_index,
640 INDEX_MAP_SIZE * 8 - 1);
641 GOTO(out_up, rc = -ERANGE);
644 if (test_bit(mti->mti_stripe_index, imap)) {
/* A brand-new target may not silently take over a used index. */
645 if ((mti->mti_flags & LDD_F_VIRGIN) &&
646 !(mti->mti_flags & LDD_F_WRITECONF)) {
647 LCONSOLE_ERROR_MSG(0x140, "Server %s requested index "
648 "%d, but that index is already in "
649 "use. Use --writeconf to force\n",
651 mti->mti_stripe_index);
652 GOTO(out_up, rc = -EADDRINUSE);
654 CDEBUG(D_MGS, "Server %s updating index %d\n",
655 mti->mti_svname, mti->mti_stripe_index);
656 GOTO(out_up, rc = EALREADY);
659 set_bit(mti->mti_stripe_index, imap);
660 if (mti->mti_flags & LDD_F_SV_TYPE_MDT)
661 fsdb->fsdb_mdt_count++;
/*
 * NOTE(review): the same bit is set again here; this looks redundant
 * with the set_bit() a few lines above -- confirm and drop one.
 */
664 set_bit(mti->mti_stripe_index, imap);
665 clear_bit(FSDB_LOG_EMPTY, &fsdb->fsdb_flags);
666 if (server_make_name(mti->mti_flags & ~(LDD_F_VIRGIN | LDD_F_WRITECONF),
667 mti->mti_stripe_index, mti->mti_fsname,
669 CERROR("unknown server type %#x\n", mti->mti_flags);
670 GOTO(out_up, rc = -EINVAL);
673 CDEBUG(D_MGS, "Set index for %s to %d\n", mti->mti_svname,
674 mti->mti_stripe_index);
676 GOTO(out_up, rc = 0);
679 mutex_unlock(&fsdb->fsdb_mutex);
680 mgs_put_fsdb(mgs, fsdb);
/*
 * Search key and result for mgs_modify_handler(): mml_marker carries the
 * comment/target name to match plus the flags and cancel time to apply.
 */
684 struct mgs_modify_lookup {
685 struct cfg_marker mml_marker;
/**
 * llog_process() callback that looks for a non-skipped marker whose
 * comment and target name both equal those of the cfg_marker passed in
 * \a data.
 *
 * Non-marker records and markers flagged CM_SKIP are ignored. On a
 * match rc is set to LLOG_PROC_BREAK, which stops log processing.
 */
689 static int mgs_check_record_match(const struct lu_env *env,
690 struct llog_handle *llh,
691 struct llog_rec_hdr *rec, void *data)
693 struct cfg_marker *mc_marker = data;
694 struct cfg_marker *marker;
695 struct lustre_cfg *lcfg = REC_DATA(rec);
696 int cfg_len = REC_DATA_LEN(rec);
701 if (rec->lrh_type != OBD_CFG_REC) {
702 CDEBUG(D_ERROR, "Unhandled lrh_type: %#x\n", rec->lrh_type);
706 rc = lustre_cfg_sanity_check(lcfg, cfg_len);
708 CDEBUG(D_ERROR, "Insane cfg\n");
712 /* We only care about markers */
713 if (lcfg->lcfg_command != LCFG_MARKER)
716 marker = lustre_cfg_buf(lcfg, 1);
718 if (marker->cm_flags & CM_SKIP)
721 if ((strcmp(mc_marker->cm_comment, marker->cm_comment) == 0) &&
722 (strcmp(mc_marker->cm_tgtname, marker->cm_tgtname) == 0)) {
723 /* Found a non-skipped marker match */
724 CDEBUG(D_MGS, "Matched rec %u marker %d flag %x %s %s\n",
725 rec->lrh_index, marker->cm_step,
726 marker->cm_flags, marker->cm_tgtname,
728 rc = LLOG_PROC_BREAK;
735 * Check an existing config log record with matching comment and device
737 * 0 - checked successfully,
738 * LLOG_PROC_BREAK - record matches
/**
 * Search config log \a logname for a non-skipped marker matching
 * \a devname and \a comment.
 *
 * The caller must hold fsdb->fsdb_mutex. The log must already exist
 * (LLOG_OPEN_EXISTS). A log with at most one record yields rc = 0.
 *
 * \retval 0                no matching marker found
 * \retval LLOG_PROC_BREAK  a matching marker exists
 * \retval -E2BIG           \a comment or \a devname does not fit in the
 *                          cfg_marker fields
 * \retval -ENOMEM          marker allocation failed
 */
741 static int mgs_check_marker(const struct lu_env *env, struct mgs_device *mgs,
742 struct fs_db *fsdb, struct mgs_target_info *mti,
743 char *logname, char *devname, char *comment)
745 struct llog_handle *loghandle;
746 struct llog_ctxt *ctxt;
747 struct cfg_marker *mc_marker;
752 LASSERT(mutex_is_locked(&fsdb->fsdb_mutex));
753 CDEBUG(D_MGS, "mgs check %s/%s/%s\n", logname, devname, comment);
755 ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
756 LASSERT(ctxt != NULL);
757 rc = llog_open(env, ctxt, &loghandle, NULL, logname, LLOG_OPEN_EXISTS);
764 rc = llog_init_handle(env, loghandle, LLOG_F_IS_PLAIN, NULL);
768 if (llog_get_size(loghandle) <= 1)
769 GOTO(out_close, rc = 0);
771 OBD_ALLOC_PTR(mc_marker);
773 GOTO(out_close, rc = -ENOMEM);
/* strlcpy() returns the source length; >= size means truncation. */
774 if (strlcpy(mc_marker->cm_comment, comment,
775 sizeof(mc_marker->cm_comment)) >=
776 sizeof(mc_marker->cm_comment))
777 GOTO(out_free, rc = -E2BIG);
778 if (strlcpy(mc_marker->cm_tgtname, devname,
779 sizeof(mc_marker->cm_tgtname)) >=
780 sizeof(mc_marker->cm_tgtname))
781 GOTO(out_free, rc = -E2BIG);
783 rc = llog_process(env, loghandle, mgs_check_record_match,
784 (void *)mc_marker, NULL);
787 OBD_FREE_PTR(mc_marker);
790 llog_close(env, loghandle);
/* LLOG_PROC_BREAK signals "match found" and is not reported as an error. */
792 if (rc && rc != LLOG_PROC_BREAK)
793 CDEBUG(D_ERROR, "%s: mgs check %s/%s failed: rc = %d\n",
794 mgs->mgs_obd->obd_name, mti->mti_svname, comment, rc);
/**
 * llog_process() callback that rewrites the flags of matching markers.
 *
 * For every non-skipped LCFG_MARKER record whose comment and target name
 * equal those in the mgs_modify_lookup passed as \a data, CM_EXCLUDE is
 * cleared, the lookup's flags are OR-ed in, the cancel time is copied
 * and the record is written back in place at its own index.
 */
799 static int mgs_modify_handler(const struct lu_env *env,
800 struct llog_handle *llh,
801 struct llog_rec_hdr *rec, void *data)
803 struct mgs_modify_lookup *mml = data;
804 struct cfg_marker *marker;
805 struct lustre_cfg *lcfg = REC_DATA(rec);
806 int cfg_len = REC_DATA_LEN(rec);
810 if (rec->lrh_type != OBD_CFG_REC) {
811 CERROR("unhandled lrh_type: %#x\n", rec->lrh_type);
815 rc = lustre_cfg_sanity_check(lcfg, cfg_len);
817 CERROR("Insane cfg\n");
821 /* We only care about markers */
822 if (lcfg->lcfg_command != LCFG_MARKER)
825 marker = lustre_cfg_buf(lcfg, 1);
826 if ((strcmp(mml->mml_marker.cm_comment, marker->cm_comment) == 0) &&
827 (strcmp(mml->mml_marker.cm_tgtname, marker->cm_tgtname) == 0) &&
828 !(marker->cm_flags & CM_SKIP)) {
829 /* Found a non-skipped marker match */
830 CDEBUG(D_MGS, "Changing rec %u marker %d %x->%x: %s %s\n",
831 rec->lrh_index, marker->cm_step,
832 marker->cm_flags, mml->mml_marker.cm_flags,
833 marker->cm_tgtname, marker->cm_comment);
834 /* Overwrite the old marker llog entry */
835 marker->cm_flags &= ~CM_EXCLUDE; /* in case we're unexcluding */
836 marker->cm_flags |= mml->mml_marker.cm_flags;
837 marker->cm_canceltime = mml->mml_marker.cm_canceltime;
838 rc = llog_write(env, llh, rec, rec->lrh_index);
847 * Modify an existing config log record (for CM_SKIP or CM_EXCLUDE)
849 * 0 - modified successfully,
850 * 1 - no modification was done
/**
 * Apply \a flags (for example CM_SKIP or CM_EXCLUDE) to the markers in
 * config log \a logname that match \a devname and \a comment.
 *
 * The caller must hold fsdb->fsdb_mutex. The log must already exist.
 * The cancel time stored in the markers is the current time when
 * \a flags is non-zero and 0 otherwise.
 *
 * \retval -E2BIG   \a comment or \a devname does not fit in the marker
 * \retval -ENOMEM  lookup structure allocation failed
 */
853 static int mgs_modify(const struct lu_env *env, struct mgs_device *mgs,
854 struct fs_db *fsdb, struct mgs_target_info *mti,
855 char *logname, char *devname, char *comment, int flags)
857 struct llog_handle *loghandle;
858 struct llog_ctxt *ctxt;
859 struct mgs_modify_lookup *mml;
864 LASSERT(mutex_is_locked(&fsdb->fsdb_mutex));
865 CDEBUG(D_MGS, "modify %s/%s/%s fl=%x\n", logname, devname, comment,
868 ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
869 LASSERT(ctxt != NULL);
870 rc = llog_open(env, ctxt, &loghandle, NULL, logname, LLOG_OPEN_EXISTS);
877 rc = llog_init_handle(env, loghandle, LLOG_F_IS_PLAIN, NULL);
881 if (llog_get_size(loghandle) <= 1)
882 GOTO(out_close, rc = 0);
886 GOTO(out_close, rc = -ENOMEM);
/* strlcpy() returns the source length; >= size means truncation. */
887 if (strlcpy(mml->mml_marker.cm_comment, comment,
888 sizeof(mml->mml_marker.cm_comment)) >=
889 sizeof(mml->mml_marker.cm_comment))
890 GOTO(out_free, rc = -E2BIG);
891 if (strlcpy(mml->mml_marker.cm_tgtname, devname,
892 sizeof(mml->mml_marker.cm_tgtname)) >=
893 sizeof(mml->mml_marker.cm_tgtname))
894 GOTO(out_free, rc = -E2BIG);
895 /* Modify mostly means cancel */
896 mml->mml_marker.cm_flags = flags;
897 mml->mml_marker.cm_canceltime = flags ? cfs_time_current_sec() : 0;
898 mml->mml_modified = 0;
899 rc = llog_process(env, loghandle, mgs_modify_handler, (void *)mml,
901 if (!rc && !mml->mml_modified)
908 llog_close(env, loghandle);
911 CERROR("%s: modify %s/%s failed: rc = %d\n",
912 mgs->mgs_obd->obd_name, mti->mti_svname, comment, rc);
917 /** State passed to mgs_replace_handler() as its data argument */
918 struct mgs_replace_uuid_lookup {
919 /* NIDs are replaced for this target device */
920 struct mgs_target_info target;
921 /* Temporary modified llog that receives the rewritten records */
922 struct llog_handle *temp_llh;
923 /* Flag is set while inside the target device's marker block */
924 int in_target_device;
925 /* NIDs already added. Just skip (multiple NIDs) */
926 int device_nids_added;
927 /* Flag is set if this block should not be copied */
932 * Check: a) if block should be skipped
933 * b) is it target block
938 * \retval 0 record should not be skipped
939 * \retval 1 record should be skipped
/**
 * Update the replace-NIDs state machine from one config record.
 *
 * Only LCFG_MARKER records are examined. Markers flagged
 * CM_EXCLUDE together with CM_START or CM_END delimit a block that is
 * dropped from the rewritten log. Markers whose target name equals
 * mrul->target.mti_svname toggle in_target_device: set on CM_START
 * (which also resets device_nids_added), cleared on CM_END.
 */
941 static int check_markers(struct lustre_cfg *lcfg,
942 struct mgs_replace_uuid_lookup *mrul)
944 struct cfg_marker *marker;
946 /* Track markers. Find given device */
947 if (lcfg->lcfg_command == LCFG_MARKER) {
948 marker = lustre_cfg_buf(lcfg, 1);
949 /* Clean llog from records marked as CM_EXCLUDE.
950 CM_SKIP records are used for "active" command
951 and can be restored if needed */
952 if ((marker->cm_flags & (CM_EXCLUDE | CM_START)) ==
953 (CM_EXCLUDE | CM_START)) {
958 if ((marker->cm_flags & (CM_EXCLUDE | CM_END)) ==
959 (CM_EXCLUDE | CM_END)) {
964 if (strcmp(mrul->target.mti_svname, marker->cm_tgtname) == 0) {
/* A single marker may not be both a start and an end marker. */
965 LASSERT(!(marker->cm_flags & CM_START) ||
966 !(marker->cm_flags & CM_END));
967 if (marker->cm_flags & CM_START) {
968 mrul->in_target_device = 1;
969 mrul->device_nids_added = 0;
970 } else if (marker->cm_flags & CM_END)
971 mrul->in_target_device = 0;
/**
 * Build a lustre_cfg record and append it to config log \a llh.
 *
 * Buffer 0 is \a cfgname; \a s1 .. \a s4 are stored as string buffers
 * 1 .. 4. The record is created with command \a cmd, its lcfg_nid is
 * set to \a nid, it is written at LLOG_NEXT_IDX and then freed.
 * The per-thread mgi_bufs from mgs_env_info() is used as scratch space.
 */
978 static int record_base(const struct lu_env *env, struct llog_handle *llh,
979 char *cfgname, lnet_nid_t nid, int cmd,
980 char *s1, char *s2, char *s3, char *s4)
982 struct mgs_thread_info *mgi = mgs_env_info(env);
983 struct llog_cfg_rec *lcr;
986 CDEBUG(D_MGS, "lcfg %s %#x %s %s %s %s\n", cfgname,
987 cmd, s1, s2, s3, s4);
989 lustre_cfg_bufs_reset(&mgi->mgi_bufs, cfgname);
991 lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 1, s1);
993 lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 2, s2);
995 lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 3, s3);
997 lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 4, s4);
999 lcr = lustre_cfg_rec_new(cmd, &mgi->mgi_bufs);
1003 lcr->lcr_cfg.lcfg_nid = nid;
1004 rc = llog_write(env, llh, &lcr->lcr_hdr, LLOG_NEXT_IDX);
/* The record is a temporary; the log keeps its own copy once written. */
1006 lustre_cfg_rec_free(lcr);
1010 "failed to write lcfg %s %#x %s %s %s %s: rc = %d\n",
1011 cfgname, cmd, s1, s2, s3, s4, rc);
/**
 * Append an LCFG_ADD_UUID record binding \a nid to \a uuid.
 * No device name is recorded (cfgname is NULL).
 */
1015 static inline int record_add_uuid(const struct lu_env *env,
1016 struct llog_handle *llh,
1017 uint64_t nid, char *uuid)
1019 return record_base(env, llh, NULL, nid, LCFG_ADD_UUID, uuid,
/**
 * Append an LCFG_ADD_CONN record for device \a devname with \a uuid as
 * its first string argument; the record's nid is 0.
 */
1023 static inline int record_add_conn(const struct lu_env *env,
1024 struct llog_handle *llh,
1025 char *devname, char *uuid)
1027 return record_base(env, llh, devname, 0, LCFG_ADD_CONN, uuid,
/**
 * Append an LCFG_ATTACH record for device \a devname with \a type and
 * \a uuid as its first two string arguments; the record's nid is 0.
 */
1031 static inline int record_attach(const struct lu_env *env,
1032 struct llog_handle *llh, char *devname,
1033 char *type, char *uuid)
1035 return record_base(env, llh, devname, 0, LCFG_ATTACH, type, uuid,
/**
 * Append an LCFG_SETUP record for device \a devname carrying the four
 * string arguments \a s1 .. \a s4 unchanged; the record's nid is 0.
 */
1039 static inline int record_setup(const struct lu_env *env,
1040 struct llog_handle *llh, char *devname,
1041 char *s1, char *s2, char *s3, char *s4)
1043 return record_base(env, llh, devname, 0, LCFG_SETUP, s1, s2, s3, s4);
1047 * \retval <0 record processing error
1048 * \retval n record is processed. No need to copy the original one.
1049 * \retval 0 record is not processed.
/**
 * Rewrite one record that lies inside the target device block.
 *
 * - LCFG_ADD_UUID: the original record is dropped and one LCFG_ADD_UUID
 *   record is written to mrul->temp_llh for every NID parsed from
 *   mrul->target.mti_params; device_nids_added is then set.
 * - LCFG_SETUP (once NIDs were added): a new setup record is written in
 *   which only the second string argument is replaced by mti_params.
 *
 * NOTE(review): mti_params is used both as the NID list being parsed and
 * as the uuid string of the new records -- confirm this is intended.
 */
1051 static int process_command(const struct lu_env *env, struct lustre_cfg *lcfg,
1052 struct mgs_replace_uuid_lookup *mrul)
1059 if (lcfg->lcfg_command == LCFG_ADD_UUID) {
1060 /* LCFG_ADD_UUID command found. Let's skip original command
1061 and add passed nids */
1062 ptr = mrul->target.mti_params;
1063 while (class_parse_nid(ptr, &nid, &ptr) == 0) {
1064 CDEBUG(D_MGS, "add nid %s with uuid %s, "
1065 "device %s\n", libcfs_nid2str(nid),
1066 mrul->target.mti_params,
1067 mrul->target.mti_svname);
1068 rc = record_add_uuid(env,
1069 mrul->temp_llh, nid,
1070 mrul->target.mti_params);
1075 if (nids_added == 0) {
1076 CERROR("No new nids were added, nid %s with uuid %s, "
1077 "device %s\n", libcfs_nid2str(nid),
1078 mrul->target.mti_params,
1079 mrul->target.mti_svname);
1082 mrul->device_nids_added = 1;
1088 if (mrul->device_nids_added && lcfg->lcfg_command == LCFG_SETUP) {
1089 /* LCFG_SETUP command found. UUID should be changed */
1090 rc = record_setup(env,
1092 /* devname the same */
1093 lustre_cfg_string(lcfg, 0),
1094 /* s1 is not changed */
1095 lustre_cfg_string(lcfg, 1),
1096 /* new uuid should be
1098 mrul->target.mti_params,
1099 /* s3 is not changed */
1100 lustre_cfg_string(lcfg, 3),
1101 /* s4 is not changed */
1102 lustre_cfg_string(lcfg, 4));
1106 /* Other commands in the target device block */
1111 * Handler that is called for every record in the llog.
1112 * Records are processed in the order in which they were placed in the llog.
1114 * \param[in] llh log to be processed
1115 * \param[in] rec current record
1116 * \param[in] data mgs_replace_uuid_lookup structure
/**
 * llog_process() callback that copies, rewrites or drops one record
 * while rebuilding a config log with replaced NIDs.
 *
 * Records that fail lustre_cfg_sanity_check(), records inside excluded
 * marker blocks, and extra LCFG_ADD_UUID / LCFG_ADD_CONN records of the
 * target (after its NIDs were added) are skipped. Records outside the
 * target device block are copied verbatim into mrul->temp_llh. Records
 * inside the block are passed to process_command().
 */
1120 static int mgs_replace_handler(const struct lu_env *env,
1121 struct llog_handle *llh,
1122 struct llog_rec_hdr *rec,
1125 struct mgs_replace_uuid_lookup *mrul;
1126 struct lustre_cfg *lcfg = REC_DATA(rec);
1127 int cfg_len = REC_DATA_LEN(rec);
1131 mrul = (struct mgs_replace_uuid_lookup *)data;
/*
 * NOTE(review): this error message reads lcfg fields of a record that
 * is not an OBD_CFG_REC and has not been sanity-checked yet -- confirm
 * that dereferencing lcfg here is safe.
 */
1133 if (rec->lrh_type != OBD_CFG_REC) {
1134 CERROR("unhandled lrh_type: %#x, cmd %x %s %s\n",
1135 rec->lrh_type, lcfg->lcfg_command,
1136 lustre_cfg_string(lcfg, 0),
1137 lustre_cfg_string(lcfg, 1));
1141 rc = lustre_cfg_sanity_check(lcfg, cfg_len);
1143 /* Do not copy any invalidated records */
1144 GOTO(skip_out, rc = 0);
1147 rc = check_markers(lcfg, mrul);
1148 if (rc || mrul->skip_it)
1149 GOTO(skip_out, rc = 0);
1151 /* Write to new log all commands outside target device block */
1152 if (!mrul->in_target_device)
1153 GOTO(copy_out, rc = 0);
1155 /* Skip all other LCFG_ADD_UUID and LCFG_ADD_CONN records
1156 (failover nids) for this target, assuming that if the
1157 primary is changing then so is the failover */
1158 if (mrul->device_nids_added &&
1159 (lcfg->lcfg_command == LCFG_ADD_UUID ||
1160 lcfg->lcfg_command == LCFG_ADD_CONN))
1161 GOTO(skip_out, rc = 0);
1163 rc = process_command(env, lcfg, mrul);
1170 /* Record is placed in temporary llog as is */
1171 rc = llog_write(env, mrul->temp_llh, rec, LLOG_NEXT_IDX);
1173 CDEBUG(D_MGS, "Copied idx=%d, rc=%d, len=%d, cmd %x %s %s\n",
1174 rec->lrh_index, rc, rec->lrh_len, lcfg->lcfg_command,
1175 lustre_cfg_string(lcfg, 0), lustre_cfg_string(lcfg, 1));
1179 CDEBUG(D_MGS, "Skipped idx=%d, rc=%d, len=%d, cmd %x %s %s\n",
1180 rec->lrh_index, rc, rec->lrh_len, lcfg->lcfg_command,
1181 lustre_cfg_string(lcfg, 0), lustre_cfg_string(lcfg, 1));
/**
 * Report whether config log \a name is empty, as decided by
 * llog_is_empty() on the LLOG_CONFIG_ORIG_CTXT context. The context
 * reference taken here is released before returning.
 */
1185 static int mgs_log_is_empty(const struct lu_env *env,
1186 struct mgs_device *mgs, char *name)
1188 struct llog_ctxt *ctxt;
1191 ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
1192 LASSERT(ctxt != NULL);
1194 rc = llog_is_empty(env, ctxt, name);
1195 llog_ctxt_put(ctxt);
/**
 * Rewrite config log \a logname so that \a devname uses \a nids.
 *
 * Steps, as visible here:
 *  1. return early (rc = 0) when the log is empty;
 *  2. back the log up to "<logname>.bak" and erase the original;
 *  3. re-create the original log and open the backup;
 *  4. replay the backup through mgs_replace_handler(), which writes the
 *     rewritten records into the new log;
 *  5. on failure, restore the original from the backup.
 *
 * \retval -ENOMEM  backup name or lookup structure allocation failed
 */
1199 static int mgs_replace_nids_log(const struct lu_env *env,
1200 struct obd_device *mgs, struct fs_db *fsdb,
1201 char *logname, char *devname, char *nids)
1203 struct llog_handle *orig_llh, *backup_llh;
1204 struct llog_ctxt *ctxt;
1205 struct mgs_replace_uuid_lookup *mrul;
1206 struct mgs_device *mgs_dev = lu2mgs_dev(mgs->obd_lu_dev);
1207 static struct obd_uuid cfg_uuid = { .uuid = "config_uuid" };
1212 CDEBUG(D_MGS, "Replace nids for %s in %s\n", devname, logname);
1214 ctxt = llog_get_context(mgs, LLOG_CONFIG_ORIG_CTXT);
1215 LASSERT(ctxt != NULL);
1217 if (mgs_log_is_empty(env, mgs_dev, logname)) {
1218 /* Log is empty. Nothing to replace */
1219 GOTO(out_put, rc = 0);
1222 OBD_ALLOC(backup, strlen(logname) + strlen(".bak") + 1);
1224 GOTO(out_put, rc = -ENOMEM);
1226 sprintf(backup, "%s.bak", logname);
1228 rc = llog_backup(env, mgs, ctxt, ctxt, logname, backup);
1230 /* Now erase original log file. Connections are not allowed.
1231 Backup is already saved */
1232 rc = llog_erase(env, ctxt, NULL, logname);
1235 } else if (rc != -ENOENT) {
1236 CERROR("%s: can't make backup for %s: rc = %d\n",
1237 mgs->obd_name, logname, rc);
1241 /* open local log */
1242 rc = llog_open_create(env, ctxt, &orig_llh, NULL, logname);
1244 GOTO(out_restore, rc);
1246 rc = llog_init_handle(env, orig_llh, LLOG_F_IS_PLAIN, &cfg_uuid);
1248 GOTO(out_closel, rc);
1250 /* open backup llog */
1251 rc = llog_open(env, ctxt, &backup_llh, NULL, backup,
1254 GOTO(out_closel, rc);
1256 rc = llog_init_handle(env, backup_llh, LLOG_F_IS_PLAIN, NULL);
1258 GOTO(out_close, rc);
1260 if (llog_get_size(backup_llh) <= 1)
1261 GOTO(out_close, rc = 0);
1263 OBD_ALLOC_PTR(mrul);
1265 GOTO(out_close, rc = -ENOMEM);
/*
 * NOTE(review): unlike mgs_modify(), the strlcpy() results below are
 * not checked, so an over-long devname or nids string would be
 * silently truncated -- confirm whether -E2BIG should be returned.
 */
1266 /* devname is only needed information to replace UUID records */
1267 strlcpy(mrul->target.mti_svname, devname,
1268 sizeof(mrul->target.mti_svname));
1269 /* parse nids later */
1270 strlcpy(mrul->target.mti_params, nids, sizeof(mrul->target.mti_params));
1271 /* Copy records to this temporary llog */
1272 mrul->temp_llh = orig_llh;
1274 rc = llog_process(env, backup_llh, mgs_replace_handler,
1275 (void *)mrul, NULL);
/*
 * NOTE(review): both closes pass a NULL env although env is available
 * and is passed to llog_close() elsewhere in this file -- confirm.
 */
1278 rc2 = llog_close(NULL, backup_llh);
1282 rc2 = llog_close(NULL, orig_llh);
1288 CERROR("%s: llog should be restored: rc = %d\n",
1290 rc2 = llog_backup(env, mgs, ctxt, ctxt, backup,
1293 CERROR("%s: can't restore backup %s: rc = %d\n",
1294 mgs->obd_name, logname, rc2);
1298 OBD_FREE(backup, strlen(backup) + 1);
1301 llog_ctxt_put(ctxt);
1304 CERROR("%s: failed to replace nids in log %s: rc = %d\n",
1305 mgs->obd_name, logname, rc);
1311 * Parse device name and get file system name and/or device index
1313 * \param[in] devname device name (ex. lustre-MDT0000)
1314 * \param[out] fsname file system name(optional)
1315 * \param[out] index device index(optional)
/**
 * Split a device name such as "lustre-MDT0000" into its file system
 * name (via server_name2fsname) and its device index (via
 * server_name2index). A debug message is logged when either part cannot
 * be extracted.
 */
1319 static int mgs_parse_devname(char *devname, char *fsname, __u32 *index)
1324 /* Extract fsname */
1326 rc = server_name2fsname(devname, fsname, NULL);
1328 CDEBUG(D_MGS, "Device name %s without fsname\n",
1335 rc = server_name2index(devname, index, NULL);
1337 CDEBUG(D_MGS, "Device name %s with wrong index\n",
1346 /* This is only called during replace_nids */
/**
 * Decide whether the MGS is the only service in use on this node.
 *
 * Walks the MGS export list under obd_dev_lock, ignoring the self
 * export, and logs the exports that are still connected.
 *
 * \retval non-zero  at most 3 devices exist and no foreign export counted
 * \retval 0         otherwise
 */
1347 static int only_mgs_is_running(struct obd_device *mgs_obd)
1349 /* TBD: does a global variable with the device count exist? */
1350 int num_devices = get_devices_count();
1351 int num_exports = 0;
1352 struct obd_export *exp;
1354 spin_lock(&mgs_obd->obd_dev_lock);
1355 list_for_each_entry(exp, &mgs_obd->obd_exports, exp_obd_chain) {
1356 /* skip self export */
1357 if (exp == mgs_obd->obd_self_export)
1359 if (exp_connect_flags(exp) & OBD_CONNECT_MDS_MDS)
1364 CERROR("%s: node %s still connected during replace_nids "
1365 "connect_flags:%llx\n",
1367 libcfs_nid2str(exp->exp_nid_stats->nid),
1368 exp_connect_flags(exp));
1371 spin_unlock(&mgs_obd->obd_dev_lock);
1373 /* osd, MGS and MGC + self_export
1374 (wc -l /proc/fs/lustre/devices <= 2) && (non self exports == 0) */
1375 return (num_devices <= 3) && (num_exports == 0);
/**
 * Build the MDT log name "<fsname>-MDTxxxx", where xxxx is \a i printed
 * as at least four hexadecimal digits. The result is allocated by
 * name_create() and must be released with name_destroy().
 */
1378 static int name_create_mdt(char **logname, char *fsname, int i)
1382 sprintf(mdt_index, "-MDT%04x", i);
1383 return name_create(logname, fsname, mdt_index);
1387 * Replace nids for \a device to \a nids values
1389 * \param obd MGS obd device
1390 * \param devname nids need to be replaced for this device
1391 * (ex. lustre-OST0000)
1392 * \param nids nids list (ex. nid1,nid2,nid3)
/**
 * Replace the NIDs of \a devname with \a nids in the client log and in
 * every MDT log of its file system.
 *
 * New connections to the MGS are blocked (obd_no_conn = 1) for the
 * duration of the call and the previous setting is restored afterwards.
 *
 * \retval -EINPROGRESS  something other than the MGS is still running
 */
1396 int mgs_replace_nids(const struct lu_env *env,
1397 struct mgs_device *mgs,
1398 char *devname, char *nids)
1400 /* Assume fsname is part of device name */
1401 char fsname[MTI_NAME_MAXLEN];
1405 struct fs_db *fsdb = NULL;
1408 struct obd_device *mgs_obd = mgs->mgs_obd;
1411 /* We can only change NIDs if no other nodes are connected */
1412 spin_lock(&mgs_obd->obd_dev_lock);
1413 conn_state = mgs_obd->obd_no_conn;
1414 mgs_obd->obd_no_conn = 1;
1415 spin_unlock(&mgs_obd->obd_dev_lock);
1417 /* NIDs cannot be changed unless the MGS is the only service started */
1418 if (!only_mgs_is_running(mgs_obd)) {
1419 CERROR("Only MGS is allowed to be started\n");
1420 GOTO(out, rc = -EINPROGRESS);
1423 /* Get fsname and index*/
1424 rc = mgs_parse_devname(devname, fsname, &index);
1428 rc = mgs_find_or_make_fsdb(env, mgs, fsname, &fsdb);
1430 CERROR("%s: can't find fsdb: rc = %d\n", fsname, rc);
/*
 * NOTE(review): the result of name_create() is not checked here or in
 * the MDT loop below; on allocation failure logname would be used and
 * destroyed while unset -- confirm and add error handling.
 */
1434 /* Process client llogs */
1435 name_create(&logname, fsname, "-client");
1436 rc = mgs_replace_nids_log(env, mgs_obd, fsdb, logname, devname, nids);
1437 name_destroy(&logname);
1439 CERROR("%s: error while replacing NIDs for %s: rc = %d\n",
1440 fsname, devname, rc);
1444 /* Process MDT llogs */
1445 for (i = 0; i < INDEX_MAP_SIZE * 8; i++) {
1446 if (!test_bit(i, fsdb->fsdb_mdt_index_map))
1448 name_create_mdt(&logname, fsname, i);
1449 rc = mgs_replace_nids_log(env, mgs_obd, fsdb, logname, devname, nids);
1450 name_destroy(&logname);
/* Restore the connection policy that was in effect on entry. */
1456 spin_lock(&mgs_obd->obd_dev_lock);
1457 mgs_obd->obd_no_conn = conn_state;
1458 spin_unlock(&mgs_obd->obd_dev_lock);
1461 mgs_put_fsdb(mgs, fsdb);
/*
 * Append an LCFG_SETUP record for LOV device \a devname to \a llh.
 *
 * The per-thread config buffers are reset with \a devname and buffer 1
 * is set to the raw lov_desc (sizeof(*desc) bytes).  The record built by
 * lustre_cfg_rec_new() is written with llog_write() and then freed.
 * NOTE(review): the allocation-failure check and the return statement
 * are not visible in this listing.
 */
1466 static int record_lov_setup(const struct lu_env *env, struct llog_handle *llh,
1467 char *devname, struct lov_desc *desc)
1469 struct mgs_thread_info *mgi = mgs_env_info(env);
1470 struct llog_cfg_rec *lcr;
1473 lustre_cfg_bufs_reset(&mgi->mgi_bufs, devname);
1474 lustre_cfg_bufs_set(&mgi->mgi_bufs, 1, desc, sizeof(*desc));
1475 lcr = lustre_cfg_rec_new(LCFG_SETUP, &mgi->mgi_bufs);
1479 rc = llog_write(env, llh, &lcr->lcr_hdr, LLOG_NEXT_IDX);
1480 lustre_cfg_rec_free(lcr);
/*
 * Append an LCFG_SETUP record for LMV device \a devname to \a llh.
 *
 * Same shape as the LOV variant: the config buffers are reset with
 * \a devname, buffer 1 carries the raw lmv_desc, and the record is
 * written with llog_write() and then freed.
 * NOTE(review): the allocation-failure check and the return statement
 * are not visible in this listing.
 */
1484 static int record_lmv_setup(const struct lu_env *env, struct llog_handle *llh,
1485 char *devname, struct lmv_desc *desc)
1487 struct mgs_thread_info *mgi = mgs_env_info(env);
1488 struct llog_cfg_rec *lcr;
1491 lustre_cfg_bufs_reset(&mgi->mgi_bufs, devname);
1492 lustre_cfg_bufs_set(&mgi->mgi_bufs, 1, desc, sizeof(*desc));
1493 lcr = lustre_cfg_rec_new(LCFG_SETUP, &mgi->mgi_bufs);
1497 rc = llog_write(env, llh, &lcr->lcr_hdr, LLOG_NEXT_IDX);
1498 lustre_cfg_rec_free(lcr);
/*
 * Record an LCFG_ADD_MDC command through record_base().
 *
 * Note the argument order handed to record_base(): \a mdtuuid,
 * \a index, gen, and finally \a mdcuuid.
 */
1502 static inline int record_mdc_add(const struct lu_env *env,
1503 struct llog_handle *llh,
1504 char *logname, char *mdcuuid,
1505 char *mdtuuid, char *index,
1508 return record_base(env,llh,logname,0,LCFG_ADD_MDC,
1509 mdtuuid,index,gen,mdcuuid);
/*
 * Record an LCFG_LOV_ADD_OBD command through record_base(), passing
 * \a ost_uuid, \a index and \a gen as the string arguments; the last
 * string argument is NULL.
 */
1512 static inline int record_lov_add(const struct lu_env *env,
1513 struct llog_handle *llh,
1514 char *lov_name, char *ost_uuid,
1515 char *index, char *gen)
1517 return record_base(env, llh, lov_name, 0, LCFG_LOV_ADD_OBD,
1518 ost_uuid, index, gen, NULL);
/*
 * Record an LCFG_MOUNTOPT command through record_base().
 *
 * No device name is passed (NULL); the string arguments are
 * \a profile, \a lov_name and mdc_name, followed by NULL.
 */
1521 static inline int record_mount_opt(const struct lu_env *env,
1522 struct llog_handle *llh,
1523 char *profile, char *lov_name,
1526 return record_base(env, llh, NULL, 0, LCFG_MOUNTOPT,
1527 profile, lov_name, mdc_name, NULL);
/*
 * Append an LCFG_MARKER record to \a llh.
 *
 * Fills the per-thread cfg_marker with the fsdb generation (cm_step),
 * \a flags, the Lustre version code, \a tgtname, \a comment and the
 * current time as creation time (cancel time 0), then writes it as
 * buffer 1 of an LCFG_MARKER record.  Both strlcpy() results are
 * compared with the destination size to detect truncation.
 * NOTE(review): the statement guarded by "flags & CM_START", the
 * truncation error returns and the final return are not visible here.
 */
1530 static int record_marker(const struct lu_env *env,
1531 struct llog_handle *llh,
1532 struct fs_db *fsdb, __u32 flags,
1533 char *tgtname, char *comment)
1535 struct mgs_thread_info *mgi = mgs_env_info(env);
1536 struct llog_cfg_rec *lcr;
1540 if (flags & CM_START)
1542 mgi->mgi_marker.cm_step = fsdb->fsdb_gen;
1543 mgi->mgi_marker.cm_flags = flags;
1544 mgi->mgi_marker.cm_vers = LUSTRE_VERSION_CODE;
1545 cplen = strlcpy(mgi->mgi_marker.cm_tgtname, tgtname,
1546 sizeof(mgi->mgi_marker.cm_tgtname));
1547 if (cplen >= sizeof(mgi->mgi_marker.cm_tgtname))
1549 cplen = strlcpy(mgi->mgi_marker.cm_comment, comment,
1550 sizeof(mgi->mgi_marker.cm_comment));
1551 if (cplen >= sizeof(mgi->mgi_marker.cm_comment))
1553 mgi->mgi_marker.cm_createtime = cfs_time_current_sec();
1554 mgi->mgi_marker.cm_canceltime = 0;
1555 lustre_cfg_bufs_reset(&mgi->mgi_bufs, NULL);
1556 lustre_cfg_bufs_set(&mgi->mgi_bufs, 1, &mgi->mgi_marker,
1557 sizeof(mgi->mgi_marker));
1558 lcr = lustre_cfg_rec_new(LCFG_MARKER, &mgi->mgi_bufs);
1562 rc = llog_write(env, llh, &lcr->lcr_hdr, LLOG_NEXT_IDX);
1563 lustre_cfg_rec_free(lcr);
/*
 * Open (creating if needed) the config llog \a name and return its
 * handle in *llh, initialised as a plain llog with the "config_uuid"
 * uuid.
 *
 * Visible error paths: -EBUSY (condition not visible in this listing),
 * -ENODEV when no LLOG_CONFIG_ORIG_CTXT context is available, and a
 * CERROR naming the log on failure.  The llog context reference is
 * dropped with llog_ctxt_put() before returning.
 * NOTE(review): llog_close() presumably runs only when
 * llog_init_handle() fails -- confirm against the full source.
 */
1567 static int record_start_log(const struct lu_env *env, struct mgs_device *mgs,
1568 struct llog_handle **llh, char *name)
1570 static struct obd_uuid cfg_uuid = { .uuid = "config_uuid" };
1571 struct llog_ctxt *ctxt;
1576 GOTO(out, rc = -EBUSY);
1578 ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
1580 GOTO(out, rc = -ENODEV);
1581 LASSERT(ctxt->loc_obd == mgs->mgs_obd);
1583 rc = llog_open_create(env, ctxt, llh, NULL, name);
1586 rc = llog_init_handle(env, *llh, LLOG_F_IS_PLAIN, &cfg_uuid);
1588 llog_close(env, *llh);
1590 llog_ctxt_put(ctxt);
1593 CERROR("%s: can't start log %s: rc = %d\n",
1594 mgs->mgs_obd->obd_name, name, rc);
/*
 * Close the llog handle *llh with llog_close().
 * NOTE(review): whether *llh is reset afterwards and what is returned
 * are not visible in this listing.
 */
1600 static int record_end_log(const struct lu_env *env, struct llog_handle **llh)
1604 rc = llog_close(env, *llh);
1610 /******************** config "macros" *********************/
1612 /* write an lcfg directly into a log (with markers) */
/*
 * Sequence: open \a logname, write a CM_START marker for \a devname and
 * \a comment, write the caller-supplied record \a lcr, write the
 * matching CM_END marker, then close the log.
 * NOTE(review): the error checks between the steps are not visible in
 * this listing.
 */
1613 static int mgs_write_log_direct(const struct lu_env *env,
1614 struct mgs_device *mgs, struct fs_db *fsdb,
1615 char *logname, struct llog_cfg_rec *lcr,
1616 char *devname, char *comment)
1618 struct llog_handle *llh = NULL;
1623 rc = record_start_log(env, mgs, &llh, logname);
1627 /* FIXME These should be a single journal transaction */
1628 rc = record_marker(env, llh, fsdb, CM_START, devname, comment);
1631 rc = llog_write(env, llh, &lcr->lcr_hdr, LLOG_NEXT_IDX);
1634 rc = record_marker(env, llh, fsdb, CM_END, devname, comment);
1638 record_end_log(env, &llh);
1642 /* write the lcfg in all logs for the given fs */
/*
 * Lists every log in the CONFIGS directory and, for each one whose name
 * is "<fsname>-...", first marks older settings with the same
 * \a devname and \a comment as CM_SKIP via mgs_modify(), then writes
 * \a lcr with mgs_write_log_direct().  Logs whose name contains
 * "-sptlrpc" are never written; logs containing "-client" are skipped
 * when \a server_only is set.  Each directory entry is unlinked from
 * the list while iterating.
 * NOTE(review): the statements taken on the skip and error paths are
 * not visible in this listing.
 */
1643 static int mgs_write_log_direct_all(const struct lu_env *env,
1644 struct mgs_device *mgs,
1646 struct mgs_target_info *mti,
1647 struct llog_cfg_rec *lcr, char *devname,
1648 char *comment, int server_only)
1650 struct list_head log_list;
1651 struct mgs_direntry *dirent, *n;
1652 char *fsname = mti->mti_fsname;
1653 int rc = 0, len = strlen(fsname);
1656 /* Find all the logs in the CONFIGS directory */
1657 rc = class_dentry_readdir(env, mgs, &log_list);
1661 /* Could use fsdb index maps instead of directory listing */
1662 list_for_each_entry_safe(dirent, n, &log_list, mde_list) {
1663 list_del_init(&dirent->mde_list);
1664 /* don't write to sptlrpc rule log */
1665 if (strstr(dirent->mde_name, "-sptlrpc") != NULL)
1668 /* caller wants write server logs only */
1669 if (server_only && strstr(dirent->mde_name, "-client") != NULL)
/* only names of the form "<fsname>-<something>" belong to this fs */
1672 if (strlen(dirent->mde_name) <= len ||
1673 strncmp(fsname, dirent->mde_name, len) != 0 ||
1674 dirent->mde_name[len] != '-')
1677 CDEBUG(D_MGS, "Changing log %s\n", dirent->mde_name);
1678 /* Erase any old settings of this same parameter */
1679 rc = mgs_modify(env, mgs, fsdb, mti, dirent->mde_name,
1680 devname, comment, CM_SKIP);
1682 CERROR("%s: Can't modify llog %s: rc = %d\n",
1683 mgs->mgs_obd->obd_name, dirent->mde_name, rc);
1686 /* Write the new one */
1687 rc = mgs_write_log_direct(env, mgs, fsdb, dirent->mde_name,
1688 lcr, devname, comment);
1690 CERROR("%s: writing log %s: rc = %d\n",
1691 mgs->mgs_obd->obd_name, dirent->mde_name, rc);
1693 mgs_direntry_free(dirent);
/*
 * Forward declarations: these helpers are defined further down in this
 * file but are called by the client-llog "steal" handler that precedes
 * their definitions.
 */
1699 static int mgs_write_log_osp_to_mdt(const struct lu_env *env,
1700 struct mgs_device *mgs,
1702 struct mgs_target_info *mti,
1703 int index, char *logname);
1704 static int mgs_write_log_osc_to_lov(const struct lu_env *env,
1705 struct mgs_device *mgs,
1707 struct mgs_target_info *mti,
1708 char *logname, char *suffix, char *lovname,
1709 enum lustre_sec_part sec_part, int flags);
1710 static int name_create_mdt_and_lov(char **logname, char **lovname,
1711 struct fs_db *fsdb, int i);
/*
 * Append " <key><val>" to the NUL-terminated parameter string
 * \a params.
 *
 * The capacity check uses sizeof(mti_params) of struct mgs_target_info,
 * so \a params is assumed to point at such a buffer.  When the addition
 * would not fit, a CERROR is logged.  Both the CERROR and the sprintf()
 * substitute "" for a NULL \a key.
 * NOTE(review): the guard around strlen(key) and the return statements
 * are not visible in this listing -- confirm that a NULL key is
 * filtered before strlen() is reached.
 */
1713 static int add_param(char *params, char *key, char *val)
1715 char *start = params + strlen(params);
1716 char *end = params + sizeof(((struct mgs_target_info *)0)->mti_params);
1720 keylen = strlen(key);
1721 if (start + 1 + keylen + strlen(val) >= end) {
1722 CERROR("params are too long: %s %s%s\n",
1723 params, key != NULL ? key : "", val);
1727 sprintf(start, " %s%s", key != NULL ? key : "", val);
1732 * Walk through client config log record and convert the related records
/*
 * llog record callback; \a data is a struct temp_comp carrying the
 * target info of the MDT being written (comp_mti), a scratch target
 * info (comp_tmti), the fsdb and the MGS obd.
 *
 * What the visible code does, per record type:
 *  - non-OBD_CFG_REC records and records failing
 *    lustre_cfg_sanity_check() are reported with CERROR;
 *  - LCFG_MARKER: for un-skipped "add osc" / "add mdc" start markers the
 *    target name is copied into tmti and the marker step is remembered;
 *    for the matching end markers tmti is zeroed.  For "add osc" a
 *    copied "add osc(copied)" start/end marker is also written to a log
 *    opened with record_start_log();
 *  - LCFG_ADD_UUID: the nid is stored in tmti->mti_nids while no target
 *    uuid is set yet, otherwise it is appended as a PARAM_FAILNODE
 *    parameter;
 *  - LCFG_SETUP: buffer 1 is copied into tmti->mti_uuid;
 *  - LCFG_SPTLRPC_CONF: tested for explicitly (see the comment there);
 *  - LCFG_ADD_MDC: the index is parsed from buffer 2 and
 *    mgs_write_log_osp_to_mdt() is called, then tmti is zeroed;
 *  - LCFG_LOV_ADD_OBD: the index is parsed from buffer 2 and
 *    mgs_write_log_osc_to_lov() is called.
 *
 * The parser state (got_an_osc_or_mdc, last_step) lives in
 * function-static variables and therefore persists across calls.
 * NOTE(review): this presumably relies on callers being serialised --
 * confirm.  Several of the memcpy() calls copy strlen() bytes without a
 * terminating NUL and without a destination-size check; they appear to
 * depend on tmti having been zeroed -- confirm.  Many return and
 * error-check statements are not visible in this listing.
 */
1735 static int mgs_steal_client_llog_handler(const struct lu_env *env,
1736 struct llog_handle *llh,
1737 struct llog_rec_hdr *rec, void *data)
1739 struct mgs_device *mgs;
1740 struct obd_device *obd;
1741 struct mgs_target_info *mti, *tmti;
1743 int cfg_len = rec->lrh_len;
1744 char *cfg_buf = (char*) (rec + 1);
1745 struct lustre_cfg *lcfg;
1747 struct llog_handle *mdt_llh = NULL;
1748 static int got_an_osc_or_mdc = 0;
1749 /* 0: not found any osc/mdc;
1753 static int last_step = -1;
1758 mti = ((struct temp_comp*)data)->comp_mti;
1759 tmti = ((struct temp_comp*)data)->comp_tmti;
1760 fsdb = ((struct temp_comp*)data)->comp_fsdb;
1761 obd = ((struct temp_comp *)data)->comp_obd;
1762 mgs = lu2mgs_dev(obd->obd_lu_dev);
1765 if (rec->lrh_type != OBD_CFG_REC) {
1766 CERROR("unhandled lrh_type: %#x\n", rec->lrh_type);
1770 rc = lustre_cfg_sanity_check(cfg_buf, cfg_len);
1772 CERROR("Insane cfg\n");
1776 lcfg = (struct lustre_cfg *)cfg_buf;
1778 if (lcfg->lcfg_command == LCFG_MARKER) {
1779 struct cfg_marker *marker;
1780 marker = lustre_cfg_buf(lcfg, 1);
1781 if (!strncmp(marker->cm_comment, "add osc", 7) &&
1782 (marker->cm_flags & CM_START) &&
1783 !(marker->cm_flags & CM_SKIP)) {
1784 got_an_osc_or_mdc = 1;
1785 cplen = strlcpy(tmti->mti_svname, marker->cm_tgtname,
1786 sizeof(tmti->mti_svname));
1787 if (cplen >= sizeof(tmti->mti_svname))
1789 rc = record_start_log(env, mgs, &mdt_llh,
1793 rc = record_marker(env, mdt_llh, fsdb, CM_START,
1794 mti->mti_svname, "add osc(copied)");
1795 record_end_log(env, &mdt_llh);
1796 last_step = marker->cm_step;
1799 if (!strncmp(marker->cm_comment, "add osc", 7) &&
1800 (marker->cm_flags & CM_END) &&
1801 !(marker->cm_flags & CM_SKIP)) {
1802 LASSERT(last_step == marker->cm_step);
1804 got_an_osc_or_mdc = 0;
1805 memset(tmti, 0, sizeof(*tmti));
1806 rc = record_start_log(env, mgs, &mdt_llh,
1810 rc = record_marker(env, mdt_llh, fsdb, CM_END,
1811 mti->mti_svname, "add osc(copied)");
1812 record_end_log(env, &mdt_llh);
1815 if (!strncmp(marker->cm_comment, "add mdc", 7) &&
1816 (marker->cm_flags & CM_START) &&
1817 !(marker->cm_flags & CM_SKIP)) {
1818 got_an_osc_or_mdc = 2;
1819 last_step = marker->cm_step;
1820 memcpy(tmti->mti_svname, marker->cm_tgtname,
1821 strlen(marker->cm_tgtname));
1825 if (!strncmp(marker->cm_comment, "add mdc", 7) &&
1826 (marker->cm_flags & CM_END) &&
1827 !(marker->cm_flags & CM_SKIP)) {
1828 LASSERT(last_step == marker->cm_step);
1830 got_an_osc_or_mdc = 0;
1831 memset(tmti, 0, sizeof(*tmti));
1836 if (got_an_osc_or_mdc == 0 || last_step < 0)
1839 if (lcfg->lcfg_command == LCFG_ADD_UUID) {
1840 __u64 nodenid = lcfg->lcfg_nid;
1842 if (strlen(tmti->mti_uuid) == 0) {
1843 /* target uuid not set, this config record is before
1844 * LCFG_SETUP, this nid is one of target node nid.
1846 tmti->mti_nids[tmti->mti_nid_count] = nodenid;
1847 tmti->mti_nid_count++;
1849 char nidstr[LNET_NIDSTR_SIZE];
1851 /* failover node nid */
1852 libcfs_nid2str_r(nodenid, nidstr, sizeof(nidstr));
1853 rc = add_param(tmti->mti_params, PARAM_FAILNODE,
1860 if (lcfg->lcfg_command == LCFG_SETUP) {
1863 target = lustre_cfg_string(lcfg, 1);
1864 memcpy(tmti->mti_uuid, target, strlen(target));
1868 /* ignore client side sptlrpc_conf_log */
1869 if (lcfg->lcfg_command == LCFG_SPTLRPC_CONF)
1872 if (lcfg->lcfg_command == LCFG_ADD_MDC) {
1875 if (sscanf(lustre_cfg_buf(lcfg, 2), "%d", &index) != 1)
1878 memcpy(tmti->mti_fsname, mti->mti_fsname,
1879 strlen(mti->mti_fsname));
1880 tmti->mti_stripe_index = index;
1882 rc = mgs_write_log_osp_to_mdt(env, mgs, fsdb, tmti,
1883 mti->mti_stripe_index,
1885 memset(tmti, 0, sizeof(*tmti));
1889 if (lcfg->lcfg_command == LCFG_LOV_ADD_OBD) {
1892 char *logname, *lovname;
1894 rc = name_create_mdt_and_lov(&logname, &lovname, fsdb,
1895 mti->mti_stripe_index);
1898 sprintf(mdt_index, "-MDT%04x", mti->mti_stripe_index);
1900 if (sscanf(lustre_cfg_buf(lcfg, 2), "%d", &index) != 1) {
1901 name_destroy(&logname);
1902 name_destroy(&lovname);
1906 tmti->mti_stripe_index = index;
1907 rc = mgs_write_log_osc_to_lov(env, mgs, fsdb, tmti, logname,
1910 name_destroy(&logname);
1911 name_destroy(&lovname);
1917 /* fsdb->fsdb_mutex is already held in mgs_write_log_target*/
1918 /* adapted from mgs_get_fsdb_from_llog */
/*
 * Replays the client config log through
 * mgs_steal_client_llog_handler().
 *
 * A scratch mgs_target_info is allocated and stored, together with the
 * MGS obd, in \a comp before the log is opened, initialised as a plain
 * llog and processed (llog_process_or_fork() with fork == false).  The
 * log handle is closed and the llog context released afterwards.
 * NOTE(review): the release of the scratch target info and the
 * handling of a missing client log are not visible in this listing.
 */
1919 static int mgs_steal_llog_for_mdt_from_client(const struct lu_env *env,
1920 struct mgs_device *mgs,
1922 struct temp_comp* comp)
1924 struct llog_handle *loghandle;
1925 struct mgs_target_info *tmti;
1926 struct llog_ctxt *ctxt;
1931 ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
1932 LASSERT(ctxt != NULL);
1934 OBD_ALLOC_PTR(tmti);
1936 GOTO(out_ctxt, rc = -ENOMEM);
1938 comp->comp_tmti = tmti;
1939 comp->comp_obd = mgs->mgs_obd;
1941 rc = llog_open(env, ctxt, &loghandle, NULL, client_name,
1949 rc = llog_init_handle(env, loghandle, LLOG_F_IS_PLAIN, NULL);
1951 GOTO(out_close, rc);
1953 rc = llog_process_or_fork(env, loghandle, mgs_steal_client_llog_handler,
1954 (void *)comp, NULL, false);
1955 CDEBUG(D_MGS, "steal llog re = %d\n", rc);
1957 llog_close(env, loghandle);
1961 llog_ctxt_put(ctxt);
1965 /* lmv is the second thing for client logs */
1966 /* copied from mgs_write_log_lov. Please refer to that. */
/*
 * Writes the "lmv setup" section for \a lmvname into \a logname:
 * start marker, attach record (type "lmv", uuid "<lmvname>_UUID"),
 * LMV setup record carrying a descriptor with zero target counts, and
 * end marker.  The descriptor is heap-allocated and freed before
 * returning.
 * NOTE(review): the error checks between the steps are not visible in
 * this listing.
 */
1967 static int mgs_write_log_lmv(const struct lu_env *env,
1968 struct mgs_device *mgs,
1970 struct mgs_target_info *mti,
1971 char *logname, char *lmvname)
1973 struct llog_handle *llh = NULL;
1974 struct lmv_desc *lmvdesc;
1979 CDEBUG(D_MGS, "Writing lmv(%s) log for %s\n", lmvname,logname);
1981 OBD_ALLOC_PTR(lmvdesc);
1982 if (lmvdesc == NULL)
1984 lmvdesc->ld_active_tgt_count = 0;
1985 lmvdesc->ld_tgt_count = 0;
1986 sprintf((char*)lmvdesc->ld_uuid.uuid, "%s_UUID", lmvname);
1987 uuid = (char *)lmvdesc->ld_uuid.uuid;
1989 rc = record_start_log(env, mgs, &llh, logname);
1992 rc = record_marker(env, llh, fsdb, CM_START, lmvname, "lmv setup");
1995 rc = record_attach(env, llh, lmvname, "lmv", uuid);
1998 rc = record_lmv_setup(env, llh, lmvname, lmvdesc);
2001 rc = record_marker(env, llh, fsdb, CM_END, lmvname, "lmv setup");
2005 record_end_log(env, &llh);
2007 OBD_FREE_PTR(lmvdesc);
2011 /* lov is the first thing in the mdt and client logs */
/*
 * Writes the "lov setup" section for \a lovname into \a logname:
 * start marker, attach record (type "lov", uuid "<lovname>_UUID"),
 * LOV setup record and end marker.  The descriptor is created with
 * defaults (stripe count 1, RAID0 pattern, default stripe size, stripe
 * offset -1, default QoS max age) that, per the comment below, can be
 * changed later through config parameters.  The descriptor is freed
 * before returning.
 * NOTE(review): the error checks between the steps are not visible in
 * this listing.
 */
2012 static int mgs_write_log_lov(const struct lu_env *env, struct mgs_device *mgs,
2013 struct fs_db *fsdb, struct mgs_target_info *mti,
2014 char *logname, char *lovname)
2016 struct llog_handle *llh = NULL;
2017 struct lov_desc *lovdesc;
2022 CDEBUG(D_MGS, "Writing lov(%s) log for %s\n", lovname, logname);
2025 #01 L attach 0:lov_mdsA 1:lov 2:71ccb_lov_mdsA_19f961a9e1
2026 #02 L lov_setup 0:lov_mdsA 1:(struct lov_desc)
2027 uuid=lov1_UUID, stripe count=1, size=1048576, offset=0, pattern=0
2030 /* FIXME just make lov_setup accept empty desc (put uuid in buf 2) */
2031 OBD_ALLOC_PTR(lovdesc);
2032 if (lovdesc == NULL)
2034 lovdesc->ld_magic = LOV_DESC_MAGIC;
2035 lovdesc->ld_tgt_count = 0;
2036 /* Defaults. Can be changed later by lcfg config_param */
2037 lovdesc->ld_default_stripe_count = 1;
2038 lovdesc->ld_pattern = LOV_PATTERN_RAID0;
2039 lovdesc->ld_default_stripe_size = LOV_DESC_STRIPE_SIZE_DEFAULT;
2040 lovdesc->ld_default_stripe_offset = -1;
2041 lovdesc->ld_qos_maxage = LOV_DESC_QOS_MAXAGE_DEFAULT;
2042 sprintf((char*)lovdesc->ld_uuid.uuid, "%s_UUID", lovname);
2043 /* can these be the same? */
2044 uuid = (char *)lovdesc->ld_uuid.uuid;
2046 /* This should always be the first entry in a log.
2047 rc = mgs_clear_log(obd, logname); */
2048 rc = record_start_log(env, mgs, &llh, logname);
2051 /* FIXME these should be a single journal transaction */
2052 rc = record_marker(env, llh, fsdb, CM_START, lovname, "lov setup");
2055 rc = record_attach(env, llh, lovname, "lov", uuid);
2058 rc = record_lov_setup(env, llh, lovname, lovdesc);
2061 rc = record_marker(env, llh, fsdb, CM_END, lovname, "lov setup");
2066 record_end_log(env, &llh);
2068 OBD_FREE_PTR(lovdesc);
2072 /* add failnids to open log */
/*
 * Scans mti->mti_params for every PARAM_FAILNODE entry and parses the
 * NIDs that follow it.  The first NID of a failover node, rendered as
 * a string, doubles as that node's uuid; each NID is recorded with
 * record_add_uuid() against that uuid, and record_add_conn() then
 * links the uuid to \a cliname, after which the uuid string is
 * destroyed and reset so the next node starts fresh.
 * NOTE(review): the test that distinguishes the two record_add_conn()
 * call sites (the fragment below mentions ':') and the error handling
 * are not fully visible in this listing.
 */
2073 static int mgs_write_log_failnids(const struct lu_env *env,
2074 struct mgs_target_info *mti,
2075 struct llog_handle *llh,
2078 char *failnodeuuid = NULL;
2079 char *ptr = mti->mti_params;
2084 #03 L add_uuid nid=uml1@tcp(0x20000c0a80201) nal=90 0: 1:uml1_UUID
2085 #04 L add_uuid nid=1@elan(0x1000000000001) nal=90 0: 1:uml1_UUID
2086 #05 L setup 0:OSC_uml1_ost1_mdsA 1:ost1_UUID 2:uml1_UUID
2087 #06 L add_uuid nid=uml2@tcp(0x20000c0a80202) nal=90 0: 1:uml2_UUID
2088 #0x L add_uuid nid=2@elan(0x1000000000002) nal=90 0: 1:uml2_UUID
2089 #07 L add_conn 0:OSC_uml1_ost1_mdsA 1:uml2_UUID
2093 * Pull failnid info out of params string, which may contain something
2094 * like "<nid1>,<nid2>:<nid3>,<nid4>". class_parse_nid() does not
2095 * complain about abnormal inputs like ",:<nid1>", "<nid1>:,<nid2>",
2096 * etc. However, convert_hostnames() should have caught those.
2098 while (class_find_param(ptr, PARAM_FAILNODE, &ptr) == 0) {
2099 while (class_parse_nid(ptr, &nid, &ptr) == 0) {
2100 char nidstr[LNET_NIDSTR_SIZE];
2102 if (failnodeuuid == NULL) {
2103 /* We don't know the failover node name,
2104 * so just use the first nid as the uuid */
2105 libcfs_nid2str_r(nid, nidstr, sizeof(nidstr));
2106 rc = name_create(&failnodeuuid, nidstr, "");
2110 CDEBUG(D_MGS, "add nid %s for failover uuid %s, "
2112 libcfs_nid2str_r(nid, nidstr, sizeof(nidstr)),
2113 failnodeuuid, cliname);
2114 rc = record_add_uuid(env, llh, nid, failnodeuuid);
2116 * If *ptr is ':', we have added all NIDs for
2120 rc = record_add_conn(env, llh, cliname,
2122 name_destroy(&failnodeuuid);
2123 failnodeuuid = NULL;
2127 rc = record_add_conn(env, llh, cliname, failnodeuuid);
2128 name_destroy(&failnodeuuid);
2129 failnodeuuid = NULL;
/*
 * Add an MDC for the MDT described by \a mti to the LMV \a lmvname in
 * log \a logname.
 *
 * The log must already be non-empty (an error is logged otherwise).
 * Derived names: node uuid = string form of the first NID, MDC name =
 * "<svname>-mdc", MDC uuid = "<mdcname>_UUID", LMV uuid =
 * "<lmvname>_UUID".  Between a start and an end marker the function
 * records one add_uuid per NID, the MDC attach and setup records, any
 * failover NIDs, and finally the LCFG_ADD_MDC record carrying the
 * stripe index.  All derived names are destroyed on exit.
 * NOTE(review): the marker comments and the error checks are not
 * visible in this listing.
 */
2136 static int mgs_write_log_mdc_to_lmv(const struct lu_env *env,
2137 struct mgs_device *mgs,
2139 struct mgs_target_info *mti,
2140 char *logname, char *lmvname)
2142 struct llog_handle *llh = NULL;
2143 char *mdcname = NULL;
2144 char *nodeuuid = NULL;
2145 char *mdcuuid = NULL;
2146 char *lmvuuid = NULL;
2148 char nidstr[LNET_NIDSTR_SIZE];
2152 if (mgs_log_is_empty(env, mgs, logname)) {
2153 CERROR("log is empty! Logical error\n");
2157 CDEBUG(D_MGS, "adding mdc for %s to log %s:lmv(%s)\n",
2158 mti->mti_svname, logname, lmvname);
2160 libcfs_nid2str_r(mti->mti_nids[0], nidstr, sizeof(nidstr));
2161 rc = name_create(&nodeuuid, nidstr, "");
2164 rc = name_create(&mdcname, mti->mti_svname, "-mdc");
2167 rc = name_create(&mdcuuid, mdcname, "_UUID");
2170 rc = name_create(&lmvuuid, lmvname, "_UUID");
2174 rc = record_start_log(env, mgs, &llh, logname);
2177 rc = record_marker(env, llh, fsdb, CM_START, mti->mti_svname,
2181 for (i = 0; i < mti->mti_nid_count; i++) {
2182 CDEBUG(D_MGS, "add nid %s for mdt\n",
2183 libcfs_nid2str_r(mti->mti_nids[i],
2184 nidstr, sizeof(nidstr)));
2186 rc = record_add_uuid(env, llh, mti->mti_nids[i], nodeuuid);
2191 rc = record_attach(env, llh, mdcname, LUSTRE_MDC_NAME, lmvuuid);
2194 rc = record_setup(env, llh, mdcname, mti->mti_uuid, nodeuuid,
2198 rc = mgs_write_log_failnids(env, mti, llh, mdcname);
2201 snprintf(index, sizeof(index), "%d", mti->mti_stripe_index);
2202 rc = record_mdc_add(env, llh, lmvname, mdcuuid, mti->mti_uuid,
2206 rc = record_marker(env, llh, fsdb, CM_END, mti->mti_svname,
2211 record_end_log(env, &llh);
2213 name_destroy(&lmvuuid);
2214 name_destroy(&mdcuuid);
2215 name_destroy(&mdcname);
2216 name_destroy(&nodeuuid);
/*
 * Create the MDT-side LOV name for MDT \a index.
 *
 * When \a index is 0 and the fsdb carries FSDB_OSCNAME18 the name is
 * "<fsname>-mdtlov"; otherwise it is "<mdtname>-mdtlov".
 */
2220 static inline int name_create_lov(char **lovname, char *mdtname,
2221 struct fs_db *fsdb, int index)
2224 if (index == 0 && test_bit(FSDB_OSCNAME18, &fsdb->fsdb_flags))
2225 return name_create(lovname, fsdb->fsdb_name, "-mdtlov");
2227 return name_create(lovname, mdtname, "-mdtlov");
/*
 * Create both the MDT log name and its LOV name for MDT \a i.
 *
 * The log name comes from name_create_mdt(); the LOV name is
 * "<fsname>-mdtlov" for index 0 with FSDB_OSCNAME18 set, otherwise
 * "<logname>-mdtlov".
 * NOTE(review): name_destroy(logname) presumably runs only when
 * creating the LOV name fails -- the guarding condition is not visible
 * in this listing.
 */
2230 static int name_create_mdt_and_lov(char **logname, char **lovname,
2231 struct fs_db *fsdb, int i)
2235 rc = name_create_mdt(logname, fsdb->fsdb_name, i);
2239 if (i == 0 && test_bit(FSDB_OSCNAME18, &fsdb->fsdb_flags))
2240 rc = name_create(lovname, fsdb->fsdb_name, "-mdtlov");
2242 rc = name_create(lovname, *logname, "-mdtlov");
2244 name_destroy(logname);
/*
 * Create the name of the OSC that MDT \a i uses for \a ostname.
 *
 * The suffix is "-osc" for index 0 with FSDB_OSCNAME18 set, otherwise
 * "-osc-MDT%04x" with the MDT index in hex.
 */
2250 static inline int name_create_mdt_osc(char **oscname, char *ostname,
2251 struct fs_db *fsdb, int i)
2255 if (i == 0 && test_bit(FSDB_OSCNAME18, &fsdb->fsdb_flags))
2256 sprintf(suffix, "-osc");
2258 sprintf(suffix, "-osc-MDT%04x", i);
2259 return name_create(oscname, ostname, suffix);
2262 /* add new mdc to already existent MDS */
/*
 * Writes an "add osp" section into \a logname (the log of MDT
 * \a mdt_index) so that MDT can reach the MDT described by \a mti.
 *
 * The log named mti->mti_svname must already be non-empty.  Derived
 * names: MDT name from the fsdb name and mti's stripe index, node uuid
 * from the first NID, OSP name "<mdtname>-osp-MDT<mdt_index in hex>",
 * LOV name via name_create_lov(), plus "_UUID" variants.  Between the
 * start and end markers the function records one add_uuid per NID, the
 * OSP attach and setup records, any failover NIDs and an LCFG_ADD_MDC
 * record adding the OSP to \a lovname.  All derived names are
 * destroyed on exit.
 * NOTE(review): index_str is reused, first for the "-MDT%04x" suffix
 * and later for the decimal stripe index.  Several error checks and
 * labels are not visible in this listing.
 */
2263 static int mgs_write_log_osp_to_mdt(const struct lu_env *env,
2264 struct mgs_device *mgs,
2266 struct mgs_target_info *mti,
2267 int mdt_index, char *logname)
2269 struct llog_handle *llh = NULL;
2270 char *nodeuuid = NULL;
2271 char *ospname = NULL;
2272 char *lovuuid = NULL;
2273 char *mdtuuid = NULL;
2274 char *svname = NULL;
2275 char *mdtname = NULL;
2276 char *lovname = NULL;
2278 char nidstr[LNET_NIDSTR_SIZE];
2282 if (mgs_log_is_empty(env, mgs, mti->mti_svname)) {
2283 CERROR("log is empty! Logical error\n");
2287 CDEBUG(D_MGS, "adding osp index %d to %s\n", mti->mti_stripe_index,
2290 rc = name_create_mdt(&mdtname, fsdb->fsdb_name, mti->mti_stripe_index);
2294 libcfs_nid2str_r(mti->mti_nids[0], nidstr, sizeof(nidstr));
2295 rc = name_create(&nodeuuid, nidstr, "");
2297 GOTO(out_destory, rc);
2299 rc = name_create(&svname, mdtname, "-osp");
2301 GOTO(out_destory, rc);
2303 sprintf(index_str, "-MDT%04x", mdt_index);
2304 rc = name_create(&ospname, svname, index_str);
2306 GOTO(out_destory, rc);
2308 rc = name_create_lov(&lovname, logname, fsdb, mdt_index);
2310 GOTO(out_destory, rc);
2312 rc = name_create(&lovuuid, lovname, "_UUID");
2314 GOTO(out_destory, rc);
2316 rc = name_create(&mdtuuid, mdtname, "_UUID");
2318 GOTO(out_destory, rc);
2320 rc = record_start_log(env, mgs, &llh, logname);
2322 GOTO(out_destory, rc);
2324 rc = record_marker(env, llh, fsdb, CM_START, mti->mti_svname,
2327 GOTO(out_destory, rc);
2329 for (i = 0; i < mti->mti_nid_count; i++) {
2330 CDEBUG(D_MGS, "add nid %s for mdt\n",
2331 libcfs_nid2str_r(mti->mti_nids[i],
2332 nidstr, sizeof(nidstr)));
2333 rc = record_add_uuid(env, llh, mti->mti_nids[i], nodeuuid);
2338 rc = record_attach(env, llh, ospname, LUSTRE_OSP_NAME, lovuuid);
2342 rc = record_setup(env, llh, ospname, mti->mti_uuid, nodeuuid,
2347 rc = mgs_write_log_failnids(env, mti, llh, ospname);
2351 /* Add mdc(osp) to lod */
2352 snprintf(index_str, sizeof(index_str), "%d", mti->mti_stripe_index);
2353 rc = record_base(env, llh, lovname, 0, LCFG_ADD_MDC, mti->mti_uuid,
2354 index_str, "1", NULL);
2358 rc = record_marker(env, llh, fsdb, CM_END, mti->mti_svname, "add osp");
2363 record_end_log(env, &llh);
2366 name_destroy(&mdtuuid);
2367 name_destroy(&lovuuid);
2368 name_destroy(&lovname);
2369 name_destroy(&ospname);
2370 name_destroy(&svname);
2371 name_destroy(&nodeuuid);
2372 name_destroy(&mdtname);
/*
 * Write the MDT's own startup log (named after mti->mti_svname).
 *
 * When the log is still empty the "lov setup" section for
 * "<svname>-mdtlov" is written first.  The "add mdt" section then
 * consists of: start marker, attach record (LUSTRE_MDT_NAME, uuid
 * "<svname>_UUID"), mount-option record, setup record carrying the
 * decimal stripe index, the LOV name and "n" or "f" depending on
 * whether the failmode parameter starts with "failout", and end marker.
 * The uuid buffer and the LOV name are released before returning.
 * NOTE(review): the error checks between the steps are not visible in
 * this listing.
 */
2376 static int mgs_write_log_mdt0(const struct lu_env *env,
2377 struct mgs_device *mgs,
2379 struct mgs_target_info *mti)
2381 char *log = mti->mti_svname;
2382 struct llog_handle *llh = NULL;
2383 char *uuid, *lovname;
2385 char *ptr = mti->mti_params;
2386 int rc = 0, failout = 0;
2389 OBD_ALLOC(uuid, sizeof(struct obd_uuid));
2393 if (class_find_param(ptr, PARAM_FAILMODE, &ptr) == 0)
2394 failout = (strncmp(ptr, "failout", 7) == 0);
2396 rc = name_create(&lovname, log, "-mdtlov");
2399 if (mgs_log_is_empty(env, mgs, log)) {
2400 rc = mgs_write_log_lov(env, mgs, fsdb, mti, log, lovname);
2405 sprintf(mdt_index, "%d", mti->mti_stripe_index);
2407 rc = record_start_log(env, mgs, &llh, log);
2411 /* add MDT itself */
2413 /* FIXME this whole fn should be a single journal transaction */
2414 sprintf(uuid, "%s_UUID", log);
2415 rc = record_marker(env, llh, fsdb, CM_START, log, "add mdt");
2418 rc = record_attach(env, llh, log, LUSTRE_MDT_NAME, uuid);
2421 rc = record_mount_opt(env, llh, log, lovname, NULL);
2424 rc = record_setup(env, llh, log, uuid, mdt_index, lovname,
2425 failout ? "n" : "f");
2428 rc = record_marker(env, llh, fsdb, CM_END, log, "add mdt");
2432 record_end_log(env, &llh);
2434 name_destroy(&lovname);
2436 OBD_FREE(uuid, sizeof(struct obd_uuid));
2440 /* envelope method for all layers log */
/*
 * Visible steps when a new MDT registers:
 *  1. make up "<svname>_UUID" when mti carries no uuid;
 *  2. write the MDT's own log via mgs_write_log_mdt0();
 *  3. build the "<fsname>-client" log name and, when that log is still
 *     empty, start it with the LOV and LMV setup sections;
 *  4. copy the relevant client-log records into the MDT log
 *     (mgs_steal_llog_for_mdt_from_client());
 *  5. add this MDT's MDC to the client LMV;
 *  6. write a mount-option record, wrapped in markers, to the client
 *     log;
 *  7. for every other MDT set in fsdb_mdt_index_map whose log is not
 *     empty, add an OSP for the new MDT to that MDT's log.
 * NOTE(review): the error checks, some call arguments and the
 * condition around step 6 are not visible in this listing.
 */
2441 static int mgs_write_log_mdt(const struct lu_env *env,
2442 struct mgs_device *mgs,
2444 struct mgs_target_info *mti)
2446 struct mgs_thread_info *mgi = mgs_env_info(env);
2447 struct llog_handle *llh = NULL;
2452 CDEBUG(D_MGS, "writing new mdt %s\n", mti->mti_svname);
2454 if (mti->mti_uuid[0] == '\0') {
2455 /* Make up our own uuid */
2456 snprintf(mti->mti_uuid, sizeof(mti->mti_uuid),
2457 "%s_UUID", mti->mti_svname);
2461 rc = mgs_write_log_mdt0(env, mgs, fsdb, mti);
2464 /* Append the mdt info to the client log */
2465 rc = name_create(&cliname, mti->mti_fsname, "-client");
2469 if (mgs_log_is_empty(env, mgs, cliname)) {
2470 /* Start client log */
2471 rc = mgs_write_log_lov(env, mgs, fsdb, mti, cliname,
2475 rc = mgs_write_log_lmv(env, mgs, fsdb, mti, cliname,
2482 #09 L add_uuid nid=uml1@tcp(0x20000c0a80201) 0: 1:uml1_UUID
2483 #10 L attach 0:MDC_uml1_mdsA_MNT_client 1:mdc 2:1d834_MNT_client_03f
2484 #11 L setup 0:MDC_uml1_mdsA_MNT_client 1:mdsA_UUID 2:uml1_UUID
2485 #12 L add_uuid nid=uml2@tcp(0x20000c0a80202) 0: 1:uml2_UUID
2486 #13 L add_conn 0:MDC_uml1_mdsA_MNT_client 1:uml2_UUID
2487 #14 L mount_option 0: 1:client 2:lov1 3:MDC_uml1_mdsA_MNT_client
2490 /* copy client info about lov/lmv */
2491 mgi->mgi_comp.comp_mti = mti;
2492 mgi->mgi_comp.comp_fsdb = fsdb;
2494 rc = mgs_steal_llog_for_mdt_from_client(env, mgs, cliname,
2498 rc = mgs_write_log_mdc_to_lmv(env, mgs, fsdb, mti, cliname,
2504 rc = record_start_log(env, mgs, &llh, cliname);
2508 rc = record_marker(env, llh, fsdb, CM_START, cliname,
2512 rc = record_mount_opt(env, llh, cliname, fsdb->fsdb_clilov,
2516 rc = record_marker(env, llh, fsdb, CM_END, cliname,
2522 /* for_all_existing_mdt except current one */
2523 for (i = 0; i < INDEX_MAP_SIZE * 8; i++) {
2524 if (i != mti->mti_stripe_index &&
2525 test_bit(i, fsdb->fsdb_mdt_index_map)) {
2528 rc = name_create_mdt(&logname, fsdb->fsdb_name, i);
2532 /* NB: If the log for the MDT is empty, it means
2533 * the MDT is only added to the index
2534 * map, and not being process yet, i.e. this
2535 * is an unregistered MDT, see mgs_write_log_target().
2536 * so we should skip it. Otherwise
2538 * 1. MGS get register request for MDT1 and MDT2.
2540 * 2. Then both MDT1 and MDT2 are added into
2541 * fsdb_mdt_index_map. (see mgs_set_index()).
2543 * 3. Then MDT1 get the lock of fsdb_mutex, then
2544 * generate the config log, here, it will regard MDT2
2545 * as an existent MDT, and generate "add osp" for
2546 * lustre-MDT0001-osp-MDT0002. Note: at the moment
2547 * MDT0002 config log is still empty, so it will
2548 * add "add osp" even before "lov setup", which
2549 * will definitely cause trouble.
2551 * 4. MDT1 registration finished, fsdb_mutex is
2552 * released, then MDT2 get in, then in above
2553 * mgs_steal_llog_for_mdt_from_client(), it will
2554 * add another osp log for lustre-MDT0001-osp-MDT0002,
2555 * which will cause another trouble.*/
2556 if (!mgs_log_is_empty(env, mgs, logname))
2557 rc = mgs_write_log_osp_to_mdt(env, mgs, fsdb,
2560 name_destroy(&logname);
2566 record_end_log(env, &llh);
2568 name_destroy(&cliname);
2572 /* Add the ost info to the client/mdt lov */
/*
 * Writes one OSC section for the OST described by \a mti into
 * \a logname, attaching the OSC to \a lovname.
 *
 * The log must already be non-empty.  Derived names: node uuid from
 * the first NID, "<svname>-osc" plus \a suffix (the suffix is dropped
 * when FSDB_OSCNAME18 is set), and "_UUID" variants for the OSC and the
 * LOV.  Between start and end markers (both OR-ed with \a flags) the
 * function records one add_uuid per NID, the OSC attach and setup
 * records, any failover NIDs and the LCFG_LOV_ADD_OBD record with the
 * decimal stripe index and generation "1".  All derived names are
 * destroyed on exit.
 * NOTE(review): \a sec_part is not referenced in the visible lines;
 * the marker comment strings and error checks are not visible either.
 */
2573 static int mgs_write_log_osc_to_lov(const struct lu_env *env,
2574 struct mgs_device *mgs, struct fs_db *fsdb,
2575 struct mgs_target_info *mti,
2576 char *logname, char *suffix, char *lovname,
2577 enum lustre_sec_part sec_part, int flags)
2579 struct llog_handle *llh = NULL;
2580 char *nodeuuid = NULL;
2581 char *oscname = NULL;
2582 char *oscuuid = NULL;
2583 char *lovuuid = NULL;
2584 char *svname = NULL;
2586 char nidstr[LNET_NIDSTR_SIZE];
2590 CDEBUG(D_INFO, "adding osc for %s to log %s\n",
2591 mti->mti_svname, logname);
2593 if (mgs_log_is_empty(env, mgs, logname)) {
2594 CERROR("log is empty! Logical error\n");
2598 libcfs_nid2str_r(mti->mti_nids[0], nidstr, sizeof(nidstr));
2599 rc = name_create(&nodeuuid, nidstr, "");
2602 rc = name_create(&svname, mti->mti_svname, "-osc");
2606 /* for the system upgraded from old 1.8, keep using the old osc naming
2607 * style for mdt, see name_create_mdt_osc(). LU-1257 */
2608 if (test_bit(FSDB_OSCNAME18, &fsdb->fsdb_flags))
2609 rc = name_create(&oscname, svname, "");
2611 rc = name_create(&oscname, svname, suffix);
2615 rc = name_create(&oscuuid, oscname, "_UUID");
2618 rc = name_create(&lovuuid, lovname, "_UUID");
2624 #03 L add_uuid nid=uml1@tcp(0x20000c0a80201) 0: 1:uml1_UUID
2626 #04 L add_uuid nid=1@elan(0x1000000000001) nal=90 0: 1:uml1_UUID
2627 #04 L attach 0:OSC_uml1_ost1_MNT_client 1:osc 2:89070_lov1_a41dff51a
2628 #05 L setup 0:OSC_uml1_ost1_MNT_client 1:ost1_UUID 2:uml1_UUID
2630 #06 L add_uuid nid=uml2@tcp(0x20000c0a80202) 0: 1:uml2_UUID
2631 #07 L add_conn 0:OSC_uml1_ost1_MNT_client 1:uml2_UUID
2632 #08 L lov_modify_tgts add 0:lov1 1:ost1_UUID 2(index):0 3(gen):1
2635 rc = record_start_log(env, mgs, &llh, logname);
2639 /* FIXME these should be a single journal transaction */
2640 rc = record_marker(env, llh, fsdb, CM_START | flags, mti->mti_svname,
2645 /* NB: don't change record order, because upon MDT steal OSC config
2646 * from client, it treats all nids before LCFG_SETUP as target nids
2647 * (multiple interfaces), while nids after as failover node nids.
2648 * See mgs_steal_client_llog_handler() LCFG_ADD_UUID.
2650 for (i = 0; i < mti->mti_nid_count; i++) {
2651 CDEBUG(D_MGS, "add nid %s\n",
2652 libcfs_nid2str_r(mti->mti_nids[i],
2653 nidstr, sizeof(nidstr)));
2654 rc = record_add_uuid(env, llh, mti->mti_nids[i], nodeuuid);
2658 rc = record_attach(env, llh, oscname, LUSTRE_OSC_NAME, lovuuid);
2661 rc = record_setup(env, llh, oscname, mti->mti_uuid, nodeuuid,
2665 rc = mgs_write_log_failnids(env, mti, llh, oscname);
2669 snprintf(index, sizeof(index), "%d", mti->mti_stripe_index);
2671 rc = record_lov_add(env, llh, lovname, mti->mti_uuid, index, "1");
2674 rc = record_marker(env, llh, fsdb, CM_END | flags, mti->mti_svname,
2679 record_end_log(env, &llh);
2681 name_destroy(&lovuuid);
2682 name_destroy(&oscuuid);
2683 name_destroy(&oscname);
2684 name_destroy(&svname);
2685 name_destroy(&nodeuuid);
/*
 * Write the config logs for a newly registered OST.
 *
 * Visible steps:
 *  1. refuse, with a console error, when the OST's own log already has
 *     content;
 *  2. write the OST startup log: "add ost" start marker, attach record
 *     (type "obdfilter", uuid made up as "<svname>_UUID" when empty),
 *     setup record with placeholder "dev" / "type" strings and "n" or
 *     "f" depending on whether the failmode parameter starts with
 *     "failout", end marker;
 *  3. pick the marker flags for the other logs: CM_SKIP |
 *     CM_UPGRADE146 when FSDB_OLDLOG14 is set; when the LDD_F_UPDATE
 *     bits are not all set a console warning is printed instead;
 *  4. add an OSC for this OST to the LOV of every MDT set in
 *     fsdb_mdt_index_map, using the suffix "-MDT%04x";
 *  5. append the OSC to the "<fsname>-client" log, first writing the
 *     LOV and LMV setup sections when that log is still empty.
 * NOTE(review): the return statements and error checks are not visible
 * in this listing.
 */
2689 static int mgs_write_log_ost(const struct lu_env *env,
2690 struct mgs_device *mgs, struct fs_db *fsdb,
2691 struct mgs_target_info *mti)
2693 struct llog_handle *llh = NULL;
2694 char *logname, *lovname;
2695 char *ptr = mti->mti_params;
2696 int rc, flags = 0, failout = 0, i;
2699 CDEBUG(D_MGS, "writing new ost %s\n", mti->mti_svname);
2701 /* The ost startup log */
2703 /* If the ost log already exists, that means that someone reformatted
2704 the ost and it called target_add again. */
2705 if (!mgs_log_is_empty(env, mgs, mti->mti_svname)) {
2706 LCONSOLE_ERROR_MSG(0x141, "The config log for %s already "
2707 "exists, yet the server claims it never "
2708 "registered. It may have been reformatted, "
2709 "or the index changed. writeconf the MDT to "
2710 "regenerate all logs.\n", mti->mti_svname);
2715 attach obdfilter ost1 ost1_UUID
2716 setup /dev/loop2 ldiskfs f|n errors=remount-ro,user_xattr
2718 if (class_find_param(ptr, PARAM_FAILMODE, &ptr) == 0)
2719 failout = (strncmp(ptr, "failout", 7) == 0);
2720 rc = record_start_log(env, mgs, &llh, mti->mti_svname);
2723 /* FIXME these should be a single journal transaction */
2724 rc = record_marker(env, llh, fsdb, CM_START, mti->mti_svname,"add ost");
2727 if (*mti->mti_uuid == '\0')
2728 snprintf(mti->mti_uuid, sizeof(mti->mti_uuid),
2729 "%s_UUID", mti->mti_svname);
2730 rc = record_attach(env, llh, mti->mti_svname,
2731 "obdfilter"/*LUSTRE_OST_NAME*/, mti->mti_uuid);
2734 rc = record_setup(env, llh, mti->mti_svname,
2735 "dev"/*ignored*/, "type"/*ignored*/,
2736 failout ? "n" : "f", NULL/*options*/);
2739 rc = record_marker(env, llh, fsdb, CM_END, mti->mti_svname, "add ost");
2743 record_end_log(env, &llh);
2746 /* We also have to update the other logs where this osc is part of
2749 if (test_bit(FSDB_OLDLOG14, &fsdb->fsdb_flags)) {
2750 /* If we're upgrading, the old mdt log already has our
2751 entry. Let's do a fake one for fun. */
2752 /* Note that we can't add any new failnids, since we don't
2753 know the old osc names. */
2754 flags = CM_SKIP | CM_UPGRADE146;
2756 } else if ((mti->mti_flags & LDD_F_UPDATE) != LDD_F_UPDATE) {
2757 /* If the update flag isn't set, don't update client/mdt
2760 LCONSOLE_WARN("Client log for %s was not updated; writeconf "
2761 "the MDT first to regenerate it.\n",
2765 /* Add ost to all MDT lov defs */
2766 for (i = 0; i < INDEX_MAP_SIZE * 8; i++){
2767 if (test_bit(i, fsdb->fsdb_mdt_index_map)) {
2770 rc = name_create_mdt_and_lov(&logname, &lovname, fsdb,
2774 sprintf(mdt_index, "-MDT%04x", i);
2775 rc = mgs_write_log_osc_to_lov(env, mgs, fsdb, mti,
2777 lovname, LUSTRE_SP_MDT,
2779 name_destroy(&logname);
2780 name_destroy(&lovname);
2786 /* Append ost info to the client log */
2787 rc = name_create(&logname, mti->mti_fsname, "-client");
2790 if (mgs_log_is_empty(env, mgs, logname)) {
2791 /* Start client log */
2792 rc = mgs_write_log_lov(env, mgs, fsdb, mti, logname,
2796 rc = mgs_write_log_lmv(env, mgs, fsdb, mti, logname,
2801 rc = mgs_write_log_osc_to_lov(env, mgs, fsdb, mti, logname, "",
2802 fsdb->fsdb_clilov, LUSTRE_SP_CLI, flags);
2804 name_destroy(&logname);
/*
 * Test whether a "key=value" parameter string carries an empty value.
 *
 * The visible condition holds when ptr contains '=' and the character
 * right after the first '=' is the terminating NUL.
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * callers in this file use the result as a boolean meaning "remove or
 * disable this setting".
 */
2808 static __inline__ int mgs_param_empty(char *ptr)
2812 if ((tmp = strchr(ptr, '=')) && (*(++tmp) == '\0'))
/*
 * Apply a failover-NID parameter to a single configuration log.
 *
 * When mti->mti_params has an empty value, mgs_modify() is called with
 * CM_SKIP on the "add failnid" sections of mti->mti_svname (the in-code
 * comment describes this as removing all failnids); a positive result is
 * folded to 0 and negative errors are returned unchanged.
 *
 * Otherwise a new section is appended to the log: CM_START marker,
 * mgs_write_log_failnids() for cliname, then a CM_END "add failnid"
 * marker.
 *
 * \param logname  name of the log to modify or append to
 * \param cliname  device name handed to mgs_write_log_failnids()
 */
2817 static int mgs_write_log_failnid_internal(const struct lu_env *env,
2818 struct mgs_device *mgs,
2820 struct mgs_target_info *mti,
2821 char *logname, char *cliname)
2824 struct llog_handle *llh = NULL;
2826 if (mgs_param_empty(mti->mti_params)) {
2827 /* Remove _all_ failnids */
2828 rc = mgs_modify(env, mgs, fsdb, mti, logname,
2829 mti->mti_svname, "add failnid", CM_SKIP);
2830 return rc < 0 ? rc : 0;
2833 /* Otherwise failover nids are additive */
2834 rc = record_start_log(env, mgs, &llh, logname);
2837 /* FIXME this should be a single journal transaction */
2838 rc = record_marker(env, llh, fsdb, CM_START, mti->mti_svname,
2842 rc = mgs_write_log_failnids(env, mti, llh, cliname);
2845 rc = record_marker(env, llh, fsdb, CM_END,
2846 mti->mti_svname, "add failnid");
2848 record_end_log(env, &llh);
2853 /* Add additional failnids to an existing log.
2854 The mdc/osc must have been added to logs first */
2855 /* tcp nids must be in dotted-quad ascii -
2856 we can't resolve hostnames from the kernel. */
/*
 * Add failover NIDs for an already registered target to every log that
 * references its client-side device.
 *
 * Visible steps:
 *  - console error 0x142 when the target's own log is empty, i.e. the
 *    target has not registered yet;
 *  - the client device name is "<svname>-mdc" for an MDT and
 *    "<svname>-osc" for an OST (handling of other types is not visible);
 *  - the "<fsname>-client" log is updated through
 *    mgs_write_log_failnid_internal();
 *  - for an OST, each MDT log whose bit is set in fsdb_mdt_index_map is
 *    updated as well, with names built by name_create_mdt() and
 *    name_create_mdt_osc().
 */
2857 static int mgs_write_log_add_failnid(const struct lu_env *env,
2858 struct mgs_device *mgs,
2860 struct mgs_target_info *mti)
2862 char *logname, *cliname;
2866 /* FIXME we currently can't erase the failnids
2867 * given when a target first registers, since they aren't part of
2868 * an "add uuid" stanza */
2870 /* Verify that we know about this target */
2871 if (mgs_log_is_empty(env, mgs, mti->mti_svname)) {
2872 LCONSOLE_ERROR_MSG(0x142, "The target %s has not registered "
2873 "yet. It must be started before failnids "
2874 "can be added.\n", mti->mti_svname);
2878 /* Create mdc/osc client name (e.g. lustre-OST0001-osc) */
2879 if (mti->mti_flags & LDD_F_SV_TYPE_MDT) {
2880 rc = name_create(&cliname, mti->mti_svname, "-mdc");
2881 } else if (mti->mti_flags & LDD_F_SV_TYPE_OST) {
2882 rc = name_create(&cliname, mti->mti_svname, "-osc");
2888 /* Add failover nids to the client log */
2889 rc = name_create(&logname, mti->mti_fsname, "-client");
2891 name_destroy(&cliname);
2894 rc = mgs_write_log_failnid_internal(env, mgs, fsdb,mti,logname,cliname);
2895 name_destroy(&logname);
2896 name_destroy(&cliname);
2900 if (mti->mti_flags & LDD_F_SV_TYPE_OST) {
2901 /* Add OST failover nids to the MDT logs as well */
2904 for (i = 0; i < INDEX_MAP_SIZE * 8; i++) {
2905 if (!test_bit(i, fsdb->fsdb_mdt_index_map))
2907 rc = name_create_mdt(&logname, mti->mti_fsname, i);
2910 rc = name_create_mdt_osc(&cliname, mti->mti_svname,
2913 name_destroy(&logname);
2916 rc = mgs_write_log_failnid_internal(env, mgs, fsdb,
2919 name_destroy(&cliname);
2920 name_destroy(&logname);
/*
 * Write one parameter record into a single config log, after asking
 * mgs_modify() to mark earlier settings of the same parameter with
 * CM_SKIP.
 *
 * The marker comment is a copy of ptr, forcibly NUL-terminated within
 * MTI_NAME_MAXLEN bytes; the '=' is located so the value is not matched
 * (the truncation statement itself is not visible in this excerpt).
 * The record carries tgtname in buffer 0 and ptr in buffer 1; with
 * LDD_F_PARAM2 set it also carries LCTL_UPCALL in buffer 2 and uses
 * LCFG_SET_PARAM instead of LCFG_PARAM.
 *
 * \param logname  log to modify and append to
 * \param bufs     scratch lustre_cfg_bufs, reset by this function
 * \param tgtname  device name stored in the record and used for markers
 * \param ptr      "name=value" parameter string
 */
2929 static int mgs_wlp_lcfg(const struct lu_env *env,
2930 struct mgs_device *mgs, struct fs_db *fsdb,
2931 struct mgs_target_info *mti,
2932 char *logname, struct lustre_cfg_bufs *bufs,
2933 char *tgtname, char *ptr)
2935 char comment[MTI_NAME_MAXLEN];
2937 struct llog_cfg_rec *lcr;
2940 /* Erase any old settings of this same parameter */
/* NOTE(review): this reads MTI_NAME_MAXLEN bytes from ptr regardless of
 * strlen(ptr). Callers in this file pass buffers sized to the string
 * (e.g. the strlen(mti_params) + 1 buffer in mgs_write_log_target()), so
 * this can read past the end of the source; a bounded string copy looks
 * safer. Confirm before changing. */
2941 memcpy(comment, ptr, MTI_NAME_MAXLEN);
2942 comment[MTI_NAME_MAXLEN - 1] = 0;
2943 /* But don't try to match the value. */
2944 tmp = strchr(comment, '=');
2947 /* FIXME we should skip settings that are the same as old values */
2948 rc = mgs_modify(env, mgs, fsdb, mti, logname, tgtname, comment,CM_SKIP);
2951 del = mgs_param_empty(ptr);
2953 LCONSOLE_INFO("%s parameter %s.%s in log %s\n", del ? "Disabling" : rc ?
2954 "Setting" : "Modifying", tgtname, comment, logname);
2956 /* mgs_modify() will return 1 if nothing had to be done */
2962 lustre_cfg_bufs_reset(bufs, tgtname);
2963 lustre_cfg_bufs_set_string(bufs, 1, ptr);
2964 if (mti->mti_flags & LDD_F_PARAM2)
2965 lustre_cfg_bufs_set_string(bufs, 2, LCTL_UPCALL);
2967 lcr = lustre_cfg_rec_new((mti->mti_flags & LDD_F_PARAM2) ?
2968 LCFG_SET_PARAM : LCFG_PARAM, bufs);
2972 rc = mgs_write_log_direct(env, mgs, fsdb, logname, lcr, tgtname,
2974 lustre_cfg_rec_free(lcr);
/*
 * Record one parameter in the PARAMS_FILENAME log.
 *
 * Thin wrapper around mgs_wlp_lcfg(): it uses a local lustre_cfg_bufs and
 * passes mti->mti_svname as the target name.
 *
 * \param ptr  parameter string, logged at D_MGS level before processing
 */
2978 static int mgs_write_log_param2(const struct lu_env *env,
2979 struct mgs_device *mgs,
2981 struct mgs_target_info *mti, char *ptr)
2983 struct lustre_cfg_bufs bufs;
2987 CDEBUG(D_MGS, "next param '%s'\n", ptr);
2988 rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, PARAMS_FILENAME, &bufs,
2989 mti->mti_svname, ptr);
2994 /* write global variable settings into log */
/*
 * Write a filesystem-wide ("sys.") setting into all server and client
 * logs.
 *
 * The caller in this file passes the full parameter string as sys and the
 * part after the PARAM_SYS prefix as ptr. The command is chosen from the
 * parameter name: LCFG_SET_TIMEOUT, LCFG_SET_LDLM_TIMEOUT, or a command
 * assigned on lines not visible here for the adaptive-timeout and
 * jobid-var names. For PARAM_JOBID_VAR the value stays a string in
 * buffer 2 (convert = 0); for the others it is parsed by
 * simple_strtoul() into lcfg_num.
 *
 * An empty value passes NULL instead of a record to
 * mgs_write_log_direct_all(). After a successful write of a non-empty
 * value, the two timeout commands are also applied locally through
 * class_process_config() when the timeout was not set before or the new
 * value is larger than the current one.
 */
2995 static int mgs_write_log_sys(const struct lu_env *env,
2996 struct mgs_device *mgs, struct fs_db *fsdb,
2997 struct mgs_target_info *mti, char *sys, char *ptr)
2999 struct mgs_thread_info *mgi = mgs_env_info(env);
3000 struct lustre_cfg *lcfg;
3001 struct llog_cfg_rec *lcr;
3003 int rc, cmd, convert = 1;
3005 if (class_match_param(ptr, PARAM_TIMEOUT, &tmp) == 0) {
3006 cmd = LCFG_SET_TIMEOUT;
3007 } else if (class_match_param(ptr, PARAM_LDLM_TIMEOUT, &tmp) == 0) {
3008 cmd = LCFG_SET_LDLM_TIMEOUT;
3009 /* Check for known params here so we can return error to lctl */
3010 } else if ((class_match_param(ptr, PARAM_AT_MIN, &tmp) == 0) ||
3011 (class_match_param(ptr, PARAM_AT_MAX, &tmp) == 0) ||
3012 (class_match_param(ptr, PARAM_AT_EXTRA, &tmp) == 0) ||
3013 (class_match_param(ptr, PARAM_AT_EARLY_MARGIN, &tmp) == 0) ||
3014 (class_match_param(ptr, PARAM_AT_HISTORY, &tmp) == 0)) {
3016 } else if (class_match_param(ptr, PARAM_JOBID_VAR, &tmp) == 0) {
3017 convert = 0; /* Don't convert string value to integer */
3023 if (mgs_param_empty(ptr))
3024 CDEBUG(D_MGS, "global '%s' removed\n", sys);
3026 CDEBUG(D_MGS, "global '%s' val=%s\n", sys, tmp);
3028 lustre_cfg_bufs_reset(&mgi->mgi_bufs, NULL);
3029 lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 1, sys);
3030 if (!convert && *tmp != '\0')
3031 lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 2, tmp);
3032 lcr = lustre_cfg_rec_new(cmd, &mgi->mgi_bufs);
3036 lcfg = &lcr->lcr_cfg;
/* Numeric settings travel in lcfg_num; string settings went to buffer 2. */
3037 lcfg->lcfg_num = convert ? simple_strtoul(tmp, NULL, 0) : 0;
3038 /* truncate the comment to the parameter name */
3042 /* modify all servers and clients */
3043 rc = mgs_write_log_direct_all(env, mgs, fsdb, mti,
3044 *tmp == '\0' ? NULL : lcr,
3045 mti->mti_fsname, sys, 0);
3046 if (rc == 0 && *tmp != '\0') {
3048 case LCFG_SET_TIMEOUT:
3049 if (!obd_timeout_set || lcfg->lcfg_num > obd_timeout)
3050 class_process_config(lcfg);
3052 case LCFG_SET_LDLM_TIMEOUT:
3053 if (!ldlm_timeout_set || lcfg->lcfg_num > ldlm_timeout)
3054 class_process_config(lcfg);
3061 lustre_cfg_rec_free(lcr);
3065 /* write quota settings into log */
/*
 * Write a "quota." setting into the server config logs.
 *
 * Only names matching QUOTA_METAPOOL_NAME or QUOTA_DATAPOOL_NAME are
 * accepted; anything else is reported with CERROR. A value is reported
 * as unsupported unless it contains 'u', contains 'g', or equals "none"
 * (the guard deciding when this check runs is not visible here).
 *
 * The record is an LCFG_PARAM with the fsname in buffer 0 and the full
 * parameter string in buffer 1. It is passed to
 * mgs_write_log_direct_all() with a final argument of 1 (0 is used for
 * sys settings), or NULL is passed when the value is empty. Positive
 * results are folded to 0.
 *
 * \param quota  full parameter string as passed by the caller
 * \param ptr    remainder after the PARAM_QUOTA prefix
 */
3066 static int mgs_write_log_quota(const struct lu_env *env, struct mgs_device *mgs,
3067 struct fs_db *fsdb, struct mgs_target_info *mti,
3068 char *quota, char *ptr)
3070 struct mgs_thread_info *mgi = mgs_env_info(env);
3071 struct llog_cfg_rec *lcr;
3074 int rc, cmd = LCFG_PARAM;
3076 /* support only 'meta' and 'data' pools so far */
3077 if (class_match_param(ptr, QUOTA_METAPOOL_NAME, &tmp) != 0 &&
3078 class_match_param(ptr, QUOTA_DATAPOOL_NAME, &tmp) != 0) {
3079 CERROR("parameter quota.%s isn't supported (only quota.mdt "
3080 "& quota.ost are)\n", ptr);
3085 CDEBUG(D_MGS, "global '%s' removed\n", quota);
3087 CDEBUG(D_MGS, "global '%s'\n", quota);
3089 if (strchr(tmp, 'u') == NULL && strchr(tmp, 'g') == NULL &&
3090 strcmp(tmp, "none") != 0) {
3091 CERROR("enable option(%s) isn't supported\n", tmp);
3096 lustre_cfg_bufs_reset(&mgi->mgi_bufs, mti->mti_fsname);
3097 lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 1, quota);
3098 lcr = lustre_cfg_rec_new(cmd, &mgi->mgi_bufs);
3102 /* truncate the comment to the parameter name */
3107 /* XXX we duplicated quota enable information in all server
3108 * config logs, it should be moved to a separate config
3109 * log once we cleanup the config log for global param. */
3110 /* modify all servers */
3111 rc = mgs_write_log_direct_all(env, mgs, fsdb, mti,
3112 *tmp == '\0' ? NULL : lcr,
3113 mti->mti_fsname, quota, 1);
3115 lustre_cfg_rec_free(lcr);
3116 return rc < 0 ? rc : 0;
/*
 * Persist one sptlrpc parameter in the "<fsname>-sptlrpc" log.
 *
 * The parameter must contain '=' (enforced by LASSERT). A separately
 * allocated comment string receives the first len bytes of param and is
 * used as the marker comment; the computation of len is not visible here
 * (presumably the length up to '=' - confirm). The record is an
 * LCFG_SPTLRPC_CONF with mti->mti_svname in buffer 0 and param in
 * buffer 1.
 *
 * If the log is empty it is first created by an empty start/end pair.
 * The previous setting is then obsoleted through mgs_modify() and the new
 * record is written with mgs_write_log_direct(); a failure there is
 * reported with CERROR. The log name, the record and the comment are
 * released on the way out.
 */
3119 static int mgs_srpc_set_param_disk(const struct lu_env *env,
3120 struct mgs_device *mgs,
3122 struct mgs_target_info *mti,
3125 struct mgs_thread_info *mgi = mgs_env_info(env);
3126 struct llog_cfg_rec *lcr;
3127 struct llog_handle *llh = NULL;
3129 char *comment, *ptr;
3135 ptr = strchr(param, '=');
3136 LASSERT(ptr != NULL);
3139 OBD_ALLOC(comment, len + 1);
3140 if (comment == NULL)
3142 strncpy(comment, param, len);
3143 comment[len] = '\0';
3146 lustre_cfg_bufs_reset(&mgi->mgi_bufs, mti->mti_svname);
3147 lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 1, param);
3148 lcr = lustre_cfg_rec_new(LCFG_SPTLRPC_CONF, &mgi->mgi_bufs);
3150 GOTO(out_comment, rc = -ENOMEM);
3152 /* construct log name */
3153 rc = name_create(&logname, mti->mti_fsname, "-sptlrpc");
3157 if (mgs_log_is_empty(env, mgs, logname)) {
3158 rc = record_start_log(env, mgs, &llh, logname);
3161 record_end_log(env, &llh);
3164 /* obsolete old one */
3165 rc = mgs_modify(env, mgs, fsdb, mti, logname, mti->mti_svname,
3169 /* write the new one */
3170 rc = mgs_write_log_direct(env, mgs, fsdb, logname, lcr,
3171 mti->mti_svname, comment);
3173 CERROR("%s: error writing log %s: rc = %d\n",
3174 mgs->mgs_obd->obd_name, logname, rc);
3176 name_destroy(&logname);
3178 lustre_cfg_rec_free(lcr);
3180 OBD_FREE(comment, len + 1);
/*
 * Apply the user-descriptor ("udesc") sptlrpc parameter to the in-memory
 * fsdb flags.
 *
 * The part of param before '=' is compared with PARAM_SRPC_UDESC (the
 * split at '=' is presumably done on lines not visible here). A value of
 * "yes" sets FSDB_UDESC and "no" clears it, each with a CWARN; other
 * input ends at the "Invalid param" CERROR.
 *
 * NOTE(review): per the in-function comment, the adjustable udesc
 * parameter is currently disabled by a single statement that is not
 * visible in this excerpt, so the code below may be unreachable.
 */
3184 static int mgs_srpc_set_param_udesc_mem(struct fs_db *fsdb,
3189 /* disable the adjustable udesc parameter for now, i.e. use default
3190 * setting that client always ship udesc to MDT if possible. to enable
3191 * it simply remove the following line */
3194 ptr = strchr(param, '=');
3199 if (strcmp(param, PARAM_SRPC_UDESC))
3202 if (strcmp(ptr, "yes") == 0) {
3203 set_bit(FSDB_UDESC, &fsdb->fsdb_flags);
3204 CWARN("Enable user descriptor shipping from client to MDT\n");
3205 } else if (strcmp(ptr, "no") == 0) {
3206 clear_bit(FSDB_UDESC, &fsdb->fsdb_flags);
3207 CWARN("Disable user descriptor shipping from client to MDT\n");
3215 CERROR("Invalid param: %s\n", param);
/*
 * Parse one sptlrpc parameter and merge it into the in-memory rule sets.
 *
 * Callers in this file invoke it as (fsdb, svname, param).
 *
 * Visible steps:
 *  - param must start with PARAM_SRPC; a PARAM_SRPC_UDESC prefix is
 *    delegated to mgs_srpc_set_param_udesc_mem(); otherwise a
 *    PARAM_SRPC_FLVR prefix is required and stripped before
 *    sptlrpc_parse_rule();
 *  - for an FSDB_MGS_SELF fsdb the rule must go from LUSTRE_SP_MGC (or
 *    ANY) to LUSTRE_SP_MGS (or ANY);
 *  - the destination rule set depends on svname: a name containing '-'
 *    selects the matching entry of the fsdb->fsdb_srpc_tgt list, and a
 *    new entry is allocated and linked at the list head (presumably only
 *    when no match was found - the guard is not visible); MGSSELF_NAME
 *    selects the MGS lut_sptlrpc_rset; any other name selects
 *    fsdb->fsdb_srpc_gen;
 *  - the rule is merged with sptlrpc_rule_set_merge().
 *
 * NOTE(review): the new entry's name is copied with memcpy() of name_len
 * bytes into a name_len + 1 buffer and no terminator is written in the
 * visible lines; this presumably relies on OBD_ALLOC() zero-filling -
 * confirm.
 */
3219 static int mgs_srpc_set_param_mem(struct fs_db *fsdb,
3223 struct sptlrpc_rule rule;
3224 struct sptlrpc_rule_set *rset;
3228 if (strncmp(param, PARAM_SRPC, sizeof(PARAM_SRPC) - 1) != 0) {
3229 CERROR("Invalid sptlrpc parameter: %s\n", param);
3233 if (strncmp(param, PARAM_SRPC_UDESC,
3234 sizeof(PARAM_SRPC_UDESC) - 1) == 0) {
3235 RETURN(mgs_srpc_set_param_udesc_mem(fsdb, param));
3238 if (strncmp(param, PARAM_SRPC_FLVR, sizeof(PARAM_SRPC_FLVR) - 1) != 0) {
3239 CERROR("Invalid sptlrpc flavor parameter: %s\n", param);
3243 param += sizeof(PARAM_SRPC_FLVR) - 1;
3245 rc = sptlrpc_parse_rule(param, &rule);
3249 /* mgs rules implies must be mgc->mgs */
3250 if (test_bit(FSDB_MGS_SELF, &fsdb->fsdb_flags)) {
3251 if ((rule.sr_from != LUSTRE_SP_MGC &&
3252 rule.sr_from != LUSTRE_SP_ANY) ||
3253 (rule.sr_to != LUSTRE_SP_MGS &&
3254 rule.sr_to != LUSTRE_SP_ANY))
3258 /* preapre room for this coming rule. svcname format should be:
3259 * - fsname: general rule
3260 * - fsname-tgtname: target-specific rule
3262 if (strchr(svname, '-')) {
3263 struct mgs_tgt_srpc_conf *tgtconf;
3266 for (tgtconf = fsdb->fsdb_srpc_tgt; tgtconf != NULL;
3267 tgtconf = tgtconf->mtsc_next) {
3268 if (!strcmp(tgtconf->mtsc_tgt, svname)) {
3277 OBD_ALLOC_PTR(tgtconf);
3278 if (tgtconf == NULL)
3281 name_len = strlen(svname);
3283 OBD_ALLOC(tgtconf->mtsc_tgt, name_len + 1);
3284 if (tgtconf->mtsc_tgt == NULL) {
3285 OBD_FREE_PTR(tgtconf);
3288 memcpy(tgtconf->mtsc_tgt, svname, name_len);
3290 tgtconf->mtsc_next = fsdb->fsdb_srpc_tgt;
3291 fsdb->fsdb_srpc_tgt = tgtconf;
3294 rset = &tgtconf->mtsc_rset;
3295 } else if (strcmp(svname, MGSSELF_NAME) == 0) {
3296 /* put _mgs related srpc rule directly in mgs ruleset */
3297 rset = &fsdb->fsdb_mgs->mgs_lut.lut_sptlrpc_rset;
3299 rset = &fsdb->fsdb_srpc_gen;
3302 rc = sptlrpc_rule_set_merge(rset, &rule);
/*
 * Set one sptlrpc parameter both in memory and on disk.
 *
 * A private copy of param is taken first because, per the in-code
 * comment, the original may be altered during processing. The original is
 * passed to mgs_srpc_set_param_mem() (which validates the syntax) and the
 * untouched copy to mgs_srpc_set_param_disk().
 *
 * For an FSDB_MGS_SELF fsdb the rules take effect immediately through
 * sptlrpc_target_update_exp_flavor() on fsdb->fsdb_srpc_gen; such an fsdb
 * is asserted to have no per-target rule list. The copy is freed before
 * returning.
 */
3307 static int mgs_srpc_set_param(const struct lu_env *env,
3308 struct mgs_device *mgs,
3310 struct mgs_target_info *mti,
3320 /* keep a copy of original param, which could be destroied
3322 copy_size = strlen(param) + 1;
3323 OBD_ALLOC(copy, copy_size);
3326 memcpy(copy, param, copy_size);
3328 rc = mgs_srpc_set_param_mem(fsdb, mti->mti_svname, param);
3332 /* previous steps guaranteed the syntax is correct */
3333 rc = mgs_srpc_set_param_disk(env, mgs, fsdb, mti, copy);
3337 if (test_bit(FSDB_MGS_SELF, &fsdb->fsdb_flags)) {
3339 * for mgs rules, make them effective immediately.
3341 LASSERT(fsdb->fsdb_srpc_tgt == NULL);
3342 sptlrpc_target_update_exp_flavor(mgs->mgs_obd,
3343 &fsdb->fsdb_srpc_gen);
3347 OBD_FREE(copy, copy_size);
/*
 * State handed to mgs_srpc_read_handler() through llog_process().
 * msrd_fsdb is the fsdb receiving the parsed rules. The handler also
 * keeps a msrd_skip flag in this structure; its declaration is not
 * visible in this excerpt.
 */
3351 struct mgs_srpc_read_data {
3352 struct fs_db *msrd_fsdb;
/*
 * llog_process() callback that replays one record of the sptlrpc log
 * into the in-memory rule sets.
 *
 * Visible behaviour:
 *  - records whose lrh_type is not OBD_CFG_REC, or that fail
 *    lustre_cfg_sanity_check(), are reported with CERROR;
 *  - LCFG_MARKER records drive msrd_skip: a marker with both CM_START and
 *    CM_SKIP sets it, a marker with CM_END clears it;
 *  - while msrd_skip is set, records are not processed;
 *  - any other command than LCFG_SPTLRPC_CONF is reported as invalid;
 *  - buffer 0 (svname) and buffer 1 (param) must both be present and are
 *    passed to mgs_srpc_set_param_mem().
 *
 * \param data  struct mgs_srpc_read_data supplied by the caller
 */
3356 static int mgs_srpc_read_handler(const struct lu_env *env,
3357 struct llog_handle *llh,
3358 struct llog_rec_hdr *rec, void *data)
3360 struct mgs_srpc_read_data *msrd = data;
3361 struct cfg_marker *marker;
3362 struct lustre_cfg *lcfg = REC_DATA(rec);
3363 char *svname, *param;
3367 if (rec->lrh_type != OBD_CFG_REC) {
3368 CERROR("unhandled lrh_type: %#x\n", rec->lrh_type);
3372 cfg_len = REC_DATA_LEN(rec);
3374 rc = lustre_cfg_sanity_check(lcfg, cfg_len);
3376 CERROR("Insane cfg\n");
3380 if (lcfg->lcfg_command == LCFG_MARKER) {
3381 marker = lustre_cfg_buf(lcfg, 1);
3383 if (marker->cm_flags & CM_START &&
3384 marker->cm_flags & CM_SKIP)
3385 msrd->msrd_skip = 1;
3386 if (marker->cm_flags & CM_END)
3387 msrd->msrd_skip = 0;
3392 if (msrd->msrd_skip)
3395 if (lcfg->lcfg_command != LCFG_SPTLRPC_CONF) {
3396 CERROR("invalid command (%x)\n", lcfg->lcfg_command);
3400 svname = lustre_cfg_string(lcfg, 0);
3401 if (svname == NULL) {
3402 CERROR("svname is empty\n");
3406 param = lustre_cfg_string(lcfg, 1);
3407 if (param == NULL) {
3408 CERROR("param is empty\n");
3412 rc = mgs_srpc_set_param_mem(msrd->msrd_fsdb, svname, param);
3414 CERROR("read sptlrpc record error (%d): %s\n", rc, param);
/*
 * Load the sptlrpc rules of a filesystem from its "<fsdb_name>-sptlrpc"
 * log into the fsdb.
 *
 * The config llog context is taken from mgs->mgs_obd and asserted to be
 * non-NULL. An empty log skips the processing below (the jump target is
 * not visible here). Otherwise the log is opened and initialised as a
 * plain llog; a log of size <= 1 is treated as success with nothing to
 * read, and any larger log is walked with mgs_srpc_read_handler().
 *
 * The handle is closed, the context reference dropped and the log name
 * freed on the way out; a failure is reported with CERROR.
 */
3419 int mgs_get_fsdb_srpc_from_llog(const struct lu_env *env,
3420 struct mgs_device *mgs,
3423 struct llog_handle *llh = NULL;
3424 struct llog_ctxt *ctxt;
3426 struct mgs_srpc_read_data msrd;
3430 /* construct log name */
3431 rc = name_create(&logname, fsdb->fsdb_name, "-sptlrpc");
3435 ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
3436 LASSERT(ctxt != NULL);
3438 if (mgs_log_is_empty(env, mgs, logname))
3441 rc = llog_open(env, ctxt, &llh, NULL, logname,
3449 rc = llog_init_handle(env, llh, LLOG_F_IS_PLAIN, NULL);
3451 GOTO(out_close, rc);
3453 if (llog_get_size(llh) <= 1)
3454 GOTO(out_close, rc = 0);
3456 msrd.msrd_fsdb = fsdb;
3459 rc = llog_process(env, llh, mgs_srpc_read_handler, (void *)&msrd,
3463 llog_close(env, llh);
3465 llog_ctxt_put(ctxt);
3466 name_destroy(&logname);
3469 CERROR("failed to read sptlrpc config database: %d\n", rc);
3473 /* Permanent settings of all parameters by writing into the appropriate
3474 * configuration logs.
3475 * A parameter with null value ("<param>='\0'") means to erase it out of
/*
 * Route one "name=value" parameter to the configuration logs that care
 * about it.
 *
 * Dispatch order visible in the code (the first match wins unless noted):
 *  - PARAM_MGSNODE, PARAM_NETWORK: recognised here; per the in-code
 *    comments they are processed elsewhere;
 *  - PARAM_FAILMODE: console error 0x169 when LDD_F_PARAM is set;
 *  - PARAM_SRPC: mgs_srpc_set_param();
 *  - PARAM_FAILNODE: mgs_write_log_add_failnid() when LDD_F_PARAM is set;
 *  - PARAM_SYS: mgs_write_log_sys();
 *  - PARAM_QUOTA: mgs_write_log_quota();
 *  - osc/mdc "active": mgs_modify() on the "<fsname>-client" log and on
 *    every MDT log, with CM_EXCLUDE when the value starts with '0'; this
 *    case then falls through to the osc/mdc handling below;
 *  - PARAM_LOV: only accepted for MDT targets; written to the target's
 *    own log (device name from name_create_mdt_and_lov()) and to the
 *    client log (device fsdb->fsdb_clilov);
 *  - PARAM_OSC, PARAM_MDC, PARAM_LLITE: written to the client log; for
 *    OSTs also to each non-empty MDT log; for mdc "active" also as an
 *    "osp" setting plus a LOD notification in the MDT logs. Direct llite
 *    root_squash/nosquash_nids updates are rejected;
 *  - PARAM_MDT: written to all MDT logs or to a single MDT log;
 *    root_squash/nosquash_nids are rejected for a single MDT and mirrored
 *    as llite settings into the client log;
 *  - PARAM_MDD, PARAM_OST, PARAM_OSD: written to the target's own log,
 *    with -ENODEV if that log is empty;
 *  - anything else: "Ignoring unrecognized param" console warning.
 *
 * NOTE(review): two console messages are built from adjacent string
 * literals with no separating space and therefore print
 * "tunefs.lustreand --writeconf" (0x169) and "in" immediately followed by
 * "log" (0x145).
 * NOTE(review): many guard, brace and return lines are missing from this
 * excerpt; the summary above reflects only the visible statements.
 */
3478 static int mgs_write_log_param(const struct lu_env *env,
3479 struct mgs_device *mgs, struct fs_db *fsdb,
3480 struct mgs_target_info *mti, char *ptr)
3482 struct mgs_thread_info *mgi = mgs_env_info(env);
3488 /* For various parameter settings, we have to figure out which logs
3489 care about them (e.g. both mdt and client for lov settings) */
3490 CDEBUG(D_MGS, "next param '%s'\n", ptr);
3492 /* The params are stored in MOUNT_DATA_FILE and modified via
3493 tunefs.lustre, or set using lctl conf_param */
3495 /* Processed in lustre_start_mgc */
3496 if (class_match_param(ptr, PARAM_MGSNODE, NULL) == 0)
3499 /* Processed in ost/mdt */
3500 if (class_match_param(ptr, PARAM_NETWORK, NULL) == 0)
3503 /* Processed in mgs_write_log_ost */
3504 if (class_match_param(ptr, PARAM_FAILMODE, NULL) == 0) {
3505 if (mti->mti_flags & LDD_F_PARAM) {
3506 LCONSOLE_ERROR_MSG(0x169, "%s can only be "
3507 "changed with tunefs.lustre"
3508 "and --writeconf\n", ptr);
3514 if (class_match_param(ptr, PARAM_SRPC, NULL) == 0) {
3515 rc = mgs_srpc_set_param(env, mgs, fsdb, mti, ptr);
3519 if (class_match_param(ptr, PARAM_FAILNODE, NULL) == 0) {
3520 /* Add a failover nidlist */
3522 /* We already processed failovers params for new
3523 targets in mgs_write_log_target */
3524 if (mti->mti_flags & LDD_F_PARAM) {
3525 CDEBUG(D_MGS, "Adding failnode\n");
3526 rc = mgs_write_log_add_failnid(env, mgs, fsdb, mti);
3531 if (class_match_param(ptr, PARAM_SYS, &tmp) == 0) {
3532 rc = mgs_write_log_sys(env, mgs, fsdb, mti, ptr, tmp);
3536 if (class_match_param(ptr, PARAM_QUOTA, &tmp) == 0) {
3537 rc = mgs_write_log_quota(env, mgs, fsdb, mti, ptr, tmp);
3541 if (class_match_param(ptr, PARAM_OSC PARAM_ACTIVE, &tmp) == 0 ||
3542 class_match_param(ptr, PARAM_MDC PARAM_ACTIVE, &tmp) == 0) {
3543 /* active=0 means off, anything else means on */
3544 int flag = (*tmp == '0') ? CM_EXCLUDE : 0;
3545 bool deactive_osc = memcmp(ptr, PARAM_OSC PARAM_ACTIVE,
3546 strlen(PARAM_OSC PARAM_ACTIVE)) == 0;
3549 if (!deactive_osc) {
3552 rc = server_name2index(mti->mti_svname, &index, NULL);
3557 LCONSOLE_ERROR_MSG(0x144, "%s: MDC0 can not be"
3558 " (de)activated.\n",
3560 GOTO(end, rc = -EINVAL);
3564 LCONSOLE_WARN("Permanently %sactivating %s\n",
3565 flag ? "de" : "re", mti->mti_svname);
3567 rc = name_create(&logname, mti->mti_fsname, "-client");
3570 rc = mgs_modify(env, mgs, fsdb, mti, logname,
3572 deactive_osc ? "add osc" : "add mdc", flag);
3573 name_destroy(&logname);
3578 /* Add to all MDT logs for DNE */
3579 for (i = 0; i < INDEX_MAP_SIZE * 8; i++) {
3580 if (!test_bit(i, fsdb->fsdb_mdt_index_map))
3582 rc = name_create_mdt(&logname, mti->mti_fsname, i);
3585 rc = mgs_modify(env, mgs, fsdb, mti, logname,
3587 deactive_osc ? "add osc" : "add osp",
3589 name_destroy(&logname);
3595 LCONSOLE_ERROR_MSG(0x145, "Couldn't find %s in"
3596 "log (%d). No permanent "
3597 "changes were made to the "
3599 mti->mti_svname, rc);
3600 if (test_bit(FSDB_OLDLOG14, &fsdb->fsdb_flags))
3601 LCONSOLE_ERROR_MSG(0x146, "This may be"
3606 "update the logs.\n");
3609 /* Fall through to osc/mdc proc for deactivating live
3610 OSC/OSP on running MDT / clients. */
3612 /* Below here, let obd's XXX_process_config methods handle it */
3614 /* All lov. in proc */
3615 if (class_match_param(ptr, PARAM_LOV, NULL) == 0) {
3618 CDEBUG(D_MGS, "lov param %s\n", ptr);
3619 if (!(mti->mti_flags & LDD_F_SV_TYPE_MDT)) {
3620 LCONSOLE_ERROR_MSG(0x147, "LOV params must be "
3621 "set on the MDT, not %s. "
3628 if (mgs_log_is_empty(env, mgs, mti->mti_svname))
3629 GOTO(end, rc = -ENODEV);
3631 rc = name_create_mdt_and_lov(&logname, &mdtlovname, fsdb,
3632 mti->mti_stripe_index);
3635 rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, mti->mti_svname,
3636 &mgi->mgi_bufs, mdtlovname, ptr);
3637 name_destroy(&logname);
3638 name_destroy(&mdtlovname);
3643 rc = name_create(&logname, mti->mti_fsname, "-client");
3646 rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, logname, &mgi->mgi_bufs,
3647 fsdb->fsdb_clilov, ptr);
3648 name_destroy(&logname);
3652 /* All osc., mdc., llite. params in proc */
3653 if ((class_match_param(ptr, PARAM_OSC, NULL) == 0) ||
3654 (class_match_param(ptr, PARAM_MDC, NULL) == 0) ||
3655 (class_match_param(ptr, PARAM_LLITE, NULL) == 0)) {
3658 if (test_bit(FSDB_OLDLOG14, &fsdb->fsdb_flags)) {
3659 LCONSOLE_ERROR_MSG(0x148, "Upgraded client logs for %s"
3660 " cannot be modified. Consider"
3661 " updating the configuration with"
3664 GOTO(end, rc = -EINVAL);
3666 if (memcmp(ptr, PARAM_LLITE, strlen(PARAM_LLITE)) == 0) {
3667 rc = name_create(&cname, mti->mti_fsname, "-client");
3668 /* Add the client type to match the obdname in
3669 class_config_llog_handler */
3670 } else if (mti->mti_flags & LDD_F_SV_TYPE_MDT) {
3671 rc = name_create(&cname, mti->mti_svname, "-mdc");
3672 } else if (mti->mti_flags & LDD_F_SV_TYPE_OST) {
3673 rc = name_create(&cname, mti->mti_svname, "-osc");
3675 GOTO(end, rc = -EINVAL);
3680 /* Forbid direct update of llite root squash parameters.
3681 * These parameters are indirectly set via the MDT settings.
3683 if ((class_match_param(ptr, PARAM_LLITE, &tmp) == 0) &&
3684 ((memcmp(tmp, "root_squash=", 12) == 0) ||
3685 (memcmp(tmp, "nosquash_nids=", 14) == 0))) {
3686 LCONSOLE_ERROR("%s: root squash parameters can only "
3687 "be updated through MDT component\n",
3689 name_destroy(&cname);
3690 GOTO(end, rc = -EINVAL);
3693 CDEBUG(D_MGS, "%.3s param %s\n", ptr, ptr + 4);
3696 rc = name_create(&logname, mti->mti_fsname, "-client");
3698 name_destroy(&cname);
3701 rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, logname, &mgi->mgi_bufs,
3704 /* osc params affect the MDT as well */
3705 if (!rc && (mti->mti_flags & LDD_F_SV_TYPE_OST)) {
3708 for (i = 0; i < INDEX_MAP_SIZE * 8; i++){
3709 if (!test_bit(i, fsdb->fsdb_mdt_index_map))
3711 name_destroy(&cname);
3712 rc = name_create_mdt_osc(&cname, mti->mti_svname,
3714 name_destroy(&logname);
3717 rc = name_create_mdt(&logname,
3718 mti->mti_fsname, i);
3721 if (!mgs_log_is_empty(env, mgs, logname)) {
3722 rc = mgs_wlp_lcfg(env, mgs, fsdb,
3732 /* For mdc activate/deactivate, it affects OSP on MDT as well */
3733 if (class_match_param(ptr, PARAM_MDC PARAM_ACTIVE, &tmp) == 0 &&
3736 char *lodname = NULL;
3737 char *param_str = NULL;
3741 /* replace mdc with osp */
3742 memcpy(ptr, PARAM_OSP, strlen(PARAM_OSP));
3743 rc = server_name2index(mti->mti_svname, &index, NULL);
3745 memcpy(ptr, PARAM_MDC, strlen(PARAM_MDC));
3749 for (i = 0; i < INDEX_MAP_SIZE * 8; i++) {
3750 if (!test_bit(i, fsdb->fsdb_mdt_index_map))
3756 name_destroy(&logname);
3757 rc = name_create_mdt(&logname, mti->mti_fsname,
3762 if (mgs_log_is_empty(env, mgs, logname))
3765 snprintf(suffix, sizeof(suffix), "-osp-MDT%04x",
3767 name_destroy(&cname);
3768 rc = name_create(&cname, mti->mti_svname,
3773 rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, logname,
3774 &mgi->mgi_bufs, cname, ptr);
3778 /* Add configuration log for noitfying LOD
3779 * to active/deactive the OSP. */
3780 name_destroy(¶m_str);
3781 rc = name_create(¶m_str, cname,
3782 (*tmp == '0') ? ".active=0" :
3787 name_destroy(&lodname);
3788 rc = name_create(&lodname, logname, "-mdtlov");
3792 rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, logname,
3793 &mgi->mgi_bufs, lodname,
3798 memcpy(ptr, PARAM_MDC, strlen(PARAM_MDC));
3799 name_destroy(&lodname);
3800 name_destroy(¶m_str);
3803 name_destroy(&logname);
3804 name_destroy(&cname);
3808 /* All mdt. params in proc */
3809 if (class_match_param(ptr, PARAM_MDT, &tmp) == 0) {
3813 CDEBUG(D_MGS, "%.3s param %s\n", ptr, ptr + 4);
3814 if (strncmp(mti->mti_svname, mti->mti_fsname,
3815 MTI_NAME_MAXLEN) == 0)
3816 /* device is unspecified completely? */
3817 rc = LDD_F_SV_TYPE_MDT | LDD_F_SV_ALL;
3819 rc = server_name2index(mti->mti_svname, &idx, NULL);
3822 if ((rc & LDD_F_SV_TYPE_MDT) == 0)
3824 if (rc & LDD_F_SV_ALL) {
3825 for (i = 0; i < INDEX_MAP_SIZE * 8; i++) {
3827 fsdb->fsdb_mdt_index_map))
3829 rc = name_create_mdt(&logname,
3830 mti->mti_fsname, i);
3833 rc = mgs_wlp_lcfg(env, mgs, fsdb, mti,
3834 logname, &mgi->mgi_bufs,
3836 name_destroy(&logname);
3841 if ((memcmp(tmp, "root_squash=", 12) == 0) ||
3842 (memcmp(tmp, "nosquash_nids=", 14) == 0)) {
3843 LCONSOLE_ERROR("%s: root squash parameters "
3844 "cannot be applied to a single MDT\n",
3846 GOTO(end, rc = -EINVAL);
3848 rc = mgs_wlp_lcfg(env, mgs, fsdb, mti,
3849 mti->mti_svname, &mgi->mgi_bufs,
3850 mti->mti_svname, ptr);
3855 /* root squash settings are also applied to llite
3856 * config log (see LU-1778) */
3858 ((memcmp(tmp, "root_squash=", 12) == 0) ||
3859 (memcmp(tmp, "nosquash_nids=", 14) == 0))) {
3863 rc = name_create(&cname, mti->mti_fsname, "-client");
3866 rc = name_create(&logname, mti->mti_fsname, "-client");
3868 name_destroy(&cname);
3871 rc = name_create(&ptr2, PARAM_LLITE, tmp);
3873 name_destroy(&cname);
3874 name_destroy(&logname);
3877 rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, logname,
3878 &mgi->mgi_bufs, cname, ptr2);
3879 name_destroy(&ptr2);
3880 name_destroy(&logname);
3881 name_destroy(&cname);
3886 /* All mdd., ost. and osd. params in proc */
3887 if ((class_match_param(ptr, PARAM_MDD, NULL) == 0) ||
3888 (class_match_param(ptr, PARAM_OST, NULL) == 0) ||
3889 (class_match_param(ptr, PARAM_OSD, NULL) == 0)) {
3890 CDEBUG(D_MGS, "%.3s param %s\n", ptr, ptr + 4);
3891 if (mgs_log_is_empty(env, mgs, mti->mti_svname))
3892 GOTO(end, rc = -ENODEV);
3894 rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, mti->mti_svname,
3895 &mgi->mgi_bufs, mti->mti_svname, ptr);
3899 LCONSOLE_WARN("Ignoring unrecognized param '%s'\n", ptr);
3903 CERROR("err %d on param '%s'\n", rc, ptr);
/*
 * Write or update the configuration logs for a registering target.
 *
 * Visible steps:
 *  - mgs_set_index() sets or checks the target index; a result equal to
 *    EALREADY (positive) prints a console warning and clears LDD_F_UPDATE
 *    so that, per the in-code comment, client and MDT logs are not
 *    regenerated;
 *  - under fsdb->fsdb_mutex: when any of LDD_F_VIRGIN, LDD_F_UPGRADE14 or
 *    LDD_F_WRITECONF is set, the log is generated from scratch with
 *    mgs_write_log_mdt() or mgs_write_log_ost() depending on the target
 *    type, and an unknown type or a failure is reported with CERROR; the
 *    "update params" path sets LDD_F_PARAM (the branch structure around
 *    it is not visible here);
 *  - mti->mti_params is then split with class_get_next_param() into a
 *    temporary buffer of strlen(mti_params) + 1 bytes and each parameter
 *    is passed to mgs_write_log_param();
 *  - the buffer is freed and the mutex released before returning.
 */
3908 int mgs_write_log_target(const struct lu_env *env, struct mgs_device *mgs,
3909 struct mgs_target_info *mti, struct fs_db *fsdb)
3916 /* set/check the new target index */
3917 rc = mgs_set_index(env, mgs, mti);
3921 if (rc == EALREADY) {
3922 LCONSOLE_WARN("Found index %d for %s, updating log\n",
3923 mti->mti_stripe_index, mti->mti_svname);
3924 /* We would like to mark old log sections as invalid
3925 and add new log sections in the client and mdt logs.
3926 But if we add new sections, then live clients will
3927 get repeat setup instructions for already running
3928 osc's. So don't update the client/mdt logs. */
3929 mti->mti_flags &= ~LDD_F_UPDATE;
3933 OBD_FAIL_TIMEOUT(OBD_FAIL_MGS_WRITE_TARGET_DELAY, cfs_fail_val > 0 ?
3936 mutex_lock(&fsdb->fsdb_mutex);
3938 if (mti->mti_flags &
3939 (LDD_F_VIRGIN | LDD_F_UPGRADE14 | LDD_F_WRITECONF)) {
3940 /* Generate a log from scratch */
3941 if (mti->mti_flags & LDD_F_SV_TYPE_MDT) {
3942 rc = mgs_write_log_mdt(env, mgs, fsdb, mti);
3943 } else if (mti->mti_flags & LDD_F_SV_TYPE_OST) {
3944 rc = mgs_write_log_ost(env, mgs, fsdb, mti);
3946 CERROR("Unknown target type %#x, can't create log for "
3947 "%s\n", mti->mti_flags, mti->mti_svname);
3950 CERROR("Can't write logs for %s (%d)\n",
3951 mti->mti_svname, rc);
3955 /* Just update the params from tunefs in mgs_write_log_params */
3956 CDEBUG(D_MGS, "Update params for %s\n", mti->mti_svname);
3957 mti->mti_flags |= LDD_F_PARAM;
3960 /* allocate temporary buffer, where class_get_next_param will
3961 make copy of a current parameter */
3962 OBD_ALLOC(buf, strlen(mti->mti_params) + 1);
3964 GOTO(out_up, rc = -ENOMEM);
3965 params = mti->mti_params;
3966 while (params != NULL) {
3967 rc = class_get_next_param(¶ms, buf);
3970 /* there is no next parameter, that is
3975 CDEBUG(D_MGS, "remaining string: '%s', param: '%s'\n",
3977 rc = mgs_write_log_param(env, mgs, fsdb, mti, buf);
3982 OBD_FREE(buf, strlen(mti->mti_params) + 1);
3985 mutex_unlock(&fsdb->fsdb_mutex);
/*
 * Erase a single configuration log by name.
 *
 * The config llog context is looked up on mgs->mgs_obd; a missing context
 * is reported with CERROR. The log is removed with llog_erase() and the
 * context reference is dropped afterwards. Per the in-code comment a
 * non-existent log is tolerated (the check itself is not visible here);
 * other failures are reported with CERROR.
 *
 * \param name  name of the log to erase
 */
3989 int mgs_erase_log(const struct lu_env *env, struct mgs_device *mgs, char *name)
3991 struct llog_ctxt *ctxt;
3994 ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
3996 CERROR("%s: MGS config context doesn't exist\n",
3997 mgs->mgs_obd->obd_name);
4000 rc = llog_erase(env, ctxt, NULL, name);
4001 /* llog may not exist */
4004 llog_ctxt_put(ctxt);
4008 CERROR("%s: failed to clear log %s: %d\n",
4009 mgs->mgs_obd->obd_name, name, rc);
4014 /* erase all logs for the given fs */
/*
 * Erase every configuration log that belongs to one filesystem and drop
 * its in-memory databases.
 *
 * The CONFIGS directory is listed first. Under mgs->mgs_mutex the barrier
 * fsdb (named "<fsname>-" followed by BARRIER_FILENAME) and the fsdb of
 * the filesystem itself are removed by name. Each directory entry is then
 * unlinked from the list and freed; an entry is erased with
 * mgs_erase_log() only when the text before its last '-' has exactly the
 * length of fsname and matches it.
 *
 * NOTE(review): with the visible condition, an entry whose name contains
 * no '-' is never erased, even if it equals fsname - confirm this is
 * intended for callers such as mgs_params_fsdb_cleanup().
 */
4015 int mgs_erase_logs(const struct lu_env *env, struct mgs_device *mgs,
4018 struct list_head log_list;
4019 struct mgs_direntry *dirent, *n;
4020 char barrier_name[20] = {};
4022 int rc, len = strlen(fsname);
4025 /* Find all the logs in the CONFIGS directory */
4026 rc = class_dentry_readdir(env, mgs, &log_list);
4030 mutex_lock(&mgs->mgs_mutex);
4031 snprintf(barrier_name, sizeof(barrier_name) - 1, "%s-%s",
4032 fsname, BARRIER_FILENAME);
4033 /* Delete the barrier fsdb */
4034 mgs_remove_fsdb_by_name(mgs, barrier_name);
4035 /* Delete the fs db */
4036 mgs_remove_fsdb_by_name(mgs, fsname);
4037 mutex_unlock(&mgs->mgs_mutex);
4039 list_for_each_entry_safe(dirent, n, &log_list, mde_list) {
4040 list_del_init(&dirent->mde_list);
4041 suffix = strrchr(dirent->mde_name, '-');
4042 if (suffix != NULL) {
4043 if ((len == suffix - dirent->mde_name) &&
4044 (strncmp(fsname, dirent->mde_name, len) == 0)) {
4045 CDEBUG(D_MGS, "Removing log %s\n",
4047 mgs_erase_log(env, mgs, dirent->mde_name);
4050 mgs_direntry_free(dirent);
4056 /* list all logs for the given fs */
/*
 * Format the names of configuration logs into an ioctl reply buffer.
 *
 * The CONFIGS directory is listed, then for each entry whose name
 * contains '-' a line of the form "config log: $<name>" is printed with
 * snprintf() into data->ioc_bulk, whose capacity is taken from
 * data->ioc_inllen1. Every entry is unlinked from the list and freed.
 *
 * NOTE(review): how out/remains advance and how truncation is handled is
 * on lines not visible in this excerpt.
 */
4057 int mgs_list_logs(const struct lu_env *env, struct mgs_device *mgs,
4058 struct obd_ioctl_data *data)
4060 struct list_head log_list;
4061 struct mgs_direntry *dirent, *n;
4067 /* Find all the logs in the CONFIGS directory */
4068 rc = class_dentry_readdir(env, mgs, &log_list);
4072 out = data->ioc_bulk;
4073 remains = data->ioc_inllen1;
4074 list_for_each_entry_safe(dirent, n, &log_list, mde_list) {
4075 list_del_init(&dirent->mde_list);
4076 suffix = strrchr(dirent->mde_name, '-');
4077 if (suffix != NULL) {
4078 l = snprintf(out, remains, "config log: $%s\n",
4083 mgs_direntry_free(dirent);
4090 /* from llog_swab */
/*
 * Dump a lustre_cfg to the debug log at D_MGS level.
 *
 * Prints the pointer, version, command, num, flags, nid (via
 * libcfs_nid2str()) and buffer count. The per-buffer lengths and string
 * contents are printed only when lcfg_bufcount is below
 * LUSTRE_CFG_MAX_BUFCOUNT.
 */
4091 static void print_lustre_cfg(struct lustre_cfg *lcfg)
4096 CDEBUG(D_MGS, "lustre_cfg: %p\n", lcfg);
4097 CDEBUG(D_MGS, "\tlcfg->lcfg_version: %#x\n", lcfg->lcfg_version);
4099 CDEBUG(D_MGS, "\tlcfg->lcfg_command: %#x\n", lcfg->lcfg_command);
4100 CDEBUG(D_MGS, "\tlcfg->lcfg_num: %#x\n", lcfg->lcfg_num);
4101 CDEBUG(D_MGS, "\tlcfg->lcfg_flags: %#x\n", lcfg->lcfg_flags);
4102 CDEBUG(D_MGS, "\tlcfg->lcfg_nid: %s\n", libcfs_nid2str(lcfg->lcfg_nid));
4104 CDEBUG(D_MGS, "\tlcfg->lcfg_bufcount: %d\n", lcfg->lcfg_bufcount);
4105 if (lcfg->lcfg_bufcount < LUSTRE_CFG_MAX_BUFCOUNT)
4106 for (i = 0; i < lcfg->lcfg_bufcount; i++) {
4107 CDEBUG(D_MGS, "\tlcfg->lcfg_buflens[%d]: %d %s\n",
4108 i, lcfg->lcfg_buflens[i],
4109 lustre_cfg_string(lcfg, i));
4114 /* Setup _mgs fsdb and log
/*
 * Make sure the fsdb named MGSSELF_NAME exists.
 *
 * It is looked up or created with mgs_find_or_make_fsdb() and the
 * reference is dropped again with mgs_put_fsdb(); whether the put is
 * conditional on success is not visible in this excerpt.
 */
4116 int mgs__mgs_fsdb_setup(const struct lu_env *env, struct mgs_device *mgs)
4118 struct fs_db *fsdb = NULL;
4122 rc = mgs_find_or_make_fsdb(env, mgs, MGSSELF_NAME, &fsdb);
4124 mgs_put_fsdb(mgs, fsdb);
4129 /* Setup params fsdb and log
/*
 * Make sure the PARAMS_FILENAME fsdb and its log exist.
 *
 * The fsdb is looked up or created. Under its fsdb_mutex the log is
 * opened with record_start_log() and closed with record_end_log() without
 * any record written in the visible lines. The fsdb reference is dropped
 * afterwards.
 */
4131 int mgs_params_fsdb_setup(const struct lu_env *env, struct mgs_device *mgs)
4133 struct fs_db *fsdb = NULL;
4134 struct llog_handle *params_llh = NULL;
4138 rc = mgs_find_or_make_fsdb(env, mgs, PARAMS_FILENAME, &fsdb);
4140 mutex_lock(&fsdb->fsdb_mutex);
4141 rc = record_start_log(env, mgs, ¶ms_llh, PARAMS_FILENAME);
4143 rc = record_end_log(env, ¶ms_llh);
4144 mutex_unlock(&fsdb->fsdb_mutex);
4145 mgs_put_fsdb(mgs, fsdb);
4151 /* Cleanup params fsdb and log
/*
 * Tear down the PARAMS_FILENAME fsdb and logs by delegating to
 * mgs_erase_logs() with PARAMS_FILENAME as the filesystem name; its
 * result is returned unchanged.
 */
4153 int mgs_params_fsdb_cleanup(const struct lu_env *env, struct mgs_device *mgs)
4155 return mgs_erase_logs(env, mgs, PARAMS_FILENAME);
4158 /* Set a permanent (config log) param for a target or fs
4159 * \param lcfg buf0 may contain the device (testfs-MDT0000) name
4160 * buf1 contains the single parameter
/*
 * Record one parameter carried by \a lcfg in a configuration log and revoke
 * the corresponding config lock.
 *
 * Flow as far as it is visible here:
 *  - devname comes from lcfg buffer 0 and param from buffer 1;
 *  - \a fsname is written by mgs_parse_devname() and by a strlcpy() of
 *    devname bounded by MTI_NAME_MAXLEN;
 *  - the fsdb looked up is PARAMS_FILENAME for LCFG_SET_PARAM, otherwise
 *    the one named \a fsname;
 *  - a mgs_target_info is filled with fsname, devname and param, each copy
 *    failing with -E2BIG when it does not fit;
 *  - the parameter is written under fsdb->fsdb_mutex and the lock is revoked
 *    after the mutex has been dropped.
 *
 * \param env		execution environment passed to the helpers
 * \param mgs		MGS device
 * \param lcfg		configuration record; lcfg_command selects the log
 * \param fsname	buffer that receives the file system name
 *
 * NOTE(review): several lines of this function (declarations of ptr, tmp,
 * index and rc, the allocation of mti, else branches, the "out" label and
 * the final return) are absent from this listing; branch structure described
 * here is limited to what the remaining lines show.
 */
4162 int mgs_setparam(const struct lu_env *env, struct mgs_device *mgs,
4163 struct lustre_cfg *lcfg, char *fsname)
4165 struct fs_db *fsdb = NULL;
4166 struct mgs_target_info *mti = NULL;
4167 char *devname, *param;
4175 print_lustre_cfg(lcfg);
4177 /* lustre, lustre-mdtlov, lustre-client, lustre-MDT0000 */
4178 devname = lustre_cfg_string(lcfg, 0);
4179 param = lustre_cfg_string(lcfg, 1);
4181 /* Assume device name embedded in param:
4182 lustre-OST0000.osc.max_dirty_mb=32 */
4183 ptr = strchr(param, '.');
4191 LCONSOLE_ERROR_MSG(0x14d, "No target specified: %s\n", param);
/* first call extracts the fs name, second call only probes for an index */
4195 rc = mgs_parse_devname(devname, fsname, NULL);
4196 if (rc == 0 && !mgs_parse_devname(devname, NULL, &index)) {
4197 /* param related to llite isn't allowed to set by OST or MDT */
4198 if (rc == 0 && strncmp(param, PARAM_LLITE,
4199 sizeof(PARAM_LLITE) - 1) == 0)
4202 /* assume devname is the fsname */
4203 strlcpy(fsname, devname, MTI_NAME_MAXLEN);
4205 CDEBUG(D_MGS, "setparam fs='%s' device='%s'\n", fsname, devname);
/* LCFG_SET_PARAM uses the PARAMS_FILENAME fsdb, anything else the fs one */
4207 rc = mgs_find_or_make_fsdb(env, mgs,
4208 lcfg->lcfg_command == LCFG_SET_PARAM ?
4209 PARAMS_FILENAME : fsname, &fsdb);
/*
 * For commands other than LCFG_SET_PARAM an fsdb that is not FSDB_MGS_SELF
 * and still has FSDB_LOG_EMPTY set is rejected with -EINVAL.
 */
4213 if (lcfg->lcfg_command != LCFG_SET_PARAM &&
4214 !test_bit(FSDB_MGS_SELF, &fsdb->fsdb_flags) &&
4215 test_bit(FSDB_LOG_EMPTY, &fsdb->fsdb_flags)) {
4216 CERROR("No filesystem targets for %s. cfg_device from lctl "
4217 "is '%s'\n", fsname, devname);
4219 GOTO(out, rc = -EINVAL);
4222 /* Create a fake mti to hold everything */
4225 GOTO(out, rc = -ENOMEM);
/*
 * strlcpy() returns the length of the source string, so a result that is
 * not smaller than the destination size means the copy was truncated.
 */
4226 if (strlcpy(mti->mti_fsname, fsname, sizeof(mti->mti_fsname))
4227 >= sizeof(mti->mti_fsname))
4228 GOTO(out, rc = -E2BIG);
4229 if (strlcpy(mti->mti_svname, devname, sizeof(mti->mti_svname))
4230 >= sizeof(mti->mti_svname))
4231 GOTO(out, rc = -E2BIG);
4232 if (strlcpy(mti->mti_params, param, sizeof(mti->mti_params))
4233 >= sizeof(mti->mti_params))
4234 GOTO(out, rc = -E2BIG);
4235 rc = server_name2index(mti->mti_svname, &mti->mti_stripe_index, &tmp);
4237 /* Not a valid server; may be only fsname */
4240 /* Strip -osc or -mdc suffix from svname */
4241 if (server_make_name(rc, mti->mti_stripe_index, mti->mti_fsname,
4243 GOTO(out, rc = -EINVAL);
/*
 * NOTE(review): the next three lines are the text of a block comment whose
 * opening and closing lines are not present in this listing.
 */
4245 * Revoke lock so everyone updates. Should be alright if
4246 * someone was already reading while we were updating the logs,
4247 * so we don't really need to hold the lock while we're
/* rc is OR-ed into mti_flags together with LDD_F_PARAM2 or LDD_F_PARAM */
4250 if (lcfg->lcfg_command == LCFG_SET_PARAM) {
4251 mti->mti_flags = rc | LDD_F_PARAM2;
4252 mutex_lock(&fsdb->fsdb_mutex);
4253 rc = mgs_write_log_param2(env, mgs, fsdb, mti, mti->mti_params);
4254 mutex_unlock(&fsdb->fsdb_mutex);
4255 mgs_revoke_lock(mgs, fsdb, CONFIG_T_PARAMS);
4257 mti->mti_flags = rc | LDD_F_PARAM;
4258 mutex_lock(&fsdb->fsdb_mutex);
4259 rc = mgs_write_log_param(env, mgs, fsdb, mti, mti->mti_params);
4260 mutex_unlock(&fsdb->fsdb_mutex);
4261 mgs_revoke_lock(mgs, fsdb, CONFIG_T_CONFIG);
/* NOTE(review): the condition guarding mgs_unlink_fsdb() is not visible */
4270 mgs_unlink_fsdb(mgs, fsdb);
4271 mgs_put_fsdb(mgs, fsdb);
/*
 * Append one pool command to the log \a logname, bracketed by a CM_START
 * and a CM_END marker that both carry \a tgtname and \a comment.
 *
 * \param env		execution environment passed to the record helpers
 * \param mgs		MGS device the log is opened on
 * \param logname	name of the log handed to record_start_log()
 * \param fsdb		fsdb passed to record_marker()
 * \param tgtname	target name used for the markers and the record
 * \param cmd		command stored by record_base()
 * \param fsname	first string argument of the record
 * \param poolname	second string argument of the record
 * \param ostname	third string argument of the record
 * \param comment	marker comment
 *
 * NOTE(review): the checks of rc between the steps, any labels and the
 * return statement are not present in this listing.
 */
4277 static int mgs_write_log_pool(const struct lu_env *env,
4278 struct mgs_device *mgs, char *logname,
4279 struct fs_db *fsdb, char *tgtname,
4280 enum lcfg_command_type cmd,
4281 char *fsname, char *poolname,
4282 char *ostname, char *comment)
4284 struct llog_handle *llh = NULL;
4287 rc = record_start_log(env, mgs, &llh, logname);
4290 rc = record_marker(env, llh, fsdb, CM_START, tgtname, comment);
4293 rc = record_base(env, llh, tgtname, 0, cmd,
4294 fsname, poolname, ostname, NULL);
4297 rc = record_marker(env, llh, fsdb, CM_END, tgtname, comment);
/* the result of closing the log is not stored in rc */
4299 record_end_log(env, &llh);
/*
 * Apply one nodemap configuration command to the nodemap \a nodemap_name by
 * calling the nodemap_*() helper that belongs to \a cmd.
 *
 * Numeric arguments (the boolean switches and the squash ids) are read from
 * param with simple_strtoul() in base 10; NID ranges and id maps are parsed
 * with nodemap_parse_range() and nodemap_parse_idmap() before use.
 *
 * \param env		execution environment
 * \param mgs		MGS device
 * \param cmd		nodemap command, one of the LCFG_NODEMAP_* values
 *			handled below
 * \param nodemap_name	name of the nodemap the command applies to
 *
 * NOTE(review): the end of the parameter list (param is used below but not
 * declared in the visible lines), the local declarations, the switch
 * statement itself, the checks of rc after the parse helpers, the trailing
 * arguments of the idmap calls and the return are absent from this listing.
 */
4303 int mgs_nodemap_cmd(const struct lu_env *env, struct mgs_device *mgs,
4304 enum lcfg_command_type cmd, const char *nodemap_name,
4315 case LCFG_NODEMAP_ADD:
4316 rc = nodemap_add(nodemap_name);
4318 case LCFG_NODEMAP_DEL:
4319 rc = nodemap_del(nodemap_name);
4321 case LCFG_NODEMAP_ADD_RANGE:
/* param is parsed into nid, which is then added to the nodemap */
4322 rc = nodemap_parse_range(param, nid);
4325 rc = nodemap_add_range(nodemap_name, nid);
4327 case LCFG_NODEMAP_DEL_RANGE:
4328 rc = nodemap_parse_range(param, nid);
4331 rc = nodemap_del_range(nodemap_name, nid);
4333 case LCFG_NODEMAP_ADMIN:
4334 bool_switch = simple_strtoul(param, NULL, 10);
4335 rc = nodemap_set_allow_root(nodemap_name, bool_switch);
4337 case LCFG_NODEMAP_DENY_UNKNOWN:
4338 bool_switch = simple_strtoul(param, NULL, 10);
4339 rc = nodemap_set_deny_unknown(nodemap_name, bool_switch);
4341 case LCFG_NODEMAP_TRUSTED:
4342 bool_switch = simple_strtoul(param, NULL, 10);
4343 rc = nodemap_set_trust_client_ids(nodemap_name, bool_switch);
4345 case LCFG_NODEMAP_SQUASH_UID:
4346 int_id = simple_strtoul(param, NULL, 10);
4347 rc = nodemap_set_squash_uid(nodemap_name, int_id);
4349 case LCFG_NODEMAP_SQUASH_GID:
4350 int_id = simple_strtoul(param, NULL, 10);
4351 rc = nodemap_set_squash_gid(nodemap_name, int_id);
/* UID and GID map additions share the parsing and differ in the id type */
4353 case LCFG_NODEMAP_ADD_UIDMAP:
4354 case LCFG_NODEMAP_ADD_GIDMAP:
4355 rc = nodemap_parse_idmap(param, idmap);
4358 if (cmd == LCFG_NODEMAP_ADD_UIDMAP)
4359 rc = nodemap_add_idmap(nodemap_name, NODEMAP_UID,
4362 rc = nodemap_add_idmap(nodemap_name, NODEMAP_GID,
/* UID and GID map removals share the parsing and differ in the id type */
4365 case LCFG_NODEMAP_DEL_UIDMAP:
4366 case LCFG_NODEMAP_DEL_GIDMAP:
4367 rc = nodemap_parse_idmap(param, idmap);
4370 if (cmd == LCFG_NODEMAP_DEL_UIDMAP)
4371 rc = nodemap_del_idmap(nodemap_name, NODEMAP_UID,
4374 rc = nodemap_del_idmap(nodemap_name, NODEMAP_GID,
4377 case LCFG_NODEMAP_SET_FILESET:
/* here param is handed over unparsed */
4378 rc = nodemap_set_fileset(nodemap_name, param);
/*
 * Record a pool command for \a fsname in the configuration logs and ask for
 * a config update by revoking the CONFIG_T_CONFIG lock.
 *
 * Visible flow:
 *  - the fsdb for \a fsname is obtained; an fsdb with FSDB_LOG_EMPTY set is
 *    rejected with -EINVAL;
 *  - a marker label ("new", "add", "rem" or "del" followed by the dotted
 *    names) is formatted into a buffer of label_sz bytes; the "rem" and
 *    "del" forms also build canceled_label holding the matching "add" or
 *    "new" label;
 *  - under fsdb->fsdb_mutex, every MDT whose bit is set in
 *    fsdb->fsdb_mdt_index_map and then the "<fsname>-client" log are
 *    processed: mgs_check_marker() when there is no canceled_label and
 *    checked is still false, mgs_modify() on canceled_label when there is
 *    one, and mgs_write_log_pool() to append the new record.
 *
 * \param env		execution environment passed to the helpers
 * \param mgs		MGS device
 * \param cmd		pool command, forwarded to mgs_write_log_pool()
 * \param fsname	file system name
 * \param poolname	pool name
 * \param ostname	OST name; compared against NULL below
 *
 * NOTE(review): local declarations (fsdb, logname, lovname, label_sz, i,
 * rc), the first lines of the label formatting calls, the dispatch on cmd,
 * several rc checks, the allocation of mti, the goto labels and the return
 * are not present in this listing; the description above is limited to the
 * remaining lines.
 */
4387 int mgs_pool_cmd(const struct lu_env *env, struct mgs_device *mgs,
4388 enum lcfg_command_type cmd, char *fsname,
4389 char *poolname, char *ostname)
4394 char *label = NULL, *canceled_label = NULL;
4396 struct mgs_target_info *mti = NULL;
4397 bool checked = false;
4398 bool locked = false;
4403 rc = mgs_find_or_make_fsdb(env, mgs, fsname, &fsdb);
4405 CERROR("Can't get db for %s\n", fsname);
4408 if (test_bit(FSDB_LOG_EMPTY, &fsdb->fsdb_flags)) {
4409 CERROR("%s is not defined\n", fsname);
4411 GOTO(out_fsdb, rc = -EINVAL);
/* label buffer size: a constant 10 plus the lengths of both names */
4414 label_sz = 10 + strlen(fsname) + strlen(poolname);
4416 /* check if ostname match fsname */
4417 if (ostname != NULL) {
/* compare fsname with the part of ostname that precedes its last '-' */
4420 ptr = strrchr(ostname, '-');
4421 if ((ptr == NULL) ||
4422 (strncmp(fsname, ostname, ptr-ostname) != 0))
4424 label_sz += strlen(ostname);
4427 OBD_ALLOC(label, label_sz);
4429 GOTO(out_fsdb, rc = -ENOMEM);
4434 "new %s.%s", fsname, poolname);
4438 "add %s.%s.%s", fsname, poolname, ostname);
/* "rem" cancels the earlier "add" label for the same pool member */
4441 OBD_ALLOC(canceled_label, label_sz);
4442 if (canceled_label == NULL)
4443 GOTO(out_label, rc = -ENOMEM);
4445 "rem %s.%s.%s", fsname, poolname, ostname);
4446 sprintf(canceled_label,
4447 "add %s.%s.%s", fsname, poolname, ostname);
/* "del" cancels the earlier "new" label for the same pool */
4450 OBD_ALLOC(canceled_label, label_sz);
4451 if (canceled_label == NULL)
4452 GOTO(out_label, rc = -ENOMEM);
4454 "del %s.%s", fsname, poolname);
4455 sprintf(canceled_label,
4456 "new %s.%s", fsname, poolname);
4464 GOTO(out_cancel, rc = -ENOMEM);
4465 strncpy(mti->mti_svname, "lov pool", sizeof(mti->mti_svname));
4467 mutex_lock(&fsdb->fsdb_mutex);
4469 /* write pool def to all MDT logs */
4470 for (i = 0; i < INDEX_MAP_SIZE * 8; i++) {
4471 if (test_bit(i, fsdb->fsdb_mdt_index_map)) {
4472 rc = name_create_mdt_and_lov(&logname, &lovname,
/* the marker check applies only when nothing is being cancelled */
4477 if (!checked && (canceled_label == NULL)) {
4478 rc = mgs_check_marker(env, mgs, fsdb, mti,
4479 logname, lovname, label);
4481 name_destroy(&logname);
4482 name_destroy(&lovname);
4484 rc = (rc == LLOG_PROC_BREAK ?
4489 if (canceled_label != NULL)
4490 rc = mgs_modify(env, mgs, fsdb, mti, logname,
4491 lovname, canceled_label,
4495 rc = mgs_write_log_pool(env, mgs, logname,
4499 name_destroy(&logname);
4500 name_destroy(&lovname);
/* the same steps are repeated for the client log of this file system */
4506 rc = name_create(&logname, fsname, "-client");
4510 if (!checked && (canceled_label == NULL)) {
4511 rc = mgs_check_marker(env, mgs, fsdb, mti, logname,
4512 fsdb->fsdb_clilov, label);
4514 name_destroy(&logname);
4515 GOTO(out_mti, rc = (rc == LLOG_PROC_BREAK ?
4519 if (canceled_label != NULL) {
4520 rc = mgs_modify(env, mgs, fsdb, mti, logname,
4521 fsdb->fsdb_clilov, canceled_label, CM_SKIP);
4523 name_destroy(&logname);
4528 rc = mgs_write_log_pool(env, mgs, logname, fsdb, fsdb->fsdb_clilov,
4529 cmd, fsname, poolname, ostname, label);
4530 mutex_unlock(&fsdb->fsdb_mutex);
4532 name_destroy(&logname);
4533 /* request for update */
4534 mgs_revoke_lock(mgs, fsdb, CONFIG_T_CONFIG);
/* NOTE(review): presumably the error-path unlock; its guard is not visible */
4540 mutex_unlock(&fsdb->fsdb_mutex);
/* both label buffers are freed with the size they were allocated with */
4544 if (canceled_label != NULL)
4545 OBD_FREE(canceled_label, label_sz);
4547 OBD_FREE(label, label_sz);
4550 mgs_unlink_fsdb(mgs, fsdb);
4551 mgs_put_fsdb(mgs, fsdb);