4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.gnu.org/licenses/gpl-2.0.html
23 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Use is subject to license terms.
26 * Copyright (c) 2011, 2016, Intel Corporation.
29 * This file is part of Lustre, http://www.lustre.org/
30 * Lustre is a trademark of Sun Microsystems, Inc.
32 * lustre/mgs/mgs_llog.c
34 * Lustre Management Server (mgs) config llog creation
36 * Author: Nathan Rutman <nathan@clusterfs.com>
37 * Author: Alex Zhuravlev <bzzz@whamcloud.com>
38 * Author: Mikhail Pershin <tappro@whamcloud.com>
41 #define DEBUG_SUBSYSTEM S_MGS
42 #define D_MGS D_CONFIG
45 #include <lustre_ioctl.h>
46 #include <lustre_param.h>
47 #include <lustre_sec.h>
48 #include <lustre_quota.h>
50 #include "mgs_internal.h"
52 /********************** Class functions ********************/
54 /* Find all logs in CONFIG directory and link them into a list */
/*
 * Walk the index iterator of mgs->mgs_configs_dir and collect entry names.
 * log_list is initialised on entry; for every key that is kept, a
 * struct mgs_direntry is allocated, the key is copied into mde_name and
 * NUL-terminated, and the entry is added to log_list.
 */
55 int class_dentry_readdir(const struct lu_env *env,
56 struct mgs_device *mgs, struct list_head *log_list)
58 struct dt_object *dir = mgs->mgs_configs_dir;
59 const struct dt_it_ops *iops;
61 struct mgs_direntry *de;
65 INIT_LIST_HEAD(log_list);
/* the directory object must provide index operations to be iterated */
68 LASSERT(dir->do_index_ops);
70 iops = &dir->do_index_ops->dio_it;
71 it = iops->init(env, dir, LUDA_64BITHASH);
75 rc = iops->load(env, it, 0);
81 key = (void *)iops->key(env, it);
83 CERROR("%s: key failed when listing %s: rc = %d\n",
84 mgs->mgs_obd->obd_name, MOUNT_CONFIGS_DIR,
88 key_sz = iops->key_size(env, it);
91 /* filter out "." and ".." entries */
95 if (key_sz == 2 && key[1] == '.')
/*
 * NOTE(review): ".bak" is four characters long, so sizeof(".bak") - 1 is 4,
 * not 3 as the comment below states. The memcmp compares only the first
 * three bytes of the literal (".ba") with the last three bytes of the key,
 * so a name ending in ".bak" does not match this filter as written.
 */
99 /* filter out ".bak" files */
100 /* sizeof(".bak") - 1 == 3 */
102 !memcmp(".bak", key + key_sz - 3, 3)) {
103 CDEBUG(D_MGS, "Skipping backup file %.*s\n",
/* one extra byte is requested so the copied name can be NUL-terminated */
108 de = mgs_direntry_alloc(key_sz + 1);
114 memcpy(de->mde_name, key, key_sz);
115 de->mde_name[key_sz] = 0;
/* list_add() inserts at the head of log_list */
117 list_add(&de->mde_list, log_list);
120 rc = iops->next(env, it);
130 CERROR("%s: key failed when listing %s: rc = %d\n",
131 mgs->mgs_obd->obd_name, MOUNT_CONFIGS_DIR, rc);
135 /******************** DB functions *********************/
/*
 * Build the string "<prefix><suffix>" in a buffer obtained with OBD_ALLOC and
 * store the buffer in *newname. The buffer is sized
 * strlen(prefix) + strlen(suffix) + 1, the same size name_destroy() computes
 * when it frees the string.
 */
137 static inline int name_create(char **newname, char *prefix, char *suffix)
140 OBD_ALLOC(*newname, strlen(prefix) + strlen(suffix) + 1);
/* cannot overrun: the allocation above was sized from these two strings */
143 sprintf(*newname, "%s%s", prefix, suffix);
/*
 * Release a string through OBD_FREE. The size passed is recomputed as
 * strlen(*name) + 1.
 * NOTE(review): this presumes the string length has not changed since the
 * buffer was allocated -- confirm for callers that edit the name in place.
 */
147 static inline void name_destroy(char **name)
150 OBD_FREE(*name, strlen(*name) + 1);
/*
 * Context passed to mgs_fsdb_handler() through its data pointer. The handler
 * uses the fsdb member as the database to update and the ver member to
 * remember the version taken from the most recent marker record.
 */
154 struct mgs_fsdb_handler_data
160 /* from the (client) config log, figure out:
161 1. which ost's/mdt's are configured (by index)
162 2. what the last config step is
163 3. COMPAT_18 osc name
165 /* It might be better to have a separate db file, instead of parsing the info
166 out of the client log. This is slow and potentially error-prone. */
/*
 * llog record callback passed to llog_process() by mgs_get_fsdb_from_llog().
 *
 * \param rec   record header; the lustre_cfg payload starts right after it
 * \param data  struct mgs_fsdb_handler_data (target fsdb plus marker version)
 *
 * For OBD_CFG_REC records that pass lustre_cfg_sanity_check() it:
 *  - marks OST indices found in LOV add/del target records,
 *  - marks MDT indices found in MDC attach records and counts them,
 *  - flags the 1.8 OSC naming scheme when the last marker version is <= 1.8,
 *  - records version and highest step from marker records.
 */
167 static int mgs_fsdb_handler(const struct lu_env *env, struct llog_handle *llh,
168 struct llog_rec_hdr *rec, void *data)
170 struct mgs_fsdb_handler_data *d = data;
171 struct fs_db *fsdb = d->fsdb;
172 int cfg_len = rec->lrh_len;
/* the configuration payload follows the record header directly */
173 char *cfg_buf = (char*) (rec + 1);
174 struct lustre_cfg *lcfg;
179 if (rec->lrh_type != OBD_CFG_REC) {
180 CERROR("unhandled lrh_type: %#x\n", rec->lrh_type);
184 rc = lustre_cfg_sanity_check(cfg_buf, cfg_len);
186 CERROR("Insane cfg\n");
190 lcfg = (struct lustre_cfg *)cfg_buf;
192 CDEBUG(D_INFO, "cmd %x %s %s\n", lcfg->lcfg_command,
193 lustre_cfg_string(lcfg, 0), lustre_cfg_string(lcfg, 1));
195 /* Figure out ost indices */
196 /* lov_modify_tgts add 0:lov1 1:ost1_UUID 2(index):0 3(gen):1 */
197 if (lcfg->lcfg_command == LCFG_LOV_ADD_OBD ||
198 lcfg->lcfg_command == LCFG_LOV_DEL_OBD) {
199 index = simple_strtoul(lustre_cfg_string(lcfg, 2),
201 CDEBUG(D_MGS, "OST index for %s is %u (%s)\n",
202 lustre_cfg_string(lcfg, 1), index,
203 lustre_cfg_string(lcfg, 2));
/*
 * NOTE(review): no range check on index is visible before it is used as a
 * bit number -- confirm it cannot exceed the size of the index map.
 */
204 set_bit(index, fsdb->fsdb_ost_index_map);
207 /* Figure out mdt indices */
208 /* attach 0:MDC_uml1_mdsA_MNT_client 1:mdc 2:1d834_MNT_client_03f */
209 if ((lcfg->lcfg_command == LCFG_ATTACH) &&
210 (strcmp(lustre_cfg_string(lcfg, 1), LUSTRE_MDC_NAME) == 0)) {
211 rc = server_name2index(lustre_cfg_string(lcfg, 0),
213 if (rc != LDD_F_SV_TYPE_MDT) {
214 CWARN("Unparsable MDC name %s, assuming index 0\n",
215 lustre_cfg_string(lcfg, 0));
219 CDEBUG(D_MGS, "MDT index is %u\n", index);
220 set_bit(index, fsdb->fsdb_mdt_index_map);
221 fsdb->fsdb_mdt_count ++;
225 * figure out the old config. fsdb_gen = 0 means old log
226 * It is obsoleted and not supported anymore
228 if (fsdb->fsdb_gen == 0) {
229 CERROR("Old config format is not supported\n");
234 * compat to 1.8, check osc name used by MDT0 to OSTs, bz18548.
236 if (!test_bit(FSDB_OSCNAME18, &fsdb->fsdb_flags) &&
237 lcfg->lcfg_command == LCFG_ATTACH &&
238 strcmp(lustre_cfg_string(lcfg, 1), LUSTRE_OSC_NAME) == 0) {
/* d->ver holds the version stored by the marker branch further down */
239 if (OBD_OCD_VERSION_MAJOR(d->ver) == 1 &&
240 OBD_OCD_VERSION_MINOR(d->ver) <= 8) {
241 CWARN("MDT using 1.8 OSC name scheme\n");
242 set_bit(FSDB_OSCNAME18, &fsdb->fsdb_flags);
246 if (lcfg->lcfg_command == LCFG_MARKER) {
247 struct cfg_marker *marker;
248 marker = lustre_cfg_buf(lcfg, 1);
250 d->ver = marker->cm_vers;
252 /* Keep track of the latest marker step */
253 fsdb->fsdb_gen = max(fsdb->fsdb_gen, marker->cm_step);
259 /* fsdb->fsdb_mutex is already held in mgs_find_or_make_fsdb*/
/*
 * Populate an fsdb from the "<fsdb_name>-client" configuration log.
 * The log is opened (created if needed) in the LLOG_CONFIG_ORIG_CTXT context,
 * initialised as a plain llog, and every record is fed to mgs_fsdb_handler().
 * FSDB_LOG_EMPTY is set when llog_get_size() reports at most one record.
 */
260 static int mgs_get_fsdb_from_llog(const struct lu_env *env,
261 struct mgs_device *mgs,
265 struct llog_handle *loghandle;
266 struct llog_ctxt *ctxt;
267 struct mgs_fsdb_handler_data d = {
274 ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
275 LASSERT(ctxt != NULL);
276 rc = name_create(&logname, fsdb->fsdb_name, "-client");
279 rc = llog_open_create(env, ctxt, &loghandle, NULL, logname);
283 rc = llog_init_handle(env, loghandle, LLOG_F_IS_PLAIN, NULL);
/* a size of <= 1 is treated as an empty log */
287 if (llog_get_size(loghandle) <= 1)
288 set_bit(FSDB_LOG_EMPTY, &fsdb->fsdb_flags);
290 rc = llog_process(env, loghandle, mgs_fsdb_handler, (void *)&d, NULL);
291 CDEBUG(D_INFO, "get_db = %d\n", rc);
293 llog_close(env, loghandle);
295 name_destroy(&logname);
/*
 * Free all sptlrpc rule data attached to an fsdb: first every entry of the
 * singly linked fsdb_srpc_tgt list (rule set, target name string, and the
 * entry itself), then the general rule set fsdb_srpc_gen.
 */
302 static void mgs_free_fsdb_srpc(struct fs_db *fsdb)
304 struct mgs_tgt_srpc_conf *tgtconf;
306 /* free target-specific rules */
307 while (fsdb->fsdb_srpc_tgt) {
/* unlink the head entry before releasing it */
308 tgtconf = fsdb->fsdb_srpc_tgt;
309 fsdb->fsdb_srpc_tgt = tgtconf->mtsc_next;
311 LASSERT(tgtconf->mtsc_tgt);
313 sptlrpc_rule_set_free(&tgtconf->mtsc_rset);
314 OBD_FREE(tgtconf->mtsc_tgt, strlen(tgtconf->mtsc_tgt) + 1);
315 OBD_FREE_PTR(tgtconf);
318 /* free general rules */
319 sptlrpc_rule_set_free(&fsdb->fsdb_srpc_gen);
/*
 * Remove an fsdb from the MGS list under mgs_mutex and drop the reference
 * held by the list. The assertion requires at least two references at that
 * point: the one owned by the list and the one held by the caller.
 */
322 static void mgs_unlink_fsdb(struct mgs_device *mgs, struct fs_db *fsdb)
324 mutex_lock(&mgs->mgs_mutex);
/* nothing to do if the fsdb is no longer linked */
325 if (likely(!list_empty(&fsdb->fsdb_list))) {
326 LASSERTF(atomic_read(&fsdb->fsdb_ref) >= 2,
327 "Invalid ref %d on %s\n",
328 atomic_read(&fsdb->fsdb_ref),
331 list_del_init(&fsdb->fsdb_list);
332 /* Drop the reference on the list.*/
333 mgs_put_fsdb(mgs, fsdb);
335 mutex_unlock(&mgs->mgs_mutex);
338 /* The caller must hold mgs->mgs_mutex. */
/*
 * Scan mgs->mgs_fs_db_list for an fsdb whose fsdb_name equals fsname.
 * Unlike mgs_find_fsdb(), which increments fsdb_ref on the result of this
 * helper, no reference is taken here.
 */
339 static inline struct fs_db *
340 mgs_find_fsdb_noref(struct mgs_device *mgs, const char *fsname)
343 struct list_head *tmp;
345 list_for_each(tmp, &mgs->mgs_fs_db_list) {
346 fsdb = list_entry(tmp, struct fs_db, fsdb_list);
347 if (strcmp(fsdb->fsdb_name, fsname) == 0)
354 /* The caller must hold mgs->mgs_mutex. */
/*
 * Look up an fsdb by name without taking a reference, unlink it from the
 * list and drop the reference that the list was holding.
 */
355 static void mgs_remove_fsdb_by_name(struct mgs_device *mgs, const char *name)
359 fsdb = mgs_find_fsdb_noref(mgs, name);
361 list_del_init(&fsdb->fsdb_list);
362 /* Drop the reference on the list.*/
363 mgs_put_fsdb(mgs, fsdb);
367 /* The caller must hold mgs->mgs_mutex. */
/*
 * Look up an fsdb by file system name and take a reference on it
 * (fsdb_ref is incremented). The reference is released with mgs_put_fsdb().
 */
368 struct fs_db *mgs_find_fsdb(struct mgs_device *mgs, const char *fsname)
372 fsdb = mgs_find_fsdb_noref(mgs, fsname);
374 atomic_inc(&fsdb->fsdb_ref);
379 /* The caller must hold mgs->mgs_mutex. */
/*
 * Create and initialise a new fsdb for fsname and link it on
 * mgs->mgs_fs_db_list.
 *
 * Returns ERR_PTR(-EINVAL) when fsname does not fit in fsdb_name and
 * ERR_PTR(-ENOMEM) on allocation failure. For MGSSELF_NAME only the
 * FSDB_MGS_SELF flag is set and the fsdb is bound to the MGS; for any other
 * name the MDT/OST index maps and the "-clilov"/"-clilmv" names are allocated
 * and the db is populated from the client and params llogs.
 *
 * On success the fsdb carries two references (list + caller). On the error
 * path the count is set to one and dropped, which releases the fsdb.
 */
380 static struct fs_db *mgs_new_fsdb(const struct lu_env *env,
381 struct mgs_device *mgs, char *fsname)
387 if (strlen(fsname) >= sizeof(fsdb->fsdb_name)) {
388 CERROR("fsname %s is too long\n", fsname);
390 RETURN(ERR_PTR(-EINVAL));
395 RETURN(ERR_PTR(-ENOMEM));
/* the length check above guarantees the terminating NUL is copied too */
397 strncpy(fsdb->fsdb_name, fsname, sizeof(fsdb->fsdb_name));
398 mutex_init(&fsdb->fsdb_mutex);
399 INIT_LIST_HEAD(&fsdb->fsdb_list);
400 set_bit(FSDB_UDESC, &fsdb->fsdb_flags);
402 INIT_LIST_HEAD(&fsdb->fsdb_clients);
403 atomic_set(&fsdb->fsdb_notify_phase, 0);
404 init_waitqueue_head(&fsdb->fsdb_notify_waitq);
405 init_completion(&fsdb->fsdb_notify_comp);
407 if (strcmp(fsname, MGSSELF_NAME) == 0) {
408 set_bit(FSDB_MGS_SELF, &fsdb->fsdb_flags);
409 fsdb->fsdb_mgs = mgs;
411 OBD_ALLOC(fsdb->fsdb_mdt_index_map, INDEX_MAP_SIZE);
412 if (!fsdb->fsdb_mdt_index_map) {
413 CERROR("No memory for MDT index maps\n");
415 GOTO(err, rc = -ENOMEM);
418 OBD_ALLOC(fsdb->fsdb_ost_index_map, INDEX_MAP_SIZE);
419 if (!fsdb->fsdb_ost_index_map) {
420 CERROR("No memory for OST index maps\n");
422 GOTO(err, rc = -ENOMEM);
425 rc = name_create(&fsdb->fsdb_clilov, fsname, "-clilov");
429 rc = name_create(&fsdb->fsdb_clilmv, fsname, "-clilmv");
433 /* initialise data for NID table */
434 mgs_ir_init_fs(env, mgs, fsdb);
435 lproc_mgs_add_live(mgs, fsdb);
438 if (!test_bit(FSDB_MGS_SELF, &fsdb->fsdb_flags)) {
439 /* populate the db from the client llog */
440 rc = mgs_get_fsdb_from_llog(env, mgs, fsdb);
442 CERROR("Can't get db from client log %d\n", rc);
448 /* populate srpc rules from params llog */
449 rc = mgs_get_fsdb_srpc_from_llog(env, mgs, fsdb);
451 CERROR("Can't get db from params log %d\n", rc);
456 /* One ref is for the fsdb on the list.
457 * The other ref is for the caller. */
458 atomic_set(&fsdb->fsdb_ref, 2);
459 list_add(&fsdb->fsdb_list, &mgs->mgs_fs_db_list);
/* error path: a single reference is set and dropped so the fsdb is freed */
464 atomic_set(&fsdb->fsdb_ref, 1);
465 mgs_put_fsdb(mgs, fsdb);
/*
 * Release everything owned by an fsdb. The fsdb must already be unlinked
 * from the MGS list (asserted). Called from mgs_put_fsdb() when the last
 * reference is dropped.
 * NOTE(review): this is also reached from the error path of mgs_new_fsdb(),
 * where fsdb_clilov / fsdb_clilmv may not have been created yet -- confirm
 * that name_destroy() tolerates a NULL name.
 */
470 static void mgs_free_fsdb(struct mgs_device *mgs, struct fs_db *fsdb)
472 LASSERT(list_empty(&fsdb->fsdb_list));
474 lproc_mgs_del_live(mgs, fsdb);
476 /* deinitialize fsr */
477 mgs_ir_fini_fs(mgs, fsdb);
/* the index maps are only allocated for non-MGS-self databases */
479 if (fsdb->fsdb_ost_index_map)
480 OBD_FREE(fsdb->fsdb_ost_index_map, INDEX_MAP_SIZE);
481 if (fsdb->fsdb_mdt_index_map)
482 OBD_FREE(fsdb->fsdb_mdt_index_map, INDEX_MAP_SIZE);
483 name_destroy(&fsdb->fsdb_clilov);
484 name_destroy(&fsdb->fsdb_clilmv);
485 mgs_free_fsdb_srpc(fsdb);
/*
 * Drop one reference on an fsdb; the fsdb is freed through mgs_free_fsdb()
 * when the count reaches zero.
 */
489 void mgs_put_fsdb(struct mgs_device *mgs, struct fs_db *fsdb)
491 if (atomic_dec_and_test(&fsdb->fsdb_ref))
492 mgs_free_fsdb(mgs, fsdb);
/* Initialise the (empty) list of file system databases kept by the MGS. */
495 int mgs_init_fsdb_list(struct mgs_device *mgs)
497 INIT_LIST_HEAD(&mgs->mgs_fs_db_list);
/*
 * Unlink every fsdb from mgs->mgs_fs_db_list under mgs_mutex and drop the
 * reference held by the list for each of them.
 */
501 int mgs_cleanup_fsdb_list(struct mgs_device *mgs)
504 struct list_head *tmp, *tmp2;
506 mutex_lock(&mgs->mgs_mutex);
/* the _safe iterator is needed because entries are unlinked while walking */
507 list_for_each_safe(tmp, tmp2, &mgs->mgs_fs_db_list) {
508 fsdb = list_entry(tmp, struct fs_db, fsdb_list);
509 list_del_init(&fsdb->fsdb_list);
510 mgs_put_fsdb(mgs, fsdb);
512 mutex_unlock(&mgs->mgs_mutex);
/*
 * Look up the fsdb for name under mgs_mutex, creating it with mgs_new_fsdb()
 * when needed. Both mgs_find_fsdb() and mgs_new_fsdb() hand back a reference
 * for the caller, who releases it with mgs_put_fsdb().
 * NOTE(review): the condition guarding creation and the assignment to *dbh
 * are not visible in this listing.
 */
516 int mgs_find_or_make_fsdb(const struct lu_env *env, struct mgs_device *mgs,
517 char *name, struct fs_db **dbh)
523 mutex_lock(&mgs->mgs_mutex);
524 fsdb = mgs_find_fsdb(mgs, name);
526 fsdb = mgs_new_fsdb(env, mgs, name);
530 CDEBUG(D_MGS, "Created new db: rc = %d\n", rc);
532 mutex_unlock(&mgs->mgs_mutex);
542 -1= empty client log */
/*
 * Check whether the index requested by a target (mti_stripe_index) is marked
 * in the OST or MDT index map of its file system. The target must not carry
 * LDD_F_NEED_INDEX (asserted). A target that is neither OST nor MDT yields
 * -EINVAL. The fsdb reference obtained here is dropped before returning.
 */
543 int mgs_check_index(const struct lu_env *env,
544 struct mgs_device *mgs,
545 struct mgs_target_info *mti)
552 LASSERT(!(mti->mti_flags & LDD_F_NEED_INDEX));
554 rc = mgs_find_or_make_fsdb(env, mgs, mti->mti_fsname, &fsdb);
556 CERROR("Can't get db for %s\n", mti->mti_fsname);
560 if (test_bit(FSDB_LOG_EMPTY, &fsdb->fsdb_flags))
/* pick the bitmap that matches the server type */
563 if (mti->mti_flags & LDD_F_SV_TYPE_OST)
564 imap = fsdb->fsdb_ost_index_map;
565 else if (mti->mti_flags & LDD_F_SV_TYPE_MDT)
566 imap = fsdb->fsdb_mdt_index_map;
568 GOTO(out, rc = -EINVAL);
570 if (test_bit(mti->mti_stripe_index, imap))
576 mgs_put_fsdb(mgs, fsdb);
/*
 * Scan a bitmap of map_len bytes (map_len * 8 bits) from bit 0 upwards for
 * the first bit that is not set. An error is logged when the scan runs off
 * the end of the map.
 */
580 static __inline__ int next_index(void *index_map, int map_len)
583 for (i = 0; i < map_len * 8; i++)
584 if (!test_bit(i, index_map)) {
587 CERROR("max index %d exceeded.\n", i);
592 0 newly marked as in use
594 +EALREADY for update of an old index */
/*
 * Reserve an index for a target in the OST or MDT index map of its fsdb.
 *
 * With LDD_F_NEED_INDEX the first free index is chosen via next_index() and
 * stored in mti_stripe_index; otherwise the requested index is used.
 * Result codes set in the visible code:
 *   0            index newly marked, mti_svname regenerated
 *   EALREADY     index was already marked (update of an existing target)
 *   -EADDRINUSE  index in use and the target is VIRGIN without WRITECONF
 *   -ERANGE      no free index, or index beyond the usable range
 *   -EINVAL      target is neither OST nor MDT, or server_make_name() failed
 * fsdb_mutex is held while the map is examined and modified.
 */
595 static int mgs_set_index(const struct lu_env *env,
596 struct mgs_device *mgs,
597 struct mgs_target_info *mti)
604 rc = mgs_find_or_make_fsdb(env, mgs, mti->mti_fsname, &fsdb);
606 CERROR("Can't get db for %s\n", mti->mti_fsname);
610 mutex_lock(&fsdb->fsdb_mutex);
611 if (mti->mti_flags & LDD_F_SV_TYPE_OST) {
612 imap = fsdb->fsdb_ost_index_map;
613 } else if (mti->mti_flags & LDD_F_SV_TYPE_MDT) {
614 imap = fsdb->fsdb_mdt_index_map;
616 GOTO(out_up, rc = -EINVAL);
619 if (mti->mti_flags & LDD_F_NEED_INDEX) {
620 rc = next_index(imap, INDEX_MAP_SIZE);
622 GOTO(out_up, rc = -ERANGE);
623 mti->mti_stripe_index = rc;
624 if (mti->mti_flags & LDD_F_SV_TYPE_MDT)
625 fsdb->fsdb_mdt_count ++;
628 /* the last index(0xffff) is reserved for default value. */
629 if (mti->mti_stripe_index >= INDEX_MAP_SIZE * 8 - 1) {
630 LCONSOLE_ERROR_MSG(0x13f, "Server %s requested index %u, "
631 "but index must be less than %u.\n",
632 mti->mti_svname, mti->mti_stripe_index,
633 INDEX_MAP_SIZE * 8 - 1);
634 GOTO(out_up, rc = -ERANGE);
637 if (test_bit(mti->mti_stripe_index, imap)) {
/* a first-time (VIRGIN) target may only take a used index with WRITECONF */
638 if ((mti->mti_flags & LDD_F_VIRGIN) &&
639 !(mti->mti_flags & LDD_F_WRITECONF)) {
640 LCONSOLE_ERROR_MSG(0x140, "Server %s requested index "
641 "%d, but that index is already in "
642 "use. Use --writeconf to force\n",
644 mti->mti_stripe_index);
645 GOTO(out_up, rc = -EADDRINUSE);
647 CDEBUG(D_MGS, "Server %s updating index %d\n",
648 mti->mti_svname, mti->mti_stripe_index);
/* positive EALREADY: not an error, reports an update of an old index */
649 GOTO(out_up, rc = EALREADY);
653 set_bit(mti->mti_stripe_index, imap);
654 clear_bit(FSDB_LOG_EMPTY, &fsdb->fsdb_flags);
/* VIRGIN and WRITECONF are masked out before the flags are passed on */
655 if (server_make_name(mti->mti_flags & ~(LDD_F_VIRGIN | LDD_F_WRITECONF),
656 mti->mti_stripe_index, mti->mti_fsname,
658 CERROR("unknown server type %#x\n", mti->mti_flags);
659 GOTO(out_up, rc = -EINVAL);
662 CDEBUG(D_MGS, "Set index for %s to %d\n", mti->mti_svname,
663 mti->mti_stripe_index);
665 GOTO(out_up, rc = 0);
668 mutex_unlock(&fsdb->fsdb_mutex);
669 mgs_put_fsdb(mgs, fsdb);
/*
 * Search key and result carrier for mgs_modify_handler(): mml_marker holds
 * the comment and target name to match plus the flags and cancel time to
 * apply. mgs_modify() also resets and tests an mml_modified member.
 */
673 struct mgs_modify_lookup {
674 struct cfg_marker mml_marker;
/*
 * llog record callback used by mgs_check_marker().
 *
 * \param data  struct cfg_marker carrying the comment and target name to find
 *
 * Only marker records that do not carry CM_SKIP are compared. When both
 * comment and target name match, rc is set to LLOG_PROC_BREAK so that
 * processing of the log stops.
 */
678 static int mgs_check_record_match(const struct lu_env *env,
679 struct llog_handle *llh,
680 struct llog_rec_hdr *rec, void *data)
682 struct cfg_marker *mc_marker = data;
683 struct cfg_marker *marker;
684 struct lustre_cfg *lcfg = REC_DATA(rec);
685 int cfg_len = REC_DATA_LEN(rec);
690 if (rec->lrh_type != OBD_CFG_REC) {
691 CDEBUG(D_ERROR, "Unhandled lrh_type: %#x\n", rec->lrh_type);
695 rc = lustre_cfg_sanity_check(lcfg, cfg_len);
697 CDEBUG(D_ERROR, "Insane cfg\n");
701 /* We only care about markers */
702 if (lcfg->lcfg_command != LCFG_MARKER)
705 marker = lustre_cfg_buf(lcfg, 1);
707 if (marker->cm_flags & CM_SKIP)
710 if ((strcmp(mc_marker->cm_comment, marker->cm_comment) == 0) &&
711 (strcmp(mc_marker->cm_tgtname, marker->cm_tgtname) == 0)) {
712 /* Found a non-skipped marker match */
713 CDEBUG(D_MGS, "Matched rec %u marker %d flag %x %s %s\n",
714 rec->lrh_index, marker->cm_step,
715 marker->cm_flags, marker->cm_tgtname,
717 rc = LLOG_PROC_BREAK;
724 * Check an existing config log record with matching comment and device
726 * 0 - checked successfully,
727 * LLOG_PROC_BREAK - record matches
/*
 * Search an existing config log for a non-skipped marker whose comment and
 * target name equal comment and devname.
 *
 * The caller must hold fsdb->fsdb_mutex (asserted). A log with at most one
 * record yields 0 without being scanned. -E2BIG is set when comment or
 * devname does not fit in the marker fields, -ENOMEM when the temporary
 * marker cannot be allocated. LLOG_PROC_BREAK is not reported as a failure.
 */
730 static int mgs_check_marker(const struct lu_env *env, struct mgs_device *mgs,
731 struct fs_db *fsdb, struct mgs_target_info *mti,
732 char *logname, char *devname, char *comment)
734 struct llog_handle *loghandle;
735 struct llog_ctxt *ctxt;
736 struct cfg_marker *mc_marker;
741 LASSERT(mutex_is_locked(&fsdb->fsdb_mutex));
742 CDEBUG(D_MGS, "mgs check %s/%s/%s\n", logname, devname, comment);
744 ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
745 LASSERT(ctxt != NULL);
746 rc = llog_open(env, ctxt, &loghandle, NULL, logname, LLOG_OPEN_EXISTS);
753 rc = llog_init_handle(env, loghandle, LLOG_F_IS_PLAIN, NULL);
757 if (llog_get_size(loghandle) <= 1)
758 GOTO(out_close, rc = 0);
760 OBD_ALLOC_PTR(mc_marker);
762 GOTO(out_close, rc = -ENOMEM);
/* strlcpy returns the source length; >= the field size means truncation */
763 if (strlcpy(mc_marker->cm_comment, comment,
764 sizeof(mc_marker->cm_comment)) >=
765 sizeof(mc_marker->cm_comment))
766 GOTO(out_free, rc = -E2BIG);
767 if (strlcpy(mc_marker->cm_tgtname, devname,
768 sizeof(mc_marker->cm_tgtname)) >=
769 sizeof(mc_marker->cm_tgtname))
770 GOTO(out_free, rc = -E2BIG);
772 rc = llog_process(env, loghandle, mgs_check_record_match,
773 (void *)mc_marker, NULL);
776 OBD_FREE_PTR(mc_marker);
779 llog_close(env, loghandle);
781 if (rc && rc != LLOG_PROC_BREAK)
782 CDEBUG(D_ERROR, "%s: mgs check %s/%s failed: rc = %d\n",
783 mgs->mgs_obd->obd_name, mti->mti_svname, comment, rc);
/*
 * llog record callback used by mgs_modify().
 *
 * \param data  struct mgs_modify_lookup with the marker to match and the
 *              flags / cancel time to apply
 *
 * For a non-skipped marker record whose comment and target name match, the
 * marker flags are updated in place (CM_EXCLUDE cleared first, then the
 * requested flags OR-ed in), the cancel time is set, and the record is
 * written back at its own index.
 */
788 static int mgs_modify_handler(const struct lu_env *env,
789 struct llog_handle *llh,
790 struct llog_rec_hdr *rec, void *data)
792 struct mgs_modify_lookup *mml = data;
793 struct cfg_marker *marker;
794 struct lustre_cfg *lcfg = REC_DATA(rec);
795 int cfg_len = REC_DATA_LEN(rec);
799 if (rec->lrh_type != OBD_CFG_REC) {
800 CERROR("unhandled lrh_type: %#x\n", rec->lrh_type);
804 rc = lustre_cfg_sanity_check(lcfg, cfg_len);
806 CERROR("Insane cfg\n");
810 /* We only care about markers */
811 if (lcfg->lcfg_command != LCFG_MARKER)
814 marker = lustre_cfg_buf(lcfg, 1);
815 if ((strcmp(mml->mml_marker.cm_comment, marker->cm_comment) == 0) &&
816 (strcmp(mml->mml_marker.cm_tgtname, marker->cm_tgtname) == 0) &&
817 !(marker->cm_flags & CM_SKIP)) {
818 /* Found a non-skipped marker match */
819 CDEBUG(D_MGS, "Changing rec %u marker %d %x->%x: %s %s\n",
820 rec->lrh_index, marker->cm_step,
821 marker->cm_flags, mml->mml_marker.cm_flags,
822 marker->cm_tgtname, marker->cm_comment);
823 /* Overwrite the old marker llog entry */
824 marker->cm_flags &= ~CM_EXCLUDE; /* in case we're unexcluding */
825 marker->cm_flags |= mml->mml_marker.cm_flags;
826 marker->cm_canceltime = mml->mml_marker.cm_canceltime;
/* rewrite in place: the record's own index is passed, not LLOG_NEXT_IDX */
827 rc = llog_write(env, llh, rec, rec->lrh_index);
836 * Modify an existing config log record (for CM_SKIP or CM_EXCLUDE)
838 * 0 - modified successfully,
839 * 1 - no modification was done
/*
 * Apply flags (CM_SKIP or CM_EXCLUDE) to the matching marker records of an
 * existing config log by running mgs_modify_handler() over it.
 *
 * The caller must hold fsdb->fsdb_mutex (asserted). A log with at most one
 * record yields 0 without being scanned. -E2BIG is set when comment or
 * devname does not fit in the marker fields. The cancel time is the current
 * time when flags is non-zero and 0 otherwise.
 */
842 static int mgs_modify(const struct lu_env *env, struct mgs_device *mgs,
843 struct fs_db *fsdb, struct mgs_target_info *mti,
844 char *logname, char *devname, char *comment, int flags)
846 struct llog_handle *loghandle;
847 struct llog_ctxt *ctxt;
848 struct mgs_modify_lookup *mml;
853 LASSERT(mutex_is_locked(&fsdb->fsdb_mutex));
854 CDEBUG(D_MGS, "modify %s/%s/%s fl=%x\n", logname, devname, comment,
857 ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
858 LASSERT(ctxt != NULL);
859 rc = llog_open(env, ctxt, &loghandle, NULL, logname, LLOG_OPEN_EXISTS);
866 rc = llog_init_handle(env, loghandle, LLOG_F_IS_PLAIN, NULL);
870 if (llog_get_size(loghandle) <= 1)
871 GOTO(out_close, rc = 0);
875 GOTO(out_close, rc = -ENOMEM);
/* strlcpy returns the source length; >= the field size means truncation */
876 if (strlcpy(mml->mml_marker.cm_comment, comment,
877 sizeof(mml->mml_marker.cm_comment)) >=
878 sizeof(mml->mml_marker.cm_comment))
879 GOTO(out_free, rc = -E2BIG);
880 if (strlcpy(mml->mml_marker.cm_tgtname, devname,
881 sizeof(mml->mml_marker.cm_tgtname)) >=
882 sizeof(mml->mml_marker.cm_tgtname))
883 GOTO(out_free, rc = -E2BIG);
884 /* Modify mostly means cancel */
885 mml->mml_marker.cm_flags = flags;
886 mml->mml_marker.cm_canceltime = flags ? cfs_time_current_sec() : 0;
887 mml->mml_modified = 0;
888 rc = llog_process(env, loghandle, mgs_modify_handler, (void *)mml,
/* processing succeeded but no record was changed */
890 if (!rc && !mml->mml_modified)
897 llog_close(env, loghandle);
900 CERROR("%s: modify %s/%s failed: rc = %d\n",
901 mgs->mgs_obd->obd_name, mti->mti_svname, comment, rc);
906 /** This structure is passed to mgs_replace_handler */
/*
 * State carried across records while a config log is copied with new NIDs.
 * mgs_replace_nids_log() fills target.mti_svname with the device name and
 * target.mti_params with the NID list, and points temp_llh at the log being
 * written.
 */
907 struct mgs_replace_uuid_lookup {
908 /* Nids are replaced for this target device */
909 struct mgs_target_info target;
910 /* Temporary modified llog */
911 struct llog_handle *temp_llh;
912 /* Flag is set if in target block*/
913 int in_target_device;
914 /* Nids already added. Just skip (multiple nids) */
915 int device_nids_added;
916 /* Flag is set if this block should not be copied */
921 * Check: a) if block should be skipped
922 * b) is it target block
927 * \retval 0 should not be skipped
928 * \retval 1 should be skipped
/*
 * Inspect a record for marker information and update the copy state in mrul.
 * Only LCFG_MARKER records are examined. Markers carrying CM_EXCLUDE together
 * with CM_START or CM_END delimit blocks that are dropped from the new log.
 * A marker whose target name equals mrul->target.mti_svname switches
 * in_target_device on (CM_START, also resetting device_nids_added) or off
 * (CM_END).
 */
930 static int check_markers(struct lustre_cfg *lcfg,
931 struct mgs_replace_uuid_lookup *mrul)
933 struct cfg_marker *marker;
935 /* Track markers. Find given device */
936 if (lcfg->lcfg_command == LCFG_MARKER) {
937 marker = lustre_cfg_buf(lcfg, 1);
938 /* Clean llog from records marked as CM_EXCLUDE.
939 CM_SKIP records are used for "active" command
940 and can be restored if needed */
941 if ((marker->cm_flags & (CM_EXCLUDE | CM_START)) ==
942 (CM_EXCLUDE | CM_START)) {
947 if ((marker->cm_flags & (CM_EXCLUDE | CM_END)) ==
948 (CM_EXCLUDE | CM_END)) {
953 if (strcmp(mrul->target.mti_svname, marker->cm_tgtname) == 0) {
/* a single marker must not be both a start and an end marker */
954 LASSERT(!(marker->cm_flags & CM_START) ||
955 !(marker->cm_flags & CM_END));
956 if (marker->cm_flags & CM_START) {
957 mrul->in_target_device = 1;
958 mrul->device_nids_added = 0;
959 } else if (marker->cm_flags & CM_END)
960 mrul->in_target_device = 0;
/*
 * Build a lustre_cfg record and append it to a config log.
 *
 * \param llh      log to append to
 * \param cfgname  value for buffer 0 of the record
 * \param nid      stored in lcfg_nid of the record
 * \param cmd      configuration command
 * \param s1..s4   strings for buffers 1..4
 *
 * The record is built in the per-thread mgi_bufs, written at LLOG_NEXT_IDX
 * and freed again; a failure to write is logged together with all arguments.
 */
967 static int record_base(const struct lu_env *env, struct llog_handle *llh,
968 char *cfgname, lnet_nid_t nid, int cmd,
969 char *s1, char *s2, char *s3, char *s4)
971 struct mgs_thread_info *mgi = mgs_env_info(env);
972 struct llog_cfg_rec *lcr;
975 CDEBUG(D_MGS, "lcfg %s %#x %s %s %s %s\n", cfgname,
976 cmd, s1, s2, s3, s4);
978 lustre_cfg_bufs_reset(&mgi->mgi_bufs, cfgname);
980 lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 1, s1);
982 lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 2, s2);
984 lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 3, s3);
986 lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 4, s4);
988 lcr = lustre_cfg_rec_new(cmd, &mgi->mgi_bufs);
992 lcr->lcr_cfg.lcfg_nid = nid;
993 rc = llog_write(env, llh, &lcr->lcr_hdr, LLOG_NEXT_IDX);
/* the record is freed after the write attempt in the visible code */
995 lustre_cfg_rec_free(lcr);
999 "failed to write lcfg %s %#x %s %s %s %s: rc = %d\n",
1000 cfgname, cmd, s1, s2, s3, s4, rc);
/*
 * Append an LCFG_ADD_UUID record that associates nid with uuid. No device
 * name is passed (cfgname is NULL); uuid becomes string 1 of the record.
 */
1004 static inline int record_add_uuid(const struct lu_env *env,
1005 struct llog_handle *llh,
1006 uint64_t nid, char *uuid)
1008 return record_base(env, llh, NULL, nid, LCFG_ADD_UUID, uuid,
/*
 * Append an LCFG_ADD_CONN record for devname; uuid becomes string 1 of the
 * record and the nid field is 0.
 */
1012 static inline int record_add_conn(const struct lu_env *env,
1013 struct llog_handle *llh,
1014 char *devname, char *uuid)
1016 return record_base(env, llh, devname, 0, LCFG_ADD_CONN, uuid,
/*
 * Append an LCFG_ATTACH record for devname; type and uuid become strings 1
 * and 2 of the record and the nid field is 0.
 */
1020 static inline int record_attach(const struct lu_env *env,
1021 struct llog_handle *llh, char *devname,
1022 char *type, char *uuid)
1024 return record_base(env, llh, devname, 0, LCFG_ATTACH, type, uuid,
/*
 * Append an LCFG_SETUP record for devname with s1..s4 as strings 1..4 and a
 * nid field of 0.
 */
1028 static inline int record_setup(const struct lu_env *env,
1029 struct llog_handle *llh, char *devname,
1030 char *s1, char *s2, char *s3, char *s4)
1032 return record_base(env, llh, devname, 0, LCFG_SETUP, s1, s2, s3, s4);
1036 * \retval <0 record processing error
1037 * \retval n record is processed. No need to copy the original one.
1038 * \retval 0 record is not processed.
/*
 * Rewrite one record that lies inside the target device block.
 *
 * LCFG_ADD_UUID: the original record is dropped and one LCFG_ADD_UUID record
 * is written to mrul->temp_llh for every NID parsed from
 * mrul->target.mti_params; the whole mti_params string is passed as the uuid.
 * device_nids_added is then set.
 * LCFG_SETUP (once NIDs have been added): the record is re-created with
 * mti_params in place of string 2; strings 0, 1, 3 and 4 are kept.
 */
1040 static int process_command(const struct lu_env *env, struct lustre_cfg *lcfg,
1041 struct mgs_replace_uuid_lookup *mrul)
1048 if (lcfg->lcfg_command == LCFG_ADD_UUID) {
1049 /* LCFG_ADD_UUID command found. Let's skip original command
1050 and add passed nids */
1051 ptr = mrul->target.mti_params;
1052 while (class_parse_nid(ptr, &nid, &ptr) == 0) {
1053 CDEBUG(D_MGS, "add nid %s with uuid %s, "
1054 "device %s\n", libcfs_nid2str(nid),
1055 mrul->target.mti_params,
1056 mrul->target.mti_svname);
1057 rc = record_add_uuid(env,
1058 mrul->temp_llh, nid,
1059 mrul->target.mti_params);
/*
 * NOTE(review): when no NID was parsed at all, nid is printed below without
 * having been set by class_parse_nid() -- confirm it is initialised at its
 * declaration (not visible in this listing).
 */
1064 if (nids_added == 0) {
1065 CERROR("No new nids were added, nid %s with uuid %s, "
1066 "device %s\n", libcfs_nid2str(nid),
1067 mrul->target.mti_params,
1068 mrul->target.mti_svname);
1071 mrul->device_nids_added = 1;
1077 if (mrul->device_nids_added && lcfg->lcfg_command == LCFG_SETUP) {
1078 /* LCFG_SETUP command found. UUID should be changed */
1079 rc = record_setup(env,
1081 /* devname the same */
1082 lustre_cfg_string(lcfg, 0),
1083 /* s1 is not changed */
1084 lustre_cfg_string(lcfg, 1),
1085 /* new uuid should be
1087 mrul->target.mti_params,
1088 /* s3 is not changed */
1089 lustre_cfg_string(lcfg, 3),
1090 /* s4 is not changed */
1091 lustre_cfg_string(lcfg, 4));
1095 /* Other commands in target device block */
1100 * Handler that is called for every record in the llog.
1101 * Records are processed in the order they are placed in the llog.
1103 * \param[in] llh log to be processed
1104 * \param[in] rec current record
1105 * \param[in] data mgs_replace_uuid_lookup structure
/*
 * Per-record callback that copies a backup config log into mrul->temp_llh
 * while replacing the NIDs of one target.
 *
 * Decision order in the visible code:
 *  1. records failing lustre_cfg_sanity_check() are skipped, not copied;
 *  2. records rejected by check_markers(), or seen while mrul->skip_it is
 *     set, are skipped;
 *  3. records outside the target device block are copied unchanged;
 *  4. inside the block, further ADD_UUID / ADD_CONN records are skipped once
 *     the new NIDs have been written;
 *  5. everything else goes through process_command().
 */
1109 static int mgs_replace_handler(const struct lu_env *env,
1110 struct llog_handle *llh,
1111 struct llog_rec_hdr *rec,
1114 struct mgs_replace_uuid_lookup *mrul;
1115 struct lustre_cfg *lcfg = REC_DATA(rec);
1116 int cfg_len = REC_DATA_LEN(rec);
1120 mrul = (struct mgs_replace_uuid_lookup *)data;
1122 if (rec->lrh_type != OBD_CFG_REC) {
1123 CERROR("unhandled lrh_type: %#x, cmd %x %s %s\n",
1124 rec->lrh_type, lcfg->lcfg_command,
1125 lustre_cfg_string(lcfg, 0),
1126 lustre_cfg_string(lcfg, 1));
1130 rc = lustre_cfg_sanity_check(lcfg, cfg_len);
1132 /* Do not copy any invalidated records */
1133 GOTO(skip_out, rc = 0);
1136 rc = check_markers(lcfg, mrul);
1137 if (rc || mrul->skip_it)
1138 GOTO(skip_out, rc = 0);
1140 /* Write to new log all commands outside target device block */
1141 if (!mrul->in_target_device)
1142 GOTO(copy_out, rc = 0);
1144 /* Skip all other LCFG_ADD_UUID and LCFG_ADD_CONN records
1145 (failover nids) for this target, assuming that if the
1146 primary is changing then so is the failover */
1147 if (mrul->device_nids_added &&
1148 (lcfg->lcfg_command == LCFG_ADD_UUID ||
1149 lcfg->lcfg_command == LCFG_ADD_CONN))
1150 GOTO(skip_out, rc = 0);
1152 rc = process_command(env, lcfg, mrul);
1159 /* Record is placed in temporary llog as is */
1160 rc = llog_write(env, mrul->temp_llh, rec, LLOG_NEXT_IDX);
1162 CDEBUG(D_MGS, "Copied idx=%d, rc=%d, len=%d, cmd %x %s %s\n",
1163 rec->lrh_index, rc, rec->lrh_len, lcfg->lcfg_command,
1164 lustre_cfg_string(lcfg, 0), lustre_cfg_string(lcfg, 1));
1168 CDEBUG(D_MGS, "Skipped idx=%d, rc=%d, len=%d, cmd %x %s %s\n",
1169 rec->lrh_index, rc, rec->lrh_len, lcfg->lcfg_command,
1170 lustre_cfg_string(lcfg, 0), lustre_cfg_string(lcfg, 1));
/*
 * Report whether the named config log is empty, as determined by
 * llog_is_empty() in the LLOG_CONFIG_ORIG_CTXT context of the MGS. The
 * context reference taken here is released before returning.
 */
1174 static int mgs_log_is_empty(const struct lu_env *env,
1175 struct mgs_device *mgs, char *name)
1177 struct llog_ctxt *ctxt;
1180 ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
1181 LASSERT(ctxt != NULL);
1183 rc = llog_is_empty(env, ctxt, name);
1184 llog_ctxt_put(ctxt);
/*
 * Replace the NIDs of devname inside one config log.
 *
 * Steps visible in the code:
 *  1. an empty log is left untouched (rc = 0);
 *  2. the log is copied to "<logname>.bak" and the original is erased;
 *  3. the original name is re-created and the backup is replayed into it
 *     through mgs_replace_handler(), which substitutes the new NIDs;
 *  4. both logs are closed; on the restore path the backup is copied back
 *     over the log.
 *
 * \param logname  config log to rewrite
 * \param devname  target whose NIDs are replaced
 * \param nids     new NID list, parsed later by process_command()
 */
1188 static int mgs_replace_nids_log(const struct lu_env *env,
1189 struct obd_device *mgs, struct fs_db *fsdb,
1190 char *logname, char *devname, char *nids)
1192 struct llog_handle *orig_llh, *backup_llh;
1193 struct llog_ctxt *ctxt;
1194 struct mgs_replace_uuid_lookup *mrul;
1195 struct mgs_device *mgs_dev = lu2mgs_dev(mgs->obd_lu_dev);
1196 static struct obd_uuid cfg_uuid = { .uuid = "config_uuid" };
1201 CDEBUG(D_MGS, "Replace nids for %s in %s\n", devname, logname);
1203 ctxt = llog_get_context(mgs, LLOG_CONFIG_ORIG_CTXT);
1204 LASSERT(ctxt != NULL);
1206 if (mgs_log_is_empty(env, mgs_dev, logname)) {
1207 /* Log is empty. Nothing to replace */
1208 GOTO(out_put, rc = 0);
/* sized for logname plus the ".bak" suffix and the terminating NUL */
1211 OBD_ALLOC(backup, strlen(logname) + strlen(".bak") + 1);
1213 GOTO(out_put, rc = -ENOMEM);
1215 sprintf(backup, "%s.bak", logname);
1217 rc = llog_backup(env, mgs, ctxt, ctxt, logname, backup);
1219 /* Now erase original log file. Connections are not allowed.
1220 Backup is already saved */
1221 rc = llog_erase(env, ctxt, NULL, logname);
1224 } else if (rc != -ENOENT) {
1225 CERROR("%s: can't make backup for %s: rc = %d\n",
1226 mgs->obd_name, logname, rc);
1230 /* open local log */
1231 rc = llog_open_create(env, ctxt, &orig_llh, NULL, logname);
1233 GOTO(out_restore, rc);
1235 rc = llog_init_handle(env, orig_llh, LLOG_F_IS_PLAIN, &cfg_uuid);
1237 GOTO(out_closel, rc);
1239 /* open backup llog */
1240 rc = llog_open(env, ctxt, &backup_llh, NULL, backup,
1243 GOTO(out_closel, rc);
1245 rc = llog_init_handle(env, backup_llh, LLOG_F_IS_PLAIN, NULL);
1247 GOTO(out_close, rc);
1249 if (llog_get_size(backup_llh) <= 1)
1250 GOTO(out_close, rc = 0);
1252 OBD_ALLOC_PTR(mrul);
1254 GOTO(out_close, rc = -ENOMEM);
/*
 * NOTE(review): unlike mgs_modify() and mgs_check_marker(), the strlcpy
 * results below are not checked, so an over-long devname or nids string
 * would be truncated silently -- confirm this is acceptable.
 */
1255 /* devname is the only information needed to replace UUID records */
1256 strlcpy(mrul->target.mti_svname, devname,
1257 sizeof(mrul->target.mti_svname));
1258 /* parse nids later */
1259 strlcpy(mrul->target.mti_params, nids, sizeof(mrul->target.mti_params));
1260 /* Copy records to this temporary llog */
1261 mrul->temp_llh = orig_llh;
1263 rc = llog_process(env, backup_llh, mgs_replace_handler,
1264 (void *)mrul, NULL);
/*
 * NOTE(review): llog_close() is called with a NULL env here, while other
 * functions in this file pass env -- confirm this is intentional.
 */
1267 rc2 = llog_close(NULL, backup_llh);
1271 rc2 = llog_close(NULL, orig_llh);
1277 CERROR("%s: llog should be restored: rc = %d\n",
1279 rc2 = llog_backup(env, mgs, ctxt, ctxt, backup,
1282 CERROR("%s: can't restore backup %s: rc = %d\n",
1283 mgs->obd_name, logname, rc2);
1287 OBD_FREE(backup, strlen(backup) + 1);
1290 llog_ctxt_put(ctxt);
1293 CERROR("%s: failed to replace nids in log %s: rc = %d\n",
1294 mgs->obd_name, logname, rc);
1300 * Parse device name and get file system name and/or device index
1302 * \param[in] devname device name (ex. lustre-MDT0000)
1303 * \param[out] fsname file system name(optional)
1304 * \param[out] index device index(optional)
/*
 * Split a device name into its parts using server_name2fsname() for the
 * file system name and server_name2index() for the device index. A debug
 * message is emitted when either part cannot be extracted.
 */
1308 static int mgs_parse_devname(char *devname, char *fsname, __u32 *index)
1313 /* Extract fsname */
1315 rc = server_name2fsname(devname, fsname, NULL);
1317 CDEBUG(D_MGS, "Device name %s without fsname\n",
1324 rc = server_name2index(devname, index, NULL);
1326 CDEBUG(D_MGS, "Device name %s with wrong index\n",
1335 /* This is only called during replace_nids */
/*
 * Decide whether NIDs may be replaced right now: returns non-zero only when
 * at most three obd devices exist and no export other than the self export
 * was counted. The export list is walked under obd_dev_lock; exports that are
 * counted are reported with CERROR.
 */
1336 static int only_mgs_is_running(struct obd_device *mgs_obd)
1338 /* TBD: is there a global variable holding the device count? */
1339 int num_devices = get_devices_count();
1340 int num_exports = 0;
1341 struct obd_export *exp;
1343 spin_lock(&mgs_obd->obd_dev_lock);
1344 list_for_each_entry(exp, &mgs_obd->obd_exports, exp_obd_chain) {
1345 /* skip self export */
1346 if (exp == mgs_obd->obd_self_export)
1348 if (exp_connect_flags(exp) & OBD_CONNECT_MDS_MDS)
1353 CERROR("%s: node %s still connected during replace_nids "
1354 "connect_flags:%llx\n",
1356 libcfs_nid2str(exp->exp_nid_stats->nid),
1357 exp_connect_flags(exp));
1360 spin_unlock(&mgs_obd->obd_dev_lock);
1362 /* osd, MGS and MGC + self_export
1363 (wc -l /proc/fs/lustre/devices <= 2) && (non self exports == 0) */
1364 return (num_devices <= 3) && (num_exports == 0);
/*
 * Build the MDT log name "<fsname>-MDTxxxx" for index i, where xxxx is i in
 * hexadecimal padded to at least four digits. The result is allocated by
 * name_create() and released with name_destroy().
 * NOTE(review): the size of mdt_index is not visible here; an index above
 * 0xffff produces more than four hex digits -- confirm the buffer allows it.
 */
1367 static int name_create_mdt(char **logname, char *fsname, int i)
1371 sprintf(mdt_index, "-MDT%04x", i);
1372 return name_create(logname, fsname, mdt_index);
1376 * Replace nids for \a device to \a nids values
1378 * \param obd MGS obd device
1379 * \param devname nids need to be replaced for this device
1380 * (ex. lustre-OST0000)
1381 * \param nids nids list (ex. nid1,nid2,nid3)
/*
 * Replace the NIDs of devname in the client log and in the log of every MDT
 * marked in fsdb_mdt_index_map.
 *
 * New connections are blocked (obd_no_conn = 1) for the duration of the call
 * and the previous setting is restored on exit. -EINPROGRESS is set when
 * only_mgs_is_running() reports other devices or exports.
 */
1385 int mgs_replace_nids(const struct lu_env *env,
1386 struct mgs_device *mgs,
1387 char *devname, char *nids)
1389 /* Assume fsname is part of device name */
1390 char fsname[MTI_NAME_MAXLEN];
1394 struct fs_db *fsdb = NULL;
1397 struct obd_device *mgs_obd = mgs->mgs_obd;
1400 /* We can only change NIDs if no other nodes are connected */
1401 spin_lock(&mgs_obd->obd_dev_lock);
/* remember the previous setting so it can be restored on exit */
1402 conn_state = mgs_obd->obd_no_conn;
1403 mgs_obd->obd_no_conn = 1;
1404 spin_unlock(&mgs_obd->obd_dev_lock);
1406 /* We can not change nids if not only MGS is started */
1407 if (!only_mgs_is_running(mgs_obd)) {
1408 CERROR("Only MGS is allowed to be started\n");
1409 GOTO(out, rc = -EINPROGRESS);
1412 /* Get fsname and index*/
1413 rc = mgs_parse_devname(devname, fsname, &index);
1417 rc = mgs_find_or_make_fsdb(env, mgs, fsname, &fsdb);
1419 CERROR("%s: can't find fsdb: rc = %d\n", fsname, rc);
/*
 * NOTE(review): the return value of name_create() is ignored here and that
 * of name_create_mdt() in the loop below; if the allocation fails, logname
 * is passed on to mgs_replace_nids_log() and name_destroy() regardless.
 */
1423 /* Process client llogs */
1424 name_create(&logname, fsname, "-client");
1425 rc = mgs_replace_nids_log(env, mgs_obd, fsdb, logname, devname, nids);
1426 name_destroy(&logname);
1428 CERROR("%s: error while replacing NIDs for %s: rc = %d\n",
1429 fsname, devname, rc);
1433 /* Process MDT llogs */
1434 for (i = 0; i < INDEX_MAP_SIZE * 8; i++) {
/* only indices marked in the MDT map have a log to rewrite */
1435 if (!test_bit(i, fsdb->fsdb_mdt_index_map))
1437 name_create_mdt(&logname, fsname, i);
1438 rc = mgs_replace_nids_log(env, mgs_obd, fsdb, logname, devname, nids);
1439 name_destroy(&logname);
1445 spin_lock(&mgs_obd->obd_dev_lock);
1446 mgs_obd->obd_no_conn = conn_state;
1447 spin_unlock(&mgs_obd->obd_dev_lock);
1450 mgs_put_fsdb(mgs, fsdb);
/*
 * Write an LCFG_SETUP record carrying a lov descriptor to \a llh.
 *
 * The per-thread buffer set (mgs_env_info(env)->mgi_bufs) is reset with
 * \a devname, \a desc is attached as buffer 1, a config record is built
 * with lustre_cfg_rec_new() and appended with llog_write() at
 * LLOG_NEXT_IDX. The record is freed after the write.
 *
 * \param env		execution environment
 * \param llh		log handle to append to
 * \param devname	device name given to lustre_cfg_bufs_reset()
 * \param desc		lov descriptor, sizeof(*desc) bytes are recorded
 */
1455 static int record_lov_setup(const struct lu_env *env, struct llog_handle *llh,
1456 char *devname, struct lov_desc *desc)
1458 struct mgs_thread_info *mgi = mgs_env_info(env);
1459 struct llog_cfg_rec *lcr;
1462 lustre_cfg_bufs_reset(&mgi->mgi_bufs, devname);
1463 lustre_cfg_bufs_set(&mgi->mgi_bufs, 1, desc, sizeof(*desc));
1464 lcr = lustre_cfg_rec_new(LCFG_SETUP, &mgi->mgi_bufs);
1468 rc = llog_write(env, llh, &lcr->lcr_hdr, LLOG_NEXT_IDX);
1469 lustre_cfg_rec_free(lcr);
/*
 * Write an LCFG_SETUP record carrying an lmv descriptor to \a llh.
 *
 * Same shape as the lov variant: the per-thread mgi_bufs is reset with
 * \a devname, \a desc becomes buffer 1, the record is created with
 * lustre_cfg_rec_new(), appended with llog_write() at LLOG_NEXT_IDX and
 * then freed.
 *
 * \param env		execution environment
 * \param llh		log handle to append to
 * \param devname	device name given to lustre_cfg_bufs_reset()
 * \param desc		lmv descriptor, sizeof(*desc) bytes are recorded
 */
1473 static int record_lmv_setup(const struct lu_env *env, struct llog_handle *llh,
1474 char *devname, struct lmv_desc *desc)
1476 struct mgs_thread_info *mgi = mgs_env_info(env);
1477 struct llog_cfg_rec *lcr;
1480 lustre_cfg_bufs_reset(&mgi->mgi_bufs, devname);
1481 lustre_cfg_bufs_set(&mgi->mgi_bufs, 1, desc, sizeof(*desc));
1482 lcr = lustre_cfg_rec_new(LCFG_SETUP, &mgi->mgi_bufs);
1486 rc = llog_write(env, llh, &lcr->lcr_hdr, LLOG_NEXT_IDX);
1487 lustre_cfg_rec_free(lcr);
/*
 * Thin wrapper that records an LCFG_ADD_MDC command through
 * record_base().
 *
 * Note the argument order handed to record_base(): \a mdtuuid, \a index,
 * gen and finally \a mdcuuid, which differs from the order of this
 * function's own parameters.
 *
 * \return the return value of record_base()
 */
1491 static inline int record_mdc_add(const struct lu_env *env,
1492 struct llog_handle *llh,
1493 char *logname, char *mdcuuid,
1494 char *mdtuuid, char *index,
1497 return record_base(env,llh,logname,0,LCFG_ADD_MDC,
1498 mdtuuid,index,gen,mdcuuid);
/*
 * Thin wrapper that records an LCFG_LOV_ADD_OBD command through
 * record_base(), passing \a ost_uuid, \a index and \a gen as the string
 * arguments and NULL for the last one.
 *
 * \return the return value of record_base()
 */
1501 static inline int record_lov_add(const struct lu_env *env,
1502 struct llog_handle *llh,
1503 char *lov_name, char *ost_uuid,
1504 char *index, char *gen)
1506 return record_base(env, llh, lov_name, 0, LCFG_LOV_ADD_OBD,
1507 ost_uuid, index, gen, NULL);
/*
 * Thin wrapper that records an LCFG_MOUNTOPT command through
 * record_base(). No device name is supplied (NULL); \a profile,
 * \a lov_name and mdc_name are passed as the string arguments.
 *
 * \return the return value of record_base()
 */
1510 static inline int record_mount_opt(const struct lu_env *env,
1511 struct llog_handle *llh,
1512 char *profile, char *lov_name,
1515 return record_base(env, llh, NULL, 0, LCFG_MOUNTOPT,
1516 profile, lov_name, mdc_name, NULL);
/*
 * Append an LCFG_MARKER record to \a llh.
 *
 * The per-thread marker (mgs_env_info(env)->mgi_marker) is filled with:
 *  - cm_step       from fsdb->fsdb_gen
 *  - cm_flags      from \a flags
 *  - cm_vers       LUSTRE_VERSION_CODE
 *  - cm_tgtname    copy of \a tgtname (strlcpy, result checked against
 *                  the field size)
 *  - cm_comment    copy of \a comment (same check)
 *  - cm_createtime cfs_time_current_sec()
 *  - cm_canceltime 0
 * and is then recorded as buffer 1 of the config record.
 *
 * NOTE(review): the statement controlled by the CM_START test is not on
 * a visible line; presumably it advances fsdb->fsdb_gen - confirm.
 */
1519 static int record_marker(const struct lu_env *env,
1520 struct llog_handle *llh,
1521 struct fs_db *fsdb, __u32 flags,
1522 char *tgtname, char *comment)
1524 struct mgs_thread_info *mgi = mgs_env_info(env);
1525 struct llog_cfg_rec *lcr;
1529 if (flags & CM_START)
1531 mgi->mgi_marker.cm_step = fsdb->fsdb_gen;
1532 mgi->mgi_marker.cm_flags = flags;
1533 mgi->mgi_marker.cm_vers = LUSTRE_VERSION_CODE;
/* strlcpy() returns the source length, so >= sizeof means truncation */
1534 cplen = strlcpy(mgi->mgi_marker.cm_tgtname, tgtname,
1535 sizeof(mgi->mgi_marker.cm_tgtname));
1536 if (cplen >= sizeof(mgi->mgi_marker.cm_tgtname))
1538 cplen = strlcpy(mgi->mgi_marker.cm_comment, comment,
1539 sizeof(mgi->mgi_marker.cm_comment));
1540 if (cplen >= sizeof(mgi->mgi_marker.cm_comment))
1542 mgi->mgi_marker.cm_createtime = cfs_time_current_sec();
1543 mgi->mgi_marker.cm_canceltime = 0;
1544 lustre_cfg_bufs_reset(&mgi->mgi_bufs, NULL);
1545 lustre_cfg_bufs_set(&mgi->mgi_bufs, 1, &mgi->mgi_marker,
1546 sizeof(mgi->mgi_marker));
1547 lcr = lustre_cfg_rec_new(LCFG_MARKER, &mgi->mgi_bufs);
1551 rc = llog_write(env, llh, &lcr->lcr_hdr, LLOG_NEXT_IDX);
1552 lustre_cfg_rec_free(lcr);
/*
 * Open (creating if needed) the config log \a name and prepare it for
 * appending records.
 *
 * The LLOG_CONFIG_ORIG_CTXT context of the MGS obd is looked up, the log
 * is opened with llog_open_create() and initialised as a plain log
 * (LLOG_F_IS_PLAIN) with the fixed uuid "config_uuid". The context
 * reference is dropped with llog_ctxt_put(), and failures are reported
 * through CERROR with the obd name, log name and rc.
 *
 * \param llh	in/out log handle pointer, filled by llog_open_create()
 * \param name	name of the log to start
 *
 * Error codes visible here: -EBUSY and -ENODEV. The conditions guarding
 * them are not on visible lines; presumably -EBUSY means *llh is already
 * in use and -ENODEV means no context was found - confirm.
 */
1556 static int record_start_log(const struct lu_env *env, struct mgs_device *mgs,
1557 struct llog_handle **llh, char *name)
1559 static struct obd_uuid cfg_uuid = { .uuid = "config_uuid" };
1560 struct llog_ctxt *ctxt;
1565 GOTO(out, rc = -EBUSY);
1567 ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
1569 GOTO(out, rc = -ENODEV);
1570 LASSERT(ctxt->loc_obd == mgs->mgs_obd);
1572 rc = llog_open_create(env, ctxt, llh, NULL, name);
1575 rc = llog_init_handle(env, *llh, LLOG_F_IS_PLAIN, &cfg_uuid);
1577 llog_close(env, *llh);
1579 llog_ctxt_put(ctxt);
1582 CERROR("%s: can't start log %s: rc = %d\n",
1583 mgs->mgs_obd->obd_name, name, rc);
/*
 * Counterpart of record_start_log(): closes the log handle that \a llh
 * points to with llog_close() and keeps its result in rc.
 *
 * \param llh	pointer to the handle to close
 */
1589 static int record_end_log(const struct lu_env *env, struct llog_handle **llh)
1593 rc = llog_close(env, *llh);
1599 /******************** config "macros" *********************/
1601 /* write an lcfg directly into a log (with markers) */
/*
 * Append one prepared config record to the log \a logname, bracketed by
 * a CM_START and a CM_END marker.
 *
 * Sequence: record_start_log(), record_marker(CM_START), llog_write() of
 * \a lcr at LLOG_NEXT_IDX, record_marker(CM_END), record_end_log().
 *
 * \param logname	log to write into
 * \param lcr		config record to append
 * \param devname	target name stored in both markers
 * \param comment	comment stored in both markers
 */
1602 static int mgs_write_log_direct(const struct lu_env *env,
1603 struct mgs_device *mgs, struct fs_db *fsdb,
1604 char *logname, struct llog_cfg_rec *lcr,
1605 char *devname, char *comment)
1607 struct llog_handle *llh = NULL;
1612 rc = record_start_log(env, mgs, &llh, logname);
1616 /* FIXME These should be a single journal transaction */
1617 rc = record_marker(env, llh, fsdb, CM_START, devname, comment);
1620 rc = llog_write(env, llh, &lcr->lcr_hdr, LLOG_NEXT_IDX);
1623 rc = record_marker(env, llh, fsdb, CM_END, devname, comment);
1627 record_end_log(env, &llh);
1631 /* write the lcfg in all logs for the given fs */
/*
 * Apply one config record to the logs of the filesystem named in
 * mti->mti_fsname.
 *
 * The log list comes from class_dentry_readdir(). Each entry is
 * unlinked from the list and tested against three filters:
 *  - names containing "-sptlrpc";
 *  - names containing "-client" when \a server_only is set;
 *  - names that are not longer than the fsname, do not start with it,
 *    or do not have '-' right after it.
 * For the remaining logs any earlier setting with the same
 * \a devname / \a comment is marked CM_SKIP through mgs_modify(), then
 * the new record is written with mgs_write_log_direct(). Every entry is
 * released with mgs_direntry_free().
 *
 * \param lcr		config record to write
 * \param devname	target name used for the markers
 * \param comment	marker comment identifying the parameter
 * \param server_only	non-zero to leave client logs untouched
 */
1632 static int mgs_write_log_direct_all(const struct lu_env *env,
1633 struct mgs_device *mgs,
1635 struct mgs_target_info *mti,
1636 struct llog_cfg_rec *lcr, char *devname,
1637 char *comment, int server_only)
1639 struct list_head log_list;
1640 struct mgs_direntry *dirent, *n;
1641 char *fsname = mti->mti_fsname;
1642 int rc = 0, len = strlen(fsname);
1645 /* Find all the logs in the CONFIGS directory */
1646 rc = class_dentry_readdir(env, mgs, &log_list);
1650 /* Could use fsdb index maps instead of directory listing */
1651 list_for_each_entry_safe(dirent, n, &log_list, mde_list) {
1652 list_del_init(&dirent->mde_list);
1653 /* don't write to sptlrpc rule log */
1654 if (strstr(dirent->mde_name, "-sptlrpc") != NULL)
1657 /* caller wants write server logs only */
1658 if (server_only && strstr(dirent->mde_name, "-client") != NULL)
/* the log name must be "<fsname>-" followed by something */
1661 if (strlen(dirent->mde_name) <= len ||
1662 strncmp(fsname, dirent->mde_name, len) != 0 ||
1663 dirent->mde_name[len] != '-')
1666 CDEBUG(D_MGS, "Changing log %s\n", dirent->mde_name);
1667 /* Erase any old settings of this same parameter */
1668 rc = mgs_modify(env, mgs, fsdb, mti, dirent->mde_name,
1669 devname, comment, CM_SKIP);
1671 CERROR("%s: Can't modify llog %s: rc = %d\n",
1672 mgs->mgs_obd->obd_name, dirent->mde_name, rc);
1675 /* Write the new one */
1676 rc = mgs_write_log_direct(env, mgs, fsdb, dirent->mde_name,
1677 lcr, devname, comment);
1679 CERROR("%s: writing log %s: rc = %d\n",
1680 mgs->mgs_obd->obd_name, dirent->mde_name, rc);
1682 mgs_direntry_free(dirent);
/*
 * Forward declarations for helpers defined further down in this file;
 * mgs_steal_client_llog_handler() calls all three before their
 * definitions appear.
 */
1688 static int mgs_write_log_osp_to_mdt(const struct lu_env *env,
1689 struct mgs_device *mgs,
1691 struct mgs_target_info *mti,
1692 int index, char *logname);
1693 static int mgs_write_log_osc_to_lov(const struct lu_env *env,
1694 struct mgs_device *mgs,
1696 struct mgs_target_info *mti,
1697 char *logname, char *suffix, char *lovname,
1698 enum lustre_sec_part sec_part, int flags);
1699 static int name_create_mdt_and_lov(char **logname, char **lovname,
1700 struct fs_db *fsdb, int i);
/*
 * Append " <key><val>" to the parameter string \a params.
 *
 * \a params is treated as a buffer of
 * sizeof(struct mgs_target_info::mti_params) bytes: the write position
 * is the current end of the string and the limit is that many bytes
 * past \a params. If the addition would reach the limit, the condition
 * is reported with CERROR instead.
 *
 * \param params	parameter string to extend in place
 * \param key		parameter key; printed as "" when NULL
 * \param val		value text appended right after the key
 *
 * NOTE(review): \a key is tested against NULL in the format arguments,
 * so the strlen(key) call is presumably guarded by a check that is not
 * on a visible line - confirm.
 */
1702 static int add_param(char *params, char *key, char *val)
1704 char *start = params + strlen(params);
1705 char *end = params + sizeof(((struct mgs_target_info *)0)->mti_params);
1709 keylen = strlen(key);
/* one extra byte accounts for the separating space written below */
1710 if (start + 1 + keylen + strlen(val) >= end) {
1711 CERROR("params are too long: %s %s%s\n",
1712 params, key != NULL ? key : "", val);
1716 sprintf(start, " %s%s", key != NULL ? key : "", val);
1721 * Walk through client config log record and convert the related records
/*
 * llog record callback: walks the client config log and replays the
 * osc and mdc related records for the target described by \a data.
 *
 * \a data is a struct temp_comp providing comp_mti (the target being
 * processed), comp_tmti (scratch target info rebuilt from the client
 * records), comp_fsdb and comp_obd.
 *
 * Visible handling per record:
 *  - a record type other than OBD_CFG_REC, or a config failing
 *    lustre_cfg_sanity_check(), is reported with CERROR;
 *  - LCFG_MARKER: "add osc" and "add mdc" markers without CM_SKIP drive
 *    got_an_osc_or_mdc (1 for osc, 2 for mdc, 0 when a block ends) and
 *    last_step. A CM_START marker copies cm_tgtname into
 *    tmti->mti_svname, a CM_END marker clears *tmti. For osc markers an
 *    "add osc(copied)" marker is also recorded in a log opened with
 *    record_start_log();
 *  - records are tested against got_an_osc_or_mdc == 0 || last_step < 0
 *    before the command handling below;
 *  - LCFG_ADD_UUID: while tmti->mti_uuid is empty the nid is stored in
 *    tmti->mti_nids, afterwards it is added to tmti->mti_params under
 *    PARAM_FAILNODE;
 *  - LCFG_SETUP: config string 1 is copied into tmti->mti_uuid;
 *  - LCFG_SPTLRPC_CONF: tested for (the comment says it is ignored);
 *  - LCFG_ADD_MDC: the index is parsed from buffer 2 and the record is
 *    replayed with mgs_write_log_osp_to_mdt();
 *  - LCFG_LOV_ADD_OBD: the index is parsed from buffer 2 and the record
 *    is replayed with mgs_write_log_osc_to_lov().
 *
 * NOTE(review): got_an_osc_or_mdc and last_step are function-level
 * statics, so their state survives between calls; presumably callers
 * are serialised - confirm.
 * NOTE(review): the memcpy() calls copy strlen() bytes with no visible
 * check against the destination size, and mti_nids is indexed by
 * mti_nid_count with no visible bound - confirm.
 */
1724 static int mgs_steal_client_llog_handler(const struct lu_env *env,
1725 struct llog_handle *llh,
1726 struct llog_rec_hdr *rec, void *data)
1728 struct mgs_device *mgs;
1729 struct obd_device *obd;
1730 struct mgs_target_info *mti, *tmti;
1732 int cfg_len = rec->lrh_len;
1733 char *cfg_buf = (char*) (rec + 1);
1734 struct lustre_cfg *lcfg;
1736 struct llog_handle *mdt_llh = NULL;
1737 static int got_an_osc_or_mdc = 0;
1738 /* 0: not found any osc/mdc;
1742 static int last_step = -1;
1747 mti = ((struct temp_comp*)data)->comp_mti;
1748 tmti = ((struct temp_comp*)data)->comp_tmti;
1749 fsdb = ((struct temp_comp*)data)->comp_fsdb;
1750 obd = ((struct temp_comp *)data)->comp_obd;
1751 mgs = lu2mgs_dev(obd->obd_lu_dev);
1754 if (rec->lrh_type != OBD_CFG_REC) {
1755 CERROR("unhandled lrh_type: %#x\n", rec->lrh_type);
1759 rc = lustre_cfg_sanity_check(cfg_buf, cfg_len);
1761 CERROR("Insane cfg\n");
1765 lcfg = (struct lustre_cfg *)cfg_buf;
1767 if (lcfg->lcfg_command == LCFG_MARKER) {
1768 struct cfg_marker *marker;
1769 marker = lustre_cfg_buf(lcfg, 1);
1770 if (!strncmp(marker->cm_comment, "add osc", 7) &&
1771 (marker->cm_flags & CM_START) &&
1772 !(marker->cm_flags & CM_SKIP)) {
1773 got_an_osc_or_mdc = 1;
1774 cplen = strlcpy(tmti->mti_svname, marker->cm_tgtname,
1775 sizeof(tmti->mti_svname));
1776 if (cplen >= sizeof(tmti->mti_svname))
1778 rc = record_start_log(env, mgs, &mdt_llh,
1782 rc = record_marker(env, mdt_llh, fsdb, CM_START,
1783 mti->mti_svname, "add osc(copied)");
1784 record_end_log(env, &mdt_llh);
1785 last_step = marker->cm_step;
1788 if (!strncmp(marker->cm_comment, "add osc", 7) &&
1789 (marker->cm_flags & CM_END) &&
1790 !(marker->cm_flags & CM_SKIP)) {
1791 LASSERT(last_step == marker->cm_step);
1793 got_an_osc_or_mdc = 0;
1794 memset(tmti, 0, sizeof(*tmti));
1795 rc = record_start_log(env, mgs, &mdt_llh,
1799 rc = record_marker(env, mdt_llh, fsdb, CM_END,
1800 mti->mti_svname, "add osc(copied)");
1801 record_end_log(env, &mdt_llh);
1804 if (!strncmp(marker->cm_comment, "add mdc", 7) &&
1805 (marker->cm_flags & CM_START) &&
1806 !(marker->cm_flags & CM_SKIP)) {
1807 got_an_osc_or_mdc = 2;
1808 last_step = marker->cm_step;
1809 memcpy(tmti->mti_svname, marker->cm_tgtname,
1810 strlen(marker->cm_tgtname));
1814 if (!strncmp(marker->cm_comment, "add mdc", 7) &&
1815 (marker->cm_flags & CM_END) &&
1816 !(marker->cm_flags & CM_SKIP)) {
1817 LASSERT(last_step == marker->cm_step);
1819 got_an_osc_or_mdc = 0;
1820 memset(tmti, 0, sizeof(*tmti));
1825 if (got_an_osc_or_mdc == 0 || last_step < 0)
1828 if (lcfg->lcfg_command == LCFG_ADD_UUID) {
1829 __u64 nodenid = lcfg->lcfg_nid;
1831 if (strlen(tmti->mti_uuid) == 0) {
1832 /* target uuid not set, this config record is before
1833 * LCFG_SETUP, this nid is one of target node nid.
1835 tmti->mti_nids[tmti->mti_nid_count] = nodenid;
1836 tmti->mti_nid_count++;
1838 char nidstr[LNET_NIDSTR_SIZE];
1840 /* failover node nid */
1841 libcfs_nid2str_r(nodenid, nidstr, sizeof(nidstr));
1842 rc = add_param(tmti->mti_params, PARAM_FAILNODE,
1849 if (lcfg->lcfg_command == LCFG_SETUP) {
1852 target = lustre_cfg_string(lcfg, 1);
1853 memcpy(tmti->mti_uuid, target, strlen(target));
1857 /* ignore client side sptlrpc_conf_log */
1858 if (lcfg->lcfg_command == LCFG_SPTLRPC_CONF)
1861 if (lcfg->lcfg_command == LCFG_ADD_MDC) {
1864 if (sscanf(lustre_cfg_buf(lcfg, 2), "%d", &index) != 1)
1867 memcpy(tmti->mti_fsname, mti->mti_fsname,
1868 strlen(mti->mti_fsname));
1869 tmti->mti_stripe_index = index;
1871 rc = mgs_write_log_osp_to_mdt(env, mgs, fsdb, tmti,
1872 mti->mti_stripe_index,
1874 memset(tmti, 0, sizeof(*tmti));
/* lov target addition: replayed via mgs_write_log_osc_to_lov() */
1878 if (lcfg->lcfg_command == LCFG_LOV_ADD_OBD) {
1881 char *logname, *lovname;
1883 rc = name_create_mdt_and_lov(&logname, &lovname, fsdb,
1884 mti->mti_stripe_index);
1887 sprintf(mdt_index, "-MDT%04x", mti->mti_stripe_index);
1889 if (sscanf(lustre_cfg_buf(lcfg, 2), "%d", &index) != 1) {
1890 name_destroy(&logname);
1891 name_destroy(&lovname);
1895 tmti->mti_stripe_index = index;
1896 rc = mgs_write_log_osc_to_lov(env, mgs, fsdb, tmti, logname,
1899 name_destroy(&logname);
1900 name_destroy(&lovname);
1906 /* fsdb->fsdb_mutex is already held in mgs_write_log_target*/
1907 /* adapted from mgs_get_fsdb_from_llog */
/*
 * Run mgs_steal_client_llog_handler() over the client config log.
 *
 * A scratch struct mgs_target_info is allocated with OBD_ALLOC_PTR() and
 * published to the handler through comp->comp_tmti, together with the
 * MGS obd in comp->comp_obd. The client log is opened with llog_open(),
 * initialised as a plain log and processed with llog_process_or_fork()
 * (last argument false); it is closed afterwards and the llog context
 * reference is dropped.
 *
 * \param comp	handler state; comp_tmti and comp_obd are set here
 *
 * \retval -ENOMEM	the scratch target info could not be allocated
 */
1908 static int mgs_steal_llog_for_mdt_from_client(const struct lu_env *env,
1909 struct mgs_device *mgs,
1911 struct temp_comp* comp)
1913 struct llog_handle *loghandle;
1914 struct mgs_target_info *tmti;
1915 struct llog_ctxt *ctxt;
1920 ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
1921 LASSERT(ctxt != NULL);
1923 OBD_ALLOC_PTR(tmti);
1925 GOTO(out_ctxt, rc = -ENOMEM);
1927 comp->comp_tmti = tmti;
1928 comp->comp_obd = mgs->mgs_obd;
1930 rc = llog_open(env, ctxt, &loghandle, NULL, client_name,
1938 rc = llog_init_handle(env, loghandle, LLOG_F_IS_PLAIN, NULL);
1940 GOTO(out_close, rc);
1942 rc = llog_process_or_fork(env, loghandle, mgs_steal_client_llog_handler,
1943 (void *)comp, NULL, false);
1944 CDEBUG(D_MGS, "steal llog re = %d\n", rc);
1946 llog_close(env, loghandle);
1950 llog_ctxt_put(ctxt);
1954 /* lmv is the second thing for client logs */
1955 /* copied from mgs_write_log_lov. Please refer to that. */
/*
 * Write the "lmv setup" section for \a lmvname into the log \a logname.
 *
 * An lmv descriptor is allocated with zero target counts and the uuid
 * "<lmvname>_UUID". Between a CM_START and a CM_END marker labelled
 * "lmv setup" the function records an attach of type "lmv" and the lmv
 * setup record. The log is closed and the descriptor freed afterwards.
 *
 * \param logname	log to append to
 * \param lmvname	name of the lmv device being described
 */
1956 static int mgs_write_log_lmv(const struct lu_env *env,
1957 struct mgs_device *mgs,
1959 struct mgs_target_info *mti,
1960 char *logname, char *lmvname)
1962 struct llog_handle *llh = NULL;
1963 struct lmv_desc *lmvdesc;
1968 CDEBUG(D_MGS, "Writing lmv(%s) log for %s\n", lmvname,logname);
1970 OBD_ALLOC_PTR(lmvdesc);
1971 if (lmvdesc == NULL)
1973 lmvdesc->ld_active_tgt_count = 0;
1974 lmvdesc->ld_tgt_count = 0;
1975 sprintf((char*)lmvdesc->ld_uuid.uuid, "%s_UUID", lmvname);
/* the descriptor uuid doubles as the uuid given to record_attach() */
1976 uuid = (char *)lmvdesc->ld_uuid.uuid;
1978 rc = record_start_log(env, mgs, &llh, logname);
1981 rc = record_marker(env, llh, fsdb, CM_START, lmvname, "lmv setup");
1984 rc = record_attach(env, llh, lmvname, "lmv", uuid);
1987 rc = record_lmv_setup(env, llh, lmvname, lmvdesc);
1990 rc = record_marker(env, llh, fsdb, CM_END, lmvname, "lmv setup");
1994 record_end_log(env, &llh);
1996 OBD_FREE_PTR(lmvdesc);
2000 /* lov is the first thing in the mdt and client logs */
/*
 * Write the "lov setup" section for \a lovname into the log \a logname.
 *
 * A lov descriptor is allocated and given defaults: LOV_DESC_MAGIC, no
 * targets, stripe count 1, LOV_PATTERN_RAID0,
 * LOV_DESC_STRIPE_SIZE_DEFAULT, stripe offset -1,
 * LOV_DESC_QOS_MAXAGE_DEFAULT and the uuid "<lovname>_UUID". Between a
 * CM_START and a CM_END marker labelled "lov setup" the function records
 * an attach of type "lov" and the lov setup record. The log is closed
 * and the descriptor freed afterwards.
 *
 * \param logname	log to append to
 * \param lovname	name of the lov device being described
 */
2001 static int mgs_write_log_lov(const struct lu_env *env, struct mgs_device *mgs,
2002 struct fs_db *fsdb, struct mgs_target_info *mti,
2003 char *logname, char *lovname)
2005 struct llog_handle *llh = NULL;
2006 struct lov_desc *lovdesc;
2011 CDEBUG(D_MGS, "Writing lov(%s) log for %s\n", lovname, logname);
2014 #01 L attach 0:lov_mdsA 1:lov 2:71ccb_lov_mdsA_19f961a9e1
2015 #02 L lov_setup 0:lov_mdsA 1:(struct lov_desc)
2016 uuid=lov1_UUID, stripe count=1, size=1048576, offset=0, pattern=0
2019 /* FIXME just make lov_setup accept empty desc (put uuid in buf 2) */
2020 OBD_ALLOC_PTR(lovdesc);
2021 if (lovdesc == NULL)
2023 lovdesc->ld_magic = LOV_DESC_MAGIC;
2024 lovdesc->ld_tgt_count = 0;
2025 /* Defaults. Can be changed later by lcfg config_param */
2026 lovdesc->ld_default_stripe_count = 1;
2027 lovdesc->ld_pattern = LOV_PATTERN_RAID0;
2028 lovdesc->ld_default_stripe_size = LOV_DESC_STRIPE_SIZE_DEFAULT;
2029 lovdesc->ld_default_stripe_offset = -1;
2030 lovdesc->ld_qos_maxage = LOV_DESC_QOS_MAXAGE_DEFAULT;
2031 sprintf((char*)lovdesc->ld_uuid.uuid, "%s_UUID", lovname);
2032 /* can these be the same? */
2033 uuid = (char *)lovdesc->ld_uuid.uuid;
2035 /* This should always be the first entry in a log.
2036 rc = mgs_clear_log(obd, logname); */
2037 rc = record_start_log(env, mgs, &llh, logname);
2040 /* FIXME these should be a single journal transaction */
2041 rc = record_marker(env, llh, fsdb, CM_START, lovname, "lov setup");
2044 rc = record_attach(env, llh, lovname, "lov", uuid);
2047 rc = record_lov_setup(env, llh, lovname, lovdesc);
2050 rc = record_marker(env, llh, fsdb, CM_END, lovname, "lov setup");
2055 record_end_log(env, &llh);
2057 OBD_FREE_PTR(lovdesc);
2061 /* add failnids to open log */
/*
 * Record the failover NIDs found in mti->mti_params into the already
 * open log \a llh.
 *
 * Every PARAM_FAILNODE entry in the parameter string is scanned with
 * class_find_param() and its NIDs are pulled out one at a time with
 * class_parse_nid(). The first NID of a group is turned into the
 * failover node uuid via libcfs_nid2str_r() and name_create(). Each NID
 * is recorded with record_add_uuid() under that uuid, and the group is
 * finished with record_add_conn() for the client device, after which
 * the uuid string is destroyed and reset to NULL.
 *
 * \param mti	target info whose mti_params is scanned
 * \param llh	open log handle to record into
 *
 * cliname, used in the debug message and in record_add_conn(), is
 * declared on a line that is not visible here.
 */
2062 static int mgs_write_log_failnids(const struct lu_env *env,
2063 struct mgs_target_info *mti,
2064 struct llog_handle *llh,
2067 char *failnodeuuid = NULL;
2068 char *ptr = mti->mti_params;
2073 #03 L add_uuid nid=uml1@tcp(0x20000c0a80201) nal=90 0: 1:uml1_UUID
2074 #04 L add_uuid nid=1@elan(0x1000000000001) nal=90 0: 1:uml1_UUID
2075 #05 L setup 0:OSC_uml1_ost1_mdsA 1:ost1_UUID 2:uml1_UUID
2076 #06 L add_uuid nid=uml2@tcp(0x20000c0a80202) nal=90 0: 1:uml2_UUID
2077 #0x L add_uuid nid=2@elan(0x1000000000002) nal=90 0: 1:uml2_UUID
2078 #07 L add_conn 0:OSC_uml1_ost1_mdsA 1:uml2_UUID
2082 * Pull failnid info out of params string, which may contain something
2083 * like "<nid1>,<nid2>:<nid3>,<nid4>". class_parse_nid() does not
2084 * complain about abnormal inputs like ",:<nid1>", "<nid1>:,<nid2>",
2085 * etc. However, convert_hostnames() should have caught those.
2087 while (class_find_param(ptr, PARAM_FAILNODE, &ptr) == 0) {
2088 while (class_parse_nid(ptr, &nid, &ptr) == 0) {
2089 char nidstr[LNET_NIDSTR_SIZE];
2091 if (failnodeuuid == NULL) {
2092 /* We don't know the failover node name,
2093 * so just use the first nid as the uuid */
2094 libcfs_nid2str_r(nid, nidstr, sizeof(nidstr));
2095 rc = name_create(&failnodeuuid, nidstr, "");
2099 CDEBUG(D_MGS, "add nid %s for failover uuid %s, "
2101 libcfs_nid2str_r(nid, nidstr, sizeof(nidstr)),
2102 failnodeuuid, cliname);
2103 rc = record_add_uuid(env, llh, nid, failnodeuuid);
2105 * If *ptr is ':', we have added all NIDs for
2109 rc = record_add_conn(env, llh, cliname,
2111 name_destroy(&failnodeuuid);
2112 failnodeuuid = NULL;
2116 rc = record_add_conn(env, llh, cliname, failnodeuuid);
2117 name_destroy(&failnodeuuid);
2118 failnodeuuid = NULL;
/*
 * Add the mdc for the MDT described by \a mti to the lmv \a lmvname in
 * the log \a logname.
 *
 * The log must not be empty; an empty log is reported with CERROR as a
 * logical error. Names derived here:
 *  - nodeuuid: string form of mti->mti_nids[0]
 *  - mdcname:  "<svname>-mdc"
 *  - mdcuuid:  "<mdcname>_UUID"
 *  - lmvuuid:  "<lmvname>_UUID"
 * Records written between the start and end markers: one add_uuid per
 * entry of mti->mti_nids, the mdc attach (LUSTRE_MDC_NAME), its setup,
 * the failover NIDs (mgs_write_log_failnids()) and the mdc add to the
 * lmv with the stripe index printed in decimal. All derived names are
 * destroyed at the end.
 */
2125 static int mgs_write_log_mdc_to_lmv(const struct lu_env *env,
2126 struct mgs_device *mgs,
2128 struct mgs_target_info *mti,
2129 char *logname, char *lmvname)
2131 struct llog_handle *llh = NULL;
2132 char *mdcname = NULL;
2133 char *nodeuuid = NULL;
2134 char *mdcuuid = NULL;
2135 char *lmvuuid = NULL;
2137 char nidstr[LNET_NIDSTR_SIZE];
2141 if (mgs_log_is_empty(env, mgs, logname)) {
2142 CERROR("log is empty! Logical error\n");
2146 CDEBUG(D_MGS, "adding mdc for %s to log %s:lmv(%s)\n",
2147 mti->mti_svname, logname, lmvname);
/* the first nid of the target names the node uuid */
2149 libcfs_nid2str_r(mti->mti_nids[0], nidstr, sizeof(nidstr));
2150 rc = name_create(&nodeuuid, nidstr, "");
2153 rc = name_create(&mdcname, mti->mti_svname, "-mdc");
2156 rc = name_create(&mdcuuid, mdcname, "_UUID");
2159 rc = name_create(&lmvuuid, lmvname, "_UUID");
2163 rc = record_start_log(env, mgs, &llh, logname);
2166 rc = record_marker(env, llh, fsdb, CM_START, mti->mti_svname,
2170 for (i = 0; i < mti->mti_nid_count; i++) {
2171 CDEBUG(D_MGS, "add nid %s for mdt\n",
2172 libcfs_nid2str_r(mti->mti_nids[i],
2173 nidstr, sizeof(nidstr)));
2175 rc = record_add_uuid(env, llh, mti->mti_nids[i], nodeuuid);
2180 rc = record_attach(env, llh, mdcname, LUSTRE_MDC_NAME, lmvuuid);
2183 rc = record_setup(env, llh, mdcname, mti->mti_uuid, nodeuuid,
2187 rc = mgs_write_log_failnids(env, mti, llh, mdcname);
2190 snprintf(index, sizeof(index), "%d", mti->mti_stripe_index);
2191 rc = record_mdc_add(env, llh, lmvname, mdcuuid, mti->mti_uuid,
2195 rc = record_marker(env, llh, fsdb, CM_END, mti->mti_svname,
2200 record_end_log(env, &llh);
2202 name_destroy(&lmvuuid);
2203 name_destroy(&mdcuuid);
2204 name_destroy(&mdcname);
2205 name_destroy(&nodeuuid);
/*
 * Create the name of the lov used by an MDT.
 *
 * For index 0 on a filesystem flagged FSDB_OSCNAME18 the name is built
 * from fsdb->fsdb_name; in every other case it is built from
 * \a mdtname. Both variants use the suffix "-mdtlov".
 *
 * \return the return value of name_create()
 */
2209 static inline int name_create_lov(char **lovname, char *mdtname,
2210 struct fs_db *fsdb, int index)
2213 if (index == 0 && test_bit(FSDB_OSCNAME18, &fsdb->fsdb_flags))
2214 return name_create(lovname, fsdb->fsdb_name, "-mdtlov");
2216 return name_create(lovname, mdtname, "-mdtlov");
/*
 * Create both the MDT log name and the matching lov name for MDT \a i.
 *
 * The log name comes from name_create_mdt() on fsdb->fsdb_name. The lov
 * name uses the suffix "-mdtlov" on fsdb->fsdb_name for index 0 with
 * FSDB_OSCNAME18 set, and on the freshly created log name otherwise.
 * name_destroy(logname) is also called here; presumably only when the
 * lov name could not be created - confirm.
 */
2219 static int name_create_mdt_and_lov(char **logname, char **lovname,
2220 struct fs_db *fsdb, int i)
2224 rc = name_create_mdt(logname, fsdb->fsdb_name, i);
2228 if (i == 0 && test_bit(FSDB_OSCNAME18, &fsdb->fsdb_flags))
2229 rc = name_create(lovname, fsdb->fsdb_name, "-mdtlov");
2231 rc = name_create(lovname, *logname, "-mdtlov");
2233 name_destroy(logname);
/*
 * Create the name of the osc that MDT \a i uses for \a ostname.
 *
 * The suffix is "-osc" for index 0 on a filesystem flagged
 * FSDB_OSCNAME18 and "-osc-MDT%04x" (index in hex) otherwise; it is
 * appended to \a ostname through name_create().
 *
 * \return the return value of name_create()
 */
2239 static inline int name_create_mdt_osc(char **oscname, char *ostname,
2240 struct fs_db *fsdb, int i)
2244 if (i == 0 && test_bit(FSDB_OSCNAME18, &fsdb->fsdb_flags))
2245 sprintf(suffix, "-osc");
2247 sprintf(suffix, "-osc-MDT%04x", i);
2248 return name_create(oscname, ostname, suffix);
2251 /* add new mdc to an already existing MDS */
/*
 * Add an osp device for the MDT described by \a mti to the config log
 * \a logname of the MDT with index \a mdt_index.
 *
 * The log named mti->mti_svname must not be empty; an empty log is
 * reported with CERROR as a logical error. Names derived here:
 *  - mdtname:  name_create_mdt(fsdb_name, mti_stripe_index)
 *  - nodeuuid: string form of mti->mti_nids[0]
 *  - svname:   "<mdtname>-osp"
 *  - ospname:  "<svname>-MDT<mdt_index as %04x>"
 *  - lovname:  name_create_lov(logname, fsdb, mdt_index)
 *  - lovuuid:  "<lovname>_UUID"
 *  - mdtuuid:  "<mdtname>_UUID"
 * Records written between the start marker and the "add osp" end
 * marker: one add_uuid per entry of mti->mti_nids, the osp attach
 * (LUSTRE_OSP_NAME), its setup, the failover NIDs and an LCFG_ADD_MDC
 * record on the lov with the stripe index in decimal and "1" as the
 * third string. All derived names are destroyed at the end.
 */
2252 static int mgs_write_log_osp_to_mdt(const struct lu_env *env,
2253 struct mgs_device *mgs,
2255 struct mgs_target_info *mti,
2256 int mdt_index, char *logname)
2258 struct llog_handle *llh = NULL;
2259 char *nodeuuid = NULL;
2260 char *ospname = NULL;
2261 char *lovuuid = NULL;
2262 char *mdtuuid = NULL;
2263 char *svname = NULL;
2264 char *mdtname = NULL;
2265 char *lovname = NULL;
2267 char nidstr[LNET_NIDSTR_SIZE];
2271 if (mgs_log_is_empty(env, mgs, mti->mti_svname)) {
2272 CERROR("log is empty! Logical error\n");
2276 CDEBUG(D_MGS, "adding osp index %d to %s\n", mti->mti_stripe_index,
2279 rc = name_create_mdt(&mdtname, fsdb->fsdb_name, mti->mti_stripe_index);
2283 libcfs_nid2str_r(mti->mti_nids[0], nidstr, sizeof(nidstr));
2284 rc = name_create(&nodeuuid, nidstr, "");
2286 GOTO(out_destory, rc);
2288 rc = name_create(&svname, mdtname, "-osp");
2290 GOTO(out_destory, rc);
/* index_str first carries the "-MDTxxxx" suffix of the osp name */
2292 sprintf(index_str, "-MDT%04x", mdt_index);
2293 rc = name_create(&ospname, svname, index_str);
2295 GOTO(out_destory, rc);
2297 rc = name_create_lov(&lovname, logname, fsdb, mdt_index);
2299 GOTO(out_destory, rc);
2301 rc = name_create(&lovuuid, lovname, "_UUID");
2303 GOTO(out_destory, rc);
2305 rc = name_create(&mdtuuid, mdtname, "_UUID");
2307 GOTO(out_destory, rc);
2309 rc = record_start_log(env, mgs, &llh, logname);
2311 GOTO(out_destory, rc);
2313 rc = record_marker(env, llh, fsdb, CM_START, mti->mti_svname,
2316 GOTO(out_destory, rc);
2318 for (i = 0; i < mti->mti_nid_count; i++) {
2319 CDEBUG(D_MGS, "add nid %s for mdt\n",
2320 libcfs_nid2str_r(mti->mti_nids[i],
2321 nidstr, sizeof(nidstr)));
2322 rc = record_add_uuid(env, llh, mti->mti_nids[i], nodeuuid);
2327 rc = record_attach(env, llh, ospname, LUSTRE_OSP_NAME, lovuuid);
2331 rc = record_setup(env, llh, ospname, mti->mti_uuid, nodeuuid,
2336 rc = mgs_write_log_failnids(env, mti, llh, ospname);
2340 /* Add mdc(osp) to lod */
/* index_str is reused here for the decimal stripe index */
2341 snprintf(index_str, sizeof(index_str), "%d", mti->mti_stripe_index);
2342 rc = record_base(env, llh, lovname, 0, LCFG_ADD_MDC, mti->mti_uuid,
2343 index_str, "1", NULL);
2347 rc = record_marker(env, llh, fsdb, CM_END, mti->mti_svname, "add osp");
2352 record_end_log(env, &llh);
2355 name_destroy(&mdtuuid);
2356 name_destroy(&lovuuid);
2357 name_destroy(&lovname);
2358 name_destroy(&ospname);
2359 name_destroy(&svname);
2360 name_destroy(&nodeuuid);
2361 name_destroy(&mdtname);
/*
 * Write the MDT's own section into its config log (mti->mti_svname).
 *
 * Steps visible here:
 *  - allocate a buffer of sizeof(struct obd_uuid) for the uuid string;
 *  - read PARAM_FAILMODE from mti->mti_params; failout is set when the
 *    value starts with "failout";
 *  - create the lov name "<log>-mdtlov" and, if the log is still empty,
 *    write the lov setup with mgs_write_log_lov();
 *  - between the "add mdt" markers record the attach (LUSTRE_MDT_NAME,
 *    uuid "<log>_UUID"), the mount option and the setup, whose last
 *    string is "n" for failout mode and "f" otherwise;
 *  - close the log, destroy the lov name and free the uuid buffer.
 */
2365 static int mgs_write_log_mdt0(const struct lu_env *env,
2366 struct mgs_device *mgs,
2368 struct mgs_target_info *mti)
2370 char *log = mti->mti_svname;
2371 struct llog_handle *llh = NULL;
2372 char *uuid, *lovname;
2374 char *ptr = mti->mti_params;
2375 int rc = 0, failout = 0;
2378 OBD_ALLOC(uuid, sizeof(struct obd_uuid));
2382 if (class_find_param(ptr, PARAM_FAILMODE, &ptr) == 0)
2383 failout = (strncmp(ptr, "failout", 7) == 0);
2385 rc = name_create(&lovname, log, "-mdtlov");
2388 if (mgs_log_is_empty(env, mgs, log)) {
2389 rc = mgs_write_log_lov(env, mgs, fsdb, mti, log, lovname);
2394 sprintf(mdt_index, "%d", mti->mti_stripe_index);
2396 rc = record_start_log(env, mgs, &llh, log);
2400 /* add MDT itself */
2402 /* FIXME this whole fn should be a single journal transaction */
2403 sprintf(uuid, "%s_UUID", log);
2404 rc = record_marker(env, llh, fsdb, CM_START, log, "add mdt");
2407 rc = record_attach(env, llh, log, LUSTRE_MDT_NAME, uuid);
2410 rc = record_mount_opt(env, llh, log, lovname, NULL);
2413 rc = record_setup(env, llh, log, uuid, mdt_index, lovname,
2414 failout ? "n" : "f");
2417 rc = record_marker(env, llh, fsdb, CM_END, log, "add mdt");
2421 record_end_log(env, &llh);
2423 name_destroy(&lovname);
2425 OBD_FREE(uuid, sizeof(struct obd_uuid));
2429 /* envelope method for all layers log */
/*
 * Write every config log section needed for a new MDT.
 *
 * Steps visible here:
 *  1. if mti->mti_uuid is empty, make it "<svname>_UUID";
 *  2. write the MDT's own log with mgs_write_log_mdt0();
 *  3. build the client log name "<fsname>-client"; if that log is empty,
 *     start it with the lov and lmv setup sections;
 *  4. publish \a mti and \a fsdb through the per-thread mgi_comp and
 *     run mgs_steal_llog_for_mdt_from_client() on the client log;
 *  5. add this MDT's mdc to the client lmv
 *     (mgs_write_log_mdc_to_lmv());
 *  6. record the client mount option between a start and an end marker;
 *  7. for every other MDT present in fsdb->fsdb_mdt_index_map whose log
 *     is not empty, call mgs_write_log_osp_to_mdt() on that log;
 *  8. close the log and destroy the client log name.
 */
2430 static int mgs_write_log_mdt(const struct lu_env *env,
2431 struct mgs_device *mgs,
2433 struct mgs_target_info *mti)
2435 struct mgs_thread_info *mgi = mgs_env_info(env);
2436 struct llog_handle *llh = NULL;
2441 CDEBUG(D_MGS, "writing new mdt %s\n", mti->mti_svname);
2443 if (mti->mti_uuid[0] == '\0') {
2444 /* Make up our own uuid */
2445 snprintf(mti->mti_uuid, sizeof(mti->mti_uuid),
2446 "%s_UUID", mti->mti_svname);
2450 rc = mgs_write_log_mdt0(env, mgs, fsdb, mti);
2453 /* Append the mdt info to the client log */
2454 rc = name_create(&cliname, mti->mti_fsname, "-client");
2458 if (mgs_log_is_empty(env, mgs, cliname)) {
2459 /* Start client log */
2460 rc = mgs_write_log_lov(env, mgs, fsdb, mti, cliname,
2464 rc = mgs_write_log_lmv(env, mgs, fsdb, mti, cliname,
2471 #09 L add_uuid nid=uml1@tcp(0x20000c0a80201) 0: 1:uml1_UUID
2472 #10 L attach 0:MDC_uml1_mdsA_MNT_client 1:mdc 2:1d834_MNT_client_03f
2473 #11 L setup 0:MDC_uml1_mdsA_MNT_client 1:mdsA_UUID 2:uml1_UUID
2474 #12 L add_uuid nid=uml2@tcp(0x20000c0a80202) 0: 1:uml2_UUID
2475 #13 L add_conn 0:MDC_uml1_mdsA_MNT_client 1:uml2_UUID
2476 #14 L mount_option 0: 1:client 2:lov1 3:MDC_uml1_mdsA_MNT_client
2479 /* copy client info about lov/lmv */
2480 mgi->mgi_comp.comp_mti = mti;
2481 mgi->mgi_comp.comp_fsdb = fsdb;
2483 rc = mgs_steal_llog_for_mdt_from_client(env, mgs, cliname,
2487 rc = mgs_write_log_mdc_to_lmv(env, mgs, fsdb, mti, cliname,
2493 rc = record_start_log(env, mgs, &llh, cliname);
2497 rc = record_marker(env, llh, fsdb, CM_START, cliname,
2501 rc = record_mount_opt(env, llh, cliname, fsdb->fsdb_clilov,
2505 rc = record_marker(env, llh, fsdb, CM_END, cliname,
2511 /* for_all_existing_mdt except current one */
2512 for (i = 0; i < INDEX_MAP_SIZE * 8; i++) {
2513 if (i != mti->mti_stripe_index &&
2514 test_bit(i, fsdb->fsdb_mdt_index_map)) {
2517 rc = name_create_mdt(&logname, fsdb->fsdb_name, i);
2521 /* NB: If the log for the MDT is empty, it means
2522 * the MDT is only added to the index
2523 * map, and not being process yet, i.e. this
2524 * is an unregistered MDT, see mgs_write_log_target().
2525 * so we should skip it. Otherwise
2527 * 1. MGS get register request for MDT1 and MDT2.
2529 * 2. Then both MDT1 and MDT2 are added into
2530 * fsdb_mdt_index_map. (see mgs_set_index()).
2532 * 3. Then MDT1 get the lock of fsdb_mutex, then
2533 * generate the config log, here, it will regard MDT2
2534 * as an existent MDT, and generate "add osp" for
2535 * lustre-MDT0001-osp-MDT0002. Note: at the moment
2536 * MDT0002 config log is still empty, so it will
2537 * add "add osp" even before "lov setup", which
2538 * will definitly cause trouble.
2540 * 4. MDT1 registeration finished, fsdb_mutex is
2541 * released, then MDT2 get in, then in above
2542 * mgs_steal_llog_for_mdt_from_client(), it will
2543 * add another osp log for lustre-MDT0001-osp-MDT0002,
2544 * which will cause another trouble.*/
2545 if (!mgs_log_is_empty(env, mgs, logname))
2546 rc = mgs_write_log_osp_to_mdt(env, mgs, fsdb,
2549 name_destroy(&logname);
2555 record_end_log(env, &llh);
2557 name_destroy(&cliname);
2561 /* Add the ost info to the client/mdt lov */
/*
 * Add the osc for the OST described by \a mti to the lov \a lovname in
 * the log \a logname.
 *
 * The log must not be empty; an empty log is reported with CERROR as a
 * logical error. Names derived here:
 *  - nodeuuid: string form of mti->mti_nids[0]
 *  - svname:   "<mti_svname>-osc"
 *  - oscname:  svname alone when FSDB_OSCNAME18 is set, otherwise
 *              svname followed by \a suffix
 *  - oscuuid:  "<oscname>_UUID"
 *  - lovuuid:  "<lovname>_UUID"
 * Records written between the start and end markers (both carry
 * \a flags): one add_uuid per entry of mti->mti_nids, the osc attach
 * (LUSTRE_OSC_NAME), its setup, the failover NIDs and the lov add with
 * the stripe index in decimal and generation "1". All derived names are
 * destroyed at the end.
 *
 * \param suffix	text appended to the osc name
 * \param sec_part	not used on any visible line
 * \param flags		extra marker flags OR-ed into CM_START / CM_END
 */
2562 static int mgs_write_log_osc_to_lov(const struct lu_env *env,
2563 struct mgs_device *mgs, struct fs_db *fsdb,
2564 struct mgs_target_info *mti,
2565 char *logname, char *suffix, char *lovname,
2566 enum lustre_sec_part sec_part, int flags)
2568 struct llog_handle *llh = NULL;
2569 char *nodeuuid = NULL;
2570 char *oscname = NULL;
2571 char *oscuuid = NULL;
2572 char *lovuuid = NULL;
2573 char *svname = NULL;
2575 char nidstr[LNET_NIDSTR_SIZE];
2579 CDEBUG(D_INFO, "adding osc for %s to log %s\n",
2580 mti->mti_svname, logname);
2582 if (mgs_log_is_empty(env, mgs, logname)) {
2583 CERROR("log is empty! Logical error\n");
2587 libcfs_nid2str_r(mti->mti_nids[0], nidstr, sizeof(nidstr));
2588 rc = name_create(&nodeuuid, nidstr, "");
2591 rc = name_create(&svname, mti->mti_svname, "-osc");
2595 /* for the system upgraded from old 1.8, keep using the old osc naming
2596 * style for mdt, see name_create_mdt_osc(). LU-1257 */
2597 if (test_bit(FSDB_OSCNAME18, &fsdb->fsdb_flags))
2598 rc = name_create(&oscname, svname, "");
2600 rc = name_create(&oscname, svname, suffix);
2604 rc = name_create(&oscuuid, oscname, "_UUID");
2607 rc = name_create(&lovuuid, lovname, "_UUID");
2613 #03 L add_uuid nid=uml1@tcp(0x20000c0a80201) 0: 1:uml1_UUID
2615 #04 L add_uuid nid=1@elan(0x1000000000001) nal=90 0: 1:uml1_UUID
2616 #04 L attach 0:OSC_uml1_ost1_MNT_client 1:osc 2:89070_lov1_a41dff51a
2617 #05 L setup 0:OSC_uml1_ost1_MNT_client 1:ost1_UUID 2:uml1_UUID
2619 #06 L add_uuid nid=uml2@tcp(0x20000c0a80202) 0: 1:uml2_UUID
2620 #07 L add_conn 0:OSC_uml1_ost1_MNT_client 1:uml2_UUID
2621 #08 L lov_modify_tgts add 0:lov1 1:ost1_UUID 2(index):0 3(gen):1
2624 rc = record_start_log(env, mgs, &llh, logname);
2628 /* FIXME these should be a single journal transaction */
2629 rc = record_marker(env, llh, fsdb, CM_START | flags, mti->mti_svname,
2634 /* NB: don't change record order, because upon MDT steal OSC config
2635 * from client, it treats all nids before LCFG_SETUP as target nids
2636 * (multiple interfaces), while nids after as failover node nids.
2637 * See mgs_steal_client_llog_handler() LCFG_ADD_UUID.
2639 for (i = 0; i < mti->mti_nid_count; i++) {
2640 CDEBUG(D_MGS, "add nid %s\n",
2641 libcfs_nid2str_r(mti->mti_nids[i],
2642 nidstr, sizeof(nidstr)));
2643 rc = record_add_uuid(env, llh, mti->mti_nids[i], nodeuuid);
2647 rc = record_attach(env, llh, oscname, LUSTRE_OSC_NAME, lovuuid);
2650 rc = record_setup(env, llh, oscname, mti->mti_uuid, nodeuuid,
2654 rc = mgs_write_log_failnids(env, mti, llh, oscname);
2658 snprintf(index, sizeof(index), "%d", mti->mti_stripe_index);
2660 rc = record_lov_add(env, llh, lovname, mti->mti_uuid, index, "1");
2663 rc = record_marker(env, llh, fsdb, CM_END | flags, mti->mti_svname,
2668 record_end_log(env, &llh);
2670 name_destroy(&lovuuid);
2671 name_destroy(&oscuuid);
2672 name_destroy(&oscname);
2673 name_destroy(&svname);
2674 name_destroy(&nodeuuid);
/*
 * Write every config log section needed for a new OST.
 *
 * Steps visible here:
 *  1. if a log named mti->mti_svname already has content, report it on
 *     the console (message 0x141) as a reformatted or re-indexed
 *     target;
 *  2. read PARAM_FAILMODE from mti->mti_params; failout is set when the
 *     value starts with "failout";
 *  3. write the OST startup log between "add ost" markers: an attach of
 *     type "obdfilter" (mti_uuid defaults to "<svname>_UUID" when it is
 *     empty) and a setup whose fourth string is "n" for failout mode
 *     and "f" otherwise;
 *  4. choose the marker flags for the dependent logs: CM_SKIP |
 *     CM_UPGRADE146 when FSDB_OLDLOG14 is set; when LDD_F_UPDATE is not
 *     fully set in mti_flags a console warning says the client log was
 *     not updated;
 *  5. for every MDT in fsdb->fsdb_mdt_index_map add the osc to that
 *     MDT's lov (suffix "-MDT%04x", LUSTRE_SP_MDT);
 *  6. build the client log name "<fsname>-client"; if that log is empty
 *     start it with the lov and lmv sections, then add the osc to the
 *     client lov (fsdb->fsdb_clilov, LUSTRE_SP_CLI).
 */
2678 static int mgs_write_log_ost(const struct lu_env *env,
2679 struct mgs_device *mgs, struct fs_db *fsdb,
2680 struct mgs_target_info *mti)
2682 struct llog_handle *llh = NULL;
2683 char *logname, *lovname;
2684 char *ptr = mti->mti_params;
2685 int rc, flags = 0, failout = 0, i;
2688 CDEBUG(D_MGS, "writing new ost %s\n", mti->mti_svname);
2690 /* The ost startup log */
2692 /* If the ost log already exists, that means that someone reformatted
2693 the ost and it called target_add again. */
2694 if (!mgs_log_is_empty(env, mgs, mti->mti_svname)) {
2695 LCONSOLE_ERROR_MSG(0x141, "The config log for %s already "
2696 "exists, yet the server claims it never "
2697 "registered. It may have been reformatted, "
2698 "or the index changed. writeconf the MDT to "
2699 "regenerate all logs.\n", mti->mti_svname);
2704 attach obdfilter ost1 ost1_UUID
2705 setup /dev/loop2 ldiskfs f|n errors=remount-ro,user_xattr
2707 if (class_find_param(ptr, PARAM_FAILMODE, &ptr) == 0)
2708 failout = (strncmp(ptr, "failout", 7) == 0);
2709 rc = record_start_log(env, mgs, &llh, mti->mti_svname);
2712 /* FIXME these should be a single journal transaction */
2713 rc = record_marker(env, llh, fsdb, CM_START, mti->mti_svname,"add ost");
2716 if (*mti->mti_uuid == '\0')
2717 snprintf(mti->mti_uuid, sizeof(mti->mti_uuid),
2718 "%s_UUID", mti->mti_svname);
2719 rc = record_attach(env, llh, mti->mti_svname,
2720 "obdfilter"/*LUSTRE_OST_NAME*/, mti->mti_uuid);
2723 rc = record_setup(env, llh, mti->mti_svname,
2724 "dev"/*ignored*/, "type"/*ignored*/,
2725 failout ? "n" : "f", NULL/*options*/);
2728 rc = record_marker(env, llh, fsdb, CM_END, mti->mti_svname, "add ost");
2732 record_end_log(env, &llh);
2735 /* We also have to update the other logs where this osc is part of
2738 if (test_bit(FSDB_OLDLOG14, &fsdb->fsdb_flags)) {
2739 /* If we're upgrading, the old mdt log already has our
2740 entry. Let's do a fake one for fun. */
2741 /* Note that we can't add any new failnids, since we don't
2742 know the old osc names. */
2743 flags = CM_SKIP | CM_UPGRADE146;
2745 } else if ((mti->mti_flags & LDD_F_UPDATE) != LDD_F_UPDATE) {
2746 /* If the update flag isn't set, don't update client/mdt
2749 LCONSOLE_WARN("Client log for %s was not updated; writeconf "
2750 "the MDT first to regenerate it.\n",
2754 /* Add ost to all MDT lov defs */
2755 for (i = 0; i < INDEX_MAP_SIZE * 8; i++){
2756 if (test_bit(i, fsdb->fsdb_mdt_index_map)) {
2759 rc = name_create_mdt_and_lov(&logname, &lovname, fsdb,
2763 sprintf(mdt_index, "-MDT%04x", i);
2764 rc = mgs_write_log_osc_to_lov(env, mgs, fsdb, mti,
2766 lovname, LUSTRE_SP_MDT,
2768 name_destroy(&logname);
2769 name_destroy(&lovname);
2775 /* Append ost info to the client log */
2776 rc = name_create(&logname, mti->mti_fsname, "-client");
2779 if (mgs_log_is_empty(env, mgs, logname)) {
2780 /* Start client log */
2781 rc = mgs_write_log_lov(env, mgs, fsdb, mti, logname,
2785 rc = mgs_write_log_lmv(env, mgs, fsdb, mti, logname,
2790 rc = mgs_write_log_osc_to_lov(env, mgs, fsdb, mti, logname, "",
2791 fsdb->fsdb_clilov, LUSTRE_SP_CLI, flags);
2793 name_destroy(&logname);
/*
 * Report whether a "key=value" parameter string carries an empty value.
 *
 * Callers use a non-zero result to mean "delete this setting" rather than
 * "set it".
 *
 * \param ptr	NUL-terminated parameter string, e.g. "sys.timeout=" or
 *		"sys.timeout=20"
 *
 * \retval 1	the string contains '=' and nothing follows the first '='
 * \retval 0	there is no '=' at all, or a non-empty value follows it
 */
static __inline__ int mgs_param_empty(char *ptr)
{
	char *tmp = strchr(ptr, '=');

	/* Only the first '=' counts; "a==" therefore has the value "=". */
	if (tmp != NULL && tmp[1] == '\0')
		return 1;
	return 0;
}
/*
 * Update the failover NID records of target mti->mti_svname in the
 * config log "logname".
 *
 * If the parameter in mti->mti_params has an empty value ("...="),
 * mgs_modify() is asked to flag the existing "add failnid" sections of
 * mti->mti_svname with CM_SKIP, and any non-negative result is reported
 * as 0.  Otherwise a new section is appended to the log: a CM_START
 * marker, whatever mgs_write_log_failnids() records for "cliname", and a
 * CM_END "add failnid" marker.
 *
 * env      execution environment
 * mgs      MGS device
 * mti      target info; mti_params and mti_svname are read
 * logname  name of the config log to modify
 * cliname  client-side device name handed to mgs_write_log_failnids()
 *
 * NOTE(review): this excerpt elides several lines: the parameter between
 * "mgs" and "mti" (presumably the fs_db "fsdb" used below), the
 * declaration of rc, and presumably the rc checks after the record_*()
 * calls.  Confirm against the full file.
 */
2806 static int mgs_write_log_failnid_internal(const struct lu_env *env,
2807 struct mgs_device *mgs,
2809 struct mgs_target_info *mti,
2810 char *logname, char *cliname)
2813 struct llog_handle *llh = NULL;
2815 if (mgs_param_empty(mti->mti_params)) {
2816 /* Remove _all_ failnids */
2817 rc = mgs_modify(env, mgs, fsdb, mti, logname,
2818 mti->mti_svname, "add failnid", CM_SKIP);
/* A positive mgs_modify() result is not treated as an error here. */
2819 return rc < 0 ? rc : 0;
2822 /* Otherwise failover nids are additive */
2823 rc = record_start_log(env, mgs, &llh, logname);
2826 /* FIXME this should be a single journal transaction */
2827 rc = record_marker(env, llh, fsdb, CM_START, mti->mti_svname,
2831 rc = mgs_write_log_failnids(env, mti, llh, cliname);
2834 rc = record_marker(env, llh, fsdb, CM_END,
2835 mti->mti_svname, "add failnid");
2837 record_end_log(env, &llh);
2842 /* Add additional failnids to an existing log.
2843 The mdc/osc must have been added to logs first */
2844 /* tcp nids must be in dotted-quad ascii -
2845 we can't resolve hostnames from the kernel. */
/*
 * Steps visible in this excerpt:
 *  1. refuse (console error 0x142) when the target's own log
 *     mti->mti_svname is empty, i.e. the target never registered;
 *  2. build the client-side device name: "<svname>-mdc" for an MDT,
 *     "<svname>-osc" for an OST;
 *  3. apply the failnid change to "<fsname>-client" through
 *     mgs_write_log_failnid_internal();
 *  4. for an OST, repeat the change in the log of every MDT whose bit is
 *     set in fsdb->fsdb_mdt_index_map, using the name produced by
 *     name_create_mdt_osc().
 *
 * Every name obtained from name_create*() is released with
 * name_destroy() on the paths shown.
 *
 * NOTE(review): the fsdb parameter, the declarations of rc and i, and
 * the error returns between the steps are elided from this excerpt.
 */
2846 static int mgs_write_log_add_failnid(const struct lu_env *env,
2847 struct mgs_device *mgs,
2849 struct mgs_target_info *mti)
2851 char *logname, *cliname;
2855 /* FIXME we currently can't erase the failnids
2856 * given when a target first registers, since they aren't part of
2857 * an "add uuid" stanza */
2859 /* Verify that we know about this target */
2860 if (mgs_log_is_empty(env, mgs, mti->mti_svname)) {
2861 LCONSOLE_ERROR_MSG(0x142, "The target %s has not registered "
2862 "yet. It must be started before failnids "
2863 "can be added.\n", mti->mti_svname);
2867 /* Create mdc/osc client name (e.g. lustre-OST0001-osc) */
2868 if (mti->mti_flags & LDD_F_SV_TYPE_MDT) {
2869 rc = name_create(&cliname, mti->mti_svname, "-mdc");
2870 } else if (mti->mti_flags & LDD_F_SV_TYPE_OST) {
2871 rc = name_create(&cliname, mti->mti_svname, "-osc");
2877 /* Add failover nids to the client log */
2878 rc = name_create(&logname, mti->mti_fsname, "-client");
2880 name_destroy(&cliname);
2883 rc = mgs_write_log_failnid_internal(env, mgs, fsdb,mti,logname,cliname);
2884 name_destroy(&logname);
2885 name_destroy(&cliname);
2889 if (mti->mti_flags & LDD_F_SV_TYPE_OST) {
2890 /* Add OST failover nids to the MDT logs as well */
/* INDEX_MAP_SIZE * 8 is the number of bits scanned in the MDT index map. */
2893 for (i = 0; i < INDEX_MAP_SIZE * 8; i++) {
2894 if (!test_bit(i, fsdb->fsdb_mdt_index_map))
2896 rc = name_create_mdt(&logname, mti->mti_fsname, i);
2899 rc = name_create_mdt_osc(&cliname, mti->mti_svname,
2902 name_destroy(&logname);
2905 rc = mgs_write_log_failnid_internal(env, mgs, fsdb,
2908 name_destroy(&cliname);
2909 name_destroy(&logname);
/*
 * Write one parameter record for device "tgtname" into config log
 * "logname", superseding older settings of the same parameter.
 *
 * The parameter text "ptr" is copied into a local "comment" buffer, which
 * is used as the marker comment: mgs_modify() first flags matching older
 * sections with CM_SKIP, then a new record is built from "bufs"
 * (buffer 0 = tgtname, buffer 1 = ptr, and buffer 2 = LCTL_UPCALL when
 * LDD_F_PARAM2 is set in mti->mti_flags) and written with
 * mgs_write_log_direct().  The record command is LCFG_SET_PARAM for
 * LDD_F_PARAM2 and LCFG_PARAM otherwise.
 *
 * env      execution environment
 * mgs      MGS device
 * fsdb     filesystem database passed through to the log helpers
 * mti      target info; only mti_flags is read here
 * logname  config log to modify
 * bufs     scratch lustre_cfg_bufs, reset by this function
 * tgtname  device name the parameter applies to
 * ptr      "name=value" parameter string; an empty value means delete
 *
 * NOTE(review): memcpy() below always reads MTI_NAME_MAXLEN bytes from
 * "ptr", whatever its actual length.  Confirm that every caller passes a
 * buffer at least that large.
 * NOTE(review): the lines that cut "comment" at '=', that return early
 * when "del" is set, and that check "lcr" are elided from this excerpt.
 */
2918 static int mgs_wlp_lcfg(const struct lu_env *env,
2919 struct mgs_device *mgs, struct fs_db *fsdb,
2920 struct mgs_target_info *mti,
2921 char *logname, struct lustre_cfg_bufs *bufs,
2922 char *tgtname, char *ptr)
2924 char comment[MTI_NAME_MAXLEN];
2926 struct llog_cfg_rec *lcr;
2929 /* Erase any old settings of this same parameter */
2930 memcpy(comment, ptr, MTI_NAME_MAXLEN);
/* Force termination in case ptr filled the whole buffer. */
2931 comment[MTI_NAME_MAXLEN - 1] = 0;
2932 /* But don't try to match the value. */
2933 tmp = strchr(comment, '=');
2936 /* FIXME we should skip settings that are the same as old values */
2937 rc = mgs_modify(env, mgs, fsdb, mti, logname, tgtname, comment,CM_SKIP);
2940 del = mgs_param_empty(ptr);
/* Message wording: empty value -> "Disabling"; otherwise a non-zero
 * mgs_modify() result -> "Setting", zero -> "Modifying". */
2942 LCONSOLE_INFO("%s parameter %s.%s in log %s\n", del ? "Disabling" : rc ?
2943 "Setting" : "Modifying", tgtname, comment, logname);
2945 /* mgs_modify() will return 1 if nothing had to be done */
2951 lustre_cfg_bufs_reset(bufs, tgtname);
2952 lustre_cfg_bufs_set_string(bufs, 1, ptr);
2953 if (mti->mti_flags & LDD_F_PARAM2)
2954 lustre_cfg_bufs_set_string(bufs, 2, LCTL_UPCALL);
2956 lcr = lustre_cfg_rec_new((mti->mti_flags & LDD_F_PARAM2) ?
2957 LCFG_SET_PARAM : LCFG_PARAM, bufs);
2961 rc = mgs_write_log_direct(env, mgs, fsdb, logname, lcr, tgtname,
2963 lustre_cfg_rec_free(lcr);
/*
 * Record one parameter in the PARAMS_FILENAME config log.
 *
 * Thin wrapper around mgs_wlp_lcfg(): the log is always PARAMS_FILENAME,
 * the target name is mti->mti_svname, and a stack-local lustre_cfg_bufs
 * is used as scratch space.
 *
 * env  execution environment
 * mgs  MGS device
 * mti  target info supplying mti_svname (and the flags read by
 *      mgs_wlp_lcfg())
 * ptr  parameter string to record
 *
 * NOTE(review): the fsdb parameter, the declaration of rc and the return
 * statement are elided from this excerpt.
 */
2967 static int mgs_write_log_param2(const struct lu_env *env,
2968 struct mgs_device *mgs,
2970 struct mgs_target_info *mti, char *ptr)
2972 struct lustre_cfg_bufs bufs;
2976 CDEBUG(D_MGS, "next param '%s'\n", ptr);
2977 rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, PARAMS_FILENAME, &bufs,
2978 mti->mti_svname, ptr);
2983 /* write global variable settings into log */
/*
 * "sys" is the full parameter string and "ptr" the part that is matched
 * against the known global names below; "tmp" receives the value text
 * from class_match_param().
 *
 * Command selection visible here:
 *   PARAM_TIMEOUT       -> LCFG_SET_TIMEOUT
 *   PARAM_LDLM_TIMEOUT  -> LCFG_SET_LDLM_TIMEOUT
 *   PARAM_AT_*          -> accepted (the command assignment is elided)
 *   PARAM_JOBID_VAR     -> value kept as a string (convert = 0)
 *
 * The record carries "sys" in buffer 1 and, for unconverted non-empty
 * values, the value in buffer 2; for converted values lcfg_num holds
 * simple_strtoul(tmp, NULL, 0).  mgs_write_log_direct_all() is called
 * with a NULL record when the value is empty.  After a successful write
 * of a non-empty value, the two timeout commands are also applied
 * locally through class_process_config(), but only when the
 * corresponding *_set flag is clear or the new value exceeds the current
 * one.
 *
 * NOTE(review): the declaration of tmp, the final else branch for
 * unknown names, the lcr NULL check and the comment truncation announced
 * below are elided from this excerpt.
 */
2984 static int mgs_write_log_sys(const struct lu_env *env,
2985 struct mgs_device *mgs, struct fs_db *fsdb,
2986 struct mgs_target_info *mti, char *sys, char *ptr)
2988 struct mgs_thread_info *mgi = mgs_env_info(env);
2989 struct lustre_cfg *lcfg;
2990 struct llog_cfg_rec *lcr;
2992 int rc, cmd, convert = 1;
2994 if (class_match_param(ptr, PARAM_TIMEOUT, &tmp) == 0) {
2995 cmd = LCFG_SET_TIMEOUT;
2996 } else if (class_match_param(ptr, PARAM_LDLM_TIMEOUT, &tmp) == 0) {
2997 cmd = LCFG_SET_LDLM_TIMEOUT;
2998 /* Check for known params here so we can return error to lctl */
2999 } else if ((class_match_param(ptr, PARAM_AT_MIN, &tmp) == 0) ||
3000 (class_match_param(ptr, PARAM_AT_MAX, &tmp) == 0) ||
3001 (class_match_param(ptr, PARAM_AT_EXTRA, &tmp) == 0) ||
3002 (class_match_param(ptr, PARAM_AT_EARLY_MARGIN, &tmp) == 0) ||
3003 (class_match_param(ptr, PARAM_AT_HISTORY, &tmp) == 0)) {
3005 } else if (class_match_param(ptr, PARAM_JOBID_VAR, &tmp) == 0) {
3006 convert = 0; /* Don't convert string value to integer */
3012 if (mgs_param_empty(ptr))
3013 CDEBUG(D_MGS, "global '%s' removed\n", sys);
3015 CDEBUG(D_MGS, "global '%s' val=%s\n", sys, tmp);
3017 lustre_cfg_bufs_reset(&mgi->mgi_bufs, NULL);
3018 lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 1, sys);
3019 if (!convert && *tmp != '\0')
3020 lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 2, tmp);
3021 lcr = lustre_cfg_rec_new(cmd, &mgi->mgi_bufs);
3025 lcfg = &lcr->lcr_cfg;
3026 lcfg->lcfg_num = convert ? simple_strtoul(tmp, NULL, 0) : 0;
3027 /* truncate the comment to the parameter name */
3031 /* modify all servers and clients */
3032 rc = mgs_write_log_direct_all(env, mgs, fsdb, mti,
3033 *tmp == '\0' ? NULL : lcr,
3034 mti->mti_fsname, sys, 0);
3035 if (rc == 0 && *tmp != '\0') {
3037 case LCFG_SET_TIMEOUT:
3038 if (!obd_timeout_set || lcfg->lcfg_num > obd_timeout)
3039 class_process_config(lcfg);
3041 case LCFG_SET_LDLM_TIMEOUT:
3042 if (!ldlm_timeout_set || lcfg->lcfg_num > ldlm_timeout)
3043 class_process_config(lcfg);
3050 lustre_cfg_rec_free(lcr);
3054 /* write quota settings into log */
/*
 * "quota" is the full parameter string and "ptr" the part matched against
 * the pool names; "tmp" receives the value text.
 *
 * Validation visible here:
 *  - the name must match QUOTA_METAPOOL_NAME or QUOTA_DATAPOOL_NAME,
 *    otherwise an error is logged;
 *  - the value must contain 'u' or 'g', or be exactly "none", otherwise
 *    an error is logged.
 *
 * An LCFG_PARAM record is built with the filesystem name in buffer 0 and
 * "quota" in buffer 1, then handed to mgs_write_log_direct_all() (NULL
 * record when the value is empty, last argument 1).  Positive results
 * from that call are reported as 0.
 *
 * NOTE(review): the declarations of tmp and sep, the error return codes,
 * the empty-value test and the comment truncation announced below are
 * elided from this excerpt.
 */
3055 static int mgs_write_log_quota(const struct lu_env *env, struct mgs_device *mgs,
3056 struct fs_db *fsdb, struct mgs_target_info *mti,
3057 char *quota, char *ptr)
3059 struct mgs_thread_info *mgi = mgs_env_info(env);
3060 struct llog_cfg_rec *lcr;
3063 int rc, cmd = LCFG_PARAM;
3065 /* support only 'meta' and 'data' pools so far */
3066 if (class_match_param(ptr, QUOTA_METAPOOL_NAME, &tmp) != 0 &&
3067 class_match_param(ptr, QUOTA_DATAPOOL_NAME, &tmp) != 0) {
3068 CERROR("parameter quota.%s isn't supported (only quota.mdt "
3069 "& quota.ost are)\n", ptr);
3074 CDEBUG(D_MGS, "global '%s' removed\n", quota);
3076 CDEBUG(D_MGS, "global '%s'\n", quota);
3078 if (strchr(tmp, 'u') == NULL && strchr(tmp, 'g') == NULL &&
3079 strcmp(tmp, "none") != 0) {
3080 CERROR("enable option(%s) isn't supported\n", tmp);
3085 lustre_cfg_bufs_reset(&mgi->mgi_bufs, mti->mti_fsname);
3086 lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 1, quota);
3087 lcr = lustre_cfg_rec_new(cmd, &mgi->mgi_bufs);
3091 /* truncate the comment to the parameter name */
3096 /* XXX we duplicated quota enable information in all server
3097 * config logs, it should be moved to a separate config
3098 * log once we cleanup the config log for global param. */
3099 /* modify all servers */
3100 rc = mgs_write_log_direct_all(env, mgs, fsdb, mti,
3101 *tmp == '\0' ? NULL : lcr,
3102 mti->mti_fsname, quota, 1);
3104 lustre_cfg_rec_free(lcr);
3105 return rc < 0 ? rc : 0;
/*
 * Persist one sptlrpc parameter in the "<fsname>-sptlrpc" config log.
 *
 * Sequence visible here:
 *  - locate '=' in "param" (asserted to exist) and copy the leading
 *    "len" bytes of param into a freshly allocated "comment" string;
 *  - build an LCFG_SPTLRPC_CONF record with mti->mti_svname in buffer 0
 *    and the whole parameter in buffer 1;
 *  - if the log is still empty, open and close it once with
 *    record_start_log()/record_end_log();
 *  - call mgs_modify() to obsolete the previous setting, then
 *    mgs_write_log_direct() to append the new record;
 *  - release logname, the record and the comment buffer.
 *
 * NOTE(review): the fsdb and param parameters, the computation of "len",
 * the arguments completing the mgs_modify() call, and the labels and rc
 * checks of the cleanup chain are elided from this excerpt.
 */
3108 static int mgs_srpc_set_param_disk(const struct lu_env *env,
3109 struct mgs_device *mgs,
3111 struct mgs_target_info *mti,
3114 struct mgs_thread_info *mgi = mgs_env_info(env);
3115 struct llog_cfg_rec *lcr;
3116 struct llog_handle *llh = NULL;
3118 char *comment, *ptr;
3124 ptr = strchr(param, '=');
3125 LASSERT(ptr != NULL);
3128 OBD_ALLOC(comment, len + 1);
3129 if (comment == NULL)
3131 strncpy(comment, param, len);
/* strncpy() does not terminate when it copies exactly len bytes. */
3132 comment[len] = '\0';
3135 lustre_cfg_bufs_reset(&mgi->mgi_bufs, mti->mti_svname);
3136 lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 1, param);
3137 lcr = lustre_cfg_rec_new(LCFG_SPTLRPC_CONF, &mgi->mgi_bufs);
3139 GOTO(out_comment, rc = -ENOMEM);
3141 /* construct log name */
3142 rc = name_create(&logname, mti->mti_fsname, "-sptlrpc");
3146 if (mgs_log_is_empty(env, mgs, logname)) {
3147 rc = record_start_log(env, mgs, &llh, logname);
3150 record_end_log(env, &llh);
3153 /* obsolete old one */
3154 rc = mgs_modify(env, mgs, fsdb, mti, logname, mti->mti_svname,
3158 /* write the new one */
3159 rc = mgs_write_log_direct(env, mgs, fsdb, logname, lcr,
3160 mti->mti_svname, comment);
3162 CERROR("%s: error writing log %s: rc = %d\n",
3163 mgs->mgs_obd->obd_name, logname, rc);
3165 name_destroy(&logname);
3167 lustre_cfg_rec_free(lcr);
/* Size must match the OBD_ALLOC() above. */
3169 OBD_FREE(comment, len + 1);
/*
 * Apply the user-descriptor ("udesc") sptlrpc setting to the in-memory
 * fsdb flags.
 *
 * In the code shown, a value of "yes" sets FSDB_UDESC in
 * fsdb->fsdb_flags and a value of "no" clears it; each case logs a
 * warning.  Other inputs reach the "Invalid param" error message.
 *
 * NOTE(review): the "following line" that the first comment refers to,
 * the param parameter, the handling of the '=' separator and the return
 * statements are elided from this excerpt, so whether the yes/no code is
 * reachable cannot be determined from here.
 */
3173 static int mgs_srpc_set_param_udesc_mem(struct fs_db *fsdb,
3178 /* disable the adjustable udesc parameter for now, i.e. use default
3179 * setting that client always ship udesc to MDT if possible. to enable
3180 * it simply remove the following line */
3183 ptr = strchr(param, '=');
3188 if (strcmp(param, PARAM_SRPC_UDESC))
3191 if (strcmp(ptr, "yes") == 0) {
3192 set_bit(FSDB_UDESC, &fsdb->fsdb_flags);
3193 CWARN("Enable user descriptor shipping from client to MDT\n");
3194 } else if (strcmp(ptr, "no") == 0) {
3195 clear_bit(FSDB_UDESC, &fsdb->fsdb_flags);
3196 CWARN("Disable user descriptor shipping from client to MDT\n");
3204 CERROR("Invalid param: %s\n", param);
/*
 * Parse one sptlrpc parameter and merge it into the in-memory rule sets
 * of "fsdb".
 *
 * Visible flow:
 *  - reject anything that does not start with PARAM_SRPC;
 *  - hand PARAM_SRPC_UDESC parameters to mgs_srpc_set_param_udesc_mem();
 *  - require the PARAM_SRPC_FLVR prefix, skip it, and parse the rest
 *    with sptlrpc_parse_rule();
 *  - when FSDB_MGS_SELF is set, test that the rule is from
 *    LUSTRE_SP_MGC/ANY to LUSTRE_SP_MGS/ANY;
 *  - choose the destination rule set from "svname":
 *      contains '-'       per-target set in the fsdb->fsdb_srpc_tgt list,
 *                         with a new entry allocated and pushed at the
 *                         list head
 *      is MGSSELF_NAME    fsdb->fsdb_mgs->mgs_lut.lut_sptlrpc_rset
 *      otherwise          fsdb->fsdb_srpc_gen
 *  - merge with sptlrpc_rule_set_merge().
 *
 * NOTE(review): the svname/param parameters, the error return values and
 * the found/not-found handling around the target list walk are elided
 * from this excerpt.
 */
3208 static int mgs_srpc_set_param_mem(struct fs_db *fsdb,
3212 struct sptlrpc_rule rule;
3213 struct sptlrpc_rule_set *rset;
3217 if (strncmp(param, PARAM_SRPC, sizeof(PARAM_SRPC) - 1) != 0) {
3218 CERROR("Invalid sptlrpc parameter: %s\n", param);
3222 if (strncmp(param, PARAM_SRPC_UDESC,
3223 sizeof(PARAM_SRPC_UDESC) - 1) == 0) {
3224 RETURN(mgs_srpc_set_param_udesc_mem(fsdb, param));
3227 if (strncmp(param, PARAM_SRPC_FLVR, sizeof(PARAM_SRPC_FLVR) - 1) != 0) {
3228 CERROR("Invalid sptlrpc flavor parameter: %s\n", param);
/* Skip the flavor prefix so that only the rule text is parsed. */
3232 param += sizeof(PARAM_SRPC_FLVR) - 1;
3234 rc = sptlrpc_parse_rule(param, &rule);
3238 /* rules for the MGS itself must be mgc->mgs */
3239 if (test_bit(FSDB_MGS_SELF, &fsdb->fsdb_flags)) {
3240 if ((rule.sr_from != LUSTRE_SP_MGC &&
3241 rule.sr_from != LUSTRE_SP_ANY) ||
3242 (rule.sr_to != LUSTRE_SP_MGS &&
3243 rule.sr_to != LUSTRE_SP_ANY))
3247 /* prepare room for this coming rule. svcname format should be:
3248 * - fsname: general rule
3249 * - fsname-tgtname: target-specific rule
3251 if (strchr(svname, '-')) {
3252 struct mgs_tgt_srpc_conf *tgtconf;
3255 for (tgtconf = fsdb->fsdb_srpc_tgt; tgtconf != NULL;
3256 tgtconf = tgtconf->mtsc_next) {
3257 if (!strcmp(tgtconf->mtsc_tgt, svname)) {
3266 OBD_ALLOC_PTR(tgtconf);
3267 if (tgtconf == NULL)
3270 name_len = strlen(svname);
3272 OBD_ALLOC(tgtconf->mtsc_tgt, name_len + 1);
3273 if (tgtconf->mtsc_tgt == NULL) {
3274 OBD_FREE_PTR(tgtconf);
3277 memcpy(tgtconf->mtsc_tgt, svname, name_len);
3279 tgtconf->mtsc_next = fsdb->fsdb_srpc_tgt;
3280 fsdb->fsdb_srpc_tgt = tgtconf;
3283 rset = &tgtconf->mtsc_rset;
3284 } else if (strcmp(svname, MGSSELF_NAME) == 0) {
3285 /* put _mgs related srpc rule directly in mgs ruleset */
3286 rset = &fsdb->fsdb_mgs->mgs_lut.lut_sptlrpc_rset;
3288 rset = &fsdb->fsdb_srpc_gen;
3291 rc = sptlrpc_rule_set_merge(rset, &rule);
/*
 * Apply one sptlrpc parameter both in memory and on disk.
 *
 * A private copy of "param" is taken first.  The original string is
 * passed to mgs_srpc_set_param_mem() and the untouched copy to
 * mgs_srpc_set_param_disk().  When the fsdb has FSDB_MGS_SELF set, the
 * general rule set fsdb->fsdb_srpc_gen is pushed to the MGS obd with
 * sptlrpc_target_update_exp_flavor(); that path asserts that no
 * per-target rules exist.  The copy is freed before returning.
 *
 * NOTE(review): the fsdb and param parameters, the local declarations,
 * the allocation-failure check and the rc checks between the steps are
 * elided from this excerpt.
 */
3296 static int mgs_srpc_set_param(const struct lu_env *env,
3297 struct mgs_device *mgs,
3299 struct mgs_target_info *mti,
3309 /* keep a copy of original param, which could be destroyed
3311 copy_size = strlen(param) + 1;
3312 OBD_ALLOC(copy, copy_size);
3315 memcpy(copy, param, copy_size);
3317 rc = mgs_srpc_set_param_mem(fsdb, mti->mti_svname, param);
3321 /* previous steps guaranteed the syntax is correct */
3322 rc = mgs_srpc_set_param_disk(env, mgs, fsdb, mti, copy);
3326 if (test_bit(FSDB_MGS_SELF, &fsdb->fsdb_flags)) {
3328 * for mgs rules, make them effective immediately.
3330 LASSERT(fsdb->fsdb_srpc_tgt == NULL);
3331 sptlrpc_target_update_exp_flavor(mgs->mgs_obd,
3332 &fsdb->fsdb_srpc_gen);
3336 OBD_FREE(copy, copy_size);
/*
 * Cursor state handed to mgs_srpc_read_handler() through the llog
 * processing callback argument.
 *
 * NOTE(review): the handler also reads and writes an msrd_skip member
 * whose declaration is elided from this excerpt.
 */
3340 struct mgs_srpc_read_data {
/* fsdb whose in-memory sptlrpc rule sets receive the replayed records */
3341 struct fs_db *msrd_fsdb;
/*
 * llog record callback that replays one record of an sptlrpc log into
 * the in-memory rule sets.
 *
 * "data" is a struct mgs_srpc_read_data.  Each record must be an
 * OBD_CFG_REC whose payload passes lustre_cfg_sanity_check().
 *
 *  - LCFG_MARKER records only update msrd_skip: it is set when the
 *    marker has both CM_START and CM_SKIP, and cleared on CM_END.
 *  - Other records are tested against msrd_skip first.  A record that is
 *    processed must be LCFG_SPTLRPC_CONF, with the service name in
 *    string 0 and the parameter in string 1; both are passed to
 *    mgs_srpc_set_param_mem() for msrd->msrd_fsdb.
 *
 * NOTE(review): the return statements after each check and error
 * message are elided from this excerpt, so the exact error codes are
 * not visible here.
 */
3345 static int mgs_srpc_read_handler(const struct lu_env *env,
3346 struct llog_handle *llh,
3347 struct llog_rec_hdr *rec, void *data)
3349 struct mgs_srpc_read_data *msrd = data;
3350 struct cfg_marker *marker;
3351 struct lustre_cfg *lcfg = REC_DATA(rec);
3352 char *svname, *param;
3356 if (rec->lrh_type != OBD_CFG_REC) {
3357 CERROR("unhandled lrh_type: %#x\n", rec->lrh_type);
3361 cfg_len = REC_DATA_LEN(rec);
3363 rc = lustre_cfg_sanity_check(lcfg, cfg_len);
3365 CERROR("Insane cfg\n");
3369 if (lcfg->lcfg_command == LCFG_MARKER) {
3370 marker = lustre_cfg_buf(lcfg, 1);
/* A section opened by a START marker carrying SKIP is flagged here. */
3372 if (marker->cm_flags & CM_START &&
3373 marker->cm_flags & CM_SKIP)
3374 msrd->msrd_skip = 1;
3375 if (marker->cm_flags & CM_END)
3376 msrd->msrd_skip = 0;
3381 if (msrd->msrd_skip)
3384 if (lcfg->lcfg_command != LCFG_SPTLRPC_CONF) {
3385 CERROR("invalid command (%x)\n", lcfg->lcfg_command);
3389 svname = lustre_cfg_string(lcfg, 0);
3390 if (svname == NULL) {
3391 CERROR("svname is empty\n");
3395 param = lustre_cfg_string(lcfg, 1);
3396 if (param == NULL) {
3397 CERROR("param is empty\n");
3401 rc = mgs_srpc_set_param_mem(msrd->msrd_fsdb, svname, param);
3403 CERROR("read sptlrpc record error (%d): %s\n", rc, param);
/*
 * Load the sptlrpc rules of a filesystem from its "<fsname>-sptlrpc"
 * config log into the fsdb.
 *
 * The log name is derived from fsdb->fsdb_name.  The config llog context
 * of the MGS obd is taken (asserted non-NULL) and released with
 * llog_ctxt_put().  If the log is not empty it is opened, initialised as
 * a plain llog and, provided llog_get_size() reports more than one
 * record, processed with mgs_srpc_read_handler() using a local
 * struct mgs_srpc_read_data that points at "fsdb".  A failure is
 * reported with CERROR.
 *
 * NOTE(review): the fsdb parameter, the declaration of logname/rc, the
 * remaining llog_open()/llog_process() arguments, the goto labels and
 * the rc checks are elided from this excerpt.
 */
3408 int mgs_get_fsdb_srpc_from_llog(const struct lu_env *env,
3409 struct mgs_device *mgs,
3412 struct llog_handle *llh = NULL;
3413 struct llog_ctxt *ctxt;
3415 struct mgs_srpc_read_data msrd;
3419 /* construct log name */
3420 rc = name_create(&logname, fsdb->fsdb_name, "-sptlrpc");
3424 ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
3425 LASSERT(ctxt != NULL);
3427 if (mgs_log_is_empty(env, mgs, logname))
3430 rc = llog_open(env, ctxt, &llh, NULL, logname,
3438 rc = llog_init_handle(env, llh, LLOG_F_IS_PLAIN, NULL);
3440 GOTO(out_close, rc);
/* A size of at most 1 is treated as "nothing to replay". */
3442 if (llog_get_size(llh) <= 1)
3443 GOTO(out_close, rc = 0);
3445 msrd.msrd_fsdb = fsdb;
3448 rc = llog_process(env, llh, mgs_srpc_read_handler, (void *)&msrd,
3452 llog_close(env, llh);
3454 llog_ctxt_put(ctxt);
3455 name_destroy(&logname);
3458 CERROR("failed to read sptlrpc config database: %d\n", rc);
3462 /* Permanent settings of all parameters by writing into the appropriate
3463 * configuration logs.
3464 * A parameter with null value ("<param>='\0'") means to erase it out of
 * the logs.
 *
 * "ptr" holds exactly one "name=value" parameter.  It is matched against
 * the known prefixes in the order below, and the first matching family
 * handles it:
 *
 *   PARAM_MGSNODE, PARAM_NETWORK   processed elsewhere (see the
 *                                  "Processed in ..." comments)
 *   PARAM_FAILMODE                 console error when LDD_F_PARAM is set
 *   PARAM_SRPC                     mgs_srpc_set_param()
 *   PARAM_FAILNODE                 mgs_write_log_add_failnid() when
 *                                  LDD_F_PARAM is set
 *   PARAM_SYS                      mgs_write_log_sys()
 *   PARAM_QUOTA                    mgs_write_log_quota()
 *   osc/mdc "active"               mgs_modify() on the client log and on
 *                                  every MDT log, then falls through to
 *                                  the osc/mdc handling
 *   PARAM_LOV                      MDT log and client log
 *   PARAM_OSC/PARAM_MDC/PARAM_LLITE client log, plus MDT logs for OSTs
 *                                  and the OSP/LOD records for
 *                                  mdc "active"
 *   PARAM_MDT                      one or all MDT logs; root squash
 *                                  settings are mirrored to the client
 *                                  log
 *   PARAM_MDD/PARAM_OST/PARAM_OSD  the target's own log
 *
 * Anything else is reported with "Ignoring unrecognized param".
 *
 * NOTE(review): this excerpt elides many lines (local declarations, rc
 * checks, GOTO(end, ...) statements, closing braces and some call
 * arguments).  Only the visible statements are described above.
 */
3467 static int mgs_write_log_param(const struct lu_env *env,
3468 struct mgs_device *mgs, struct fs_db *fsdb,
3469 struct mgs_target_info *mti, char *ptr)
3471 struct mgs_thread_info *mgi = mgs_env_info(env);
3477 /* For various parameter settings, we have to figure out which logs
3478 care about them (e.g. both mdt and client for lov settings) */
3479 CDEBUG(D_MGS, "next param '%s'\n", ptr);
3481 /* The params are stored in MOUNT_DATA_FILE and modified via
3482 tunefs.lustre, or set using lctl conf_param */
3484 /* Processed in lustre_start_mgc */
3485 if (class_match_param(ptr, PARAM_MGSNODE, NULL) == 0)
3488 /* Processed in ost/mdt */
3489 if (class_match_param(ptr, PARAM_NETWORK, NULL) == 0)
3492 /* Processed in mgs_write_log_ost */
3493 if (class_match_param(ptr, PARAM_FAILMODE, NULL) == 0) {
3494 if (mti->mti_flags & LDD_F_PARAM) {
3495 LCONSOLE_ERROR_MSG(0x169, "%s can only be "
3496 "changed with tunefs.lustre "
3497 "and --writeconf\n", ptr);
3503 if (class_match_param(ptr, PARAM_SRPC, NULL) == 0) {
3504 rc = mgs_srpc_set_param(env, mgs, fsdb, mti, ptr);
3508 if (class_match_param(ptr, PARAM_FAILNODE, NULL) == 0) {
3509 /* Add a failover nidlist */
3511 /* We already processed failovers params for new
3512 targets in mgs_write_log_target */
3513 if (mti->mti_flags & LDD_F_PARAM) {
3514 CDEBUG(D_MGS, "Adding failnode\n");
3515 rc = mgs_write_log_add_failnid(env, mgs, fsdb, mti);
3520 if (class_match_param(ptr, PARAM_SYS, &tmp) == 0) {
3521 rc = mgs_write_log_sys(env, mgs, fsdb, mti, ptr, tmp);
3525 if (class_match_param(ptr, PARAM_QUOTA, &tmp) == 0) {
3526 rc = mgs_write_log_quota(env, mgs, fsdb, mti, ptr, tmp);
3530 if (class_match_param(ptr, PARAM_OSC PARAM_ACTIVE, &tmp) == 0 ||
3531 class_match_param(ptr, PARAM_MDC PARAM_ACTIVE, &tmp) == 0) {
3532 /* active=0 means off, anything else means on */
3533 int flag = (*tmp == '0') ? CM_EXCLUDE : 0;
3534 bool deactive_osc = memcmp(ptr, PARAM_OSC PARAM_ACTIVE,
3535 strlen(PARAM_OSC PARAM_ACTIVE)) == 0;
3538 if (!deactive_osc) {
3541 rc = server_name2index(mti->mti_svname, &index, NULL);
3546 LCONSOLE_ERROR_MSG(0x144, "%s: MDC0 can not be"
3547 " (de)activated.\n",
3549 GOTO(end, rc = -EINVAL);
3553 LCONSOLE_WARN("Permanently %sactivating %s\n",
3554 flag ? "de" : "re", mti->mti_svname);
3556 rc = name_create(&logname, mti->mti_fsname, "-client");
3559 rc = mgs_modify(env, mgs, fsdb, mti, logname,
3561 deactive_osc ? "add osc" : "add mdc", flag);
3562 name_destroy(&logname);
3567 /* Add to all MDT logs for DNE */
3568 for (i = 0; i < INDEX_MAP_SIZE * 8; i++) {
3569 if (!test_bit(i, fsdb->fsdb_mdt_index_map))
3571 rc = name_create_mdt(&logname, mti->mti_fsname, i);
3574 rc = mgs_modify(env, mgs, fsdb, mti, logname,
3576 deactive_osc ? "add osc" : "add osp",
3578 name_destroy(&logname);
3584 LCONSOLE_ERROR_MSG(0x145, "Couldn't find %s in "
3585 "log (%d). No permanent "
3586 "changes were made to the "
3588 mti->mti_svname, rc);
3589 if (test_bit(FSDB_OLDLOG14, &fsdb->fsdb_flags))
3590 LCONSOLE_ERROR_MSG(0x146, "This may be"
3595 "update the logs.\n");
3598 /* Fall through to osc/mdc proc for deactivating live
3599 OSC/OSP on running MDT / clients. */
3601 /* Below here, let obd's XXX_process_config methods handle it */
3603 /* All lov. in proc */
3604 if (class_match_param(ptr, PARAM_LOV, NULL) == 0) {
3607 CDEBUG(D_MGS, "lov param %s\n", ptr);
3608 if (!(mti->mti_flags & LDD_F_SV_TYPE_MDT)) {
3609 LCONSOLE_ERROR_MSG(0x147, "LOV params must be "
3610 "set on the MDT, not %s. "
3617 if (mgs_log_is_empty(env, mgs, mti->mti_svname))
3618 GOTO(end, rc = -ENODEV);
3620 rc = name_create_mdt_and_lov(&logname, &mdtlovname, fsdb,
3621 mti->mti_stripe_index);
3624 rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, mti->mti_svname,
3625 &mgi->mgi_bufs, mdtlovname, ptr);
3626 name_destroy(&logname);
3627 name_destroy(&mdtlovname);
3632 rc = name_create(&logname, mti->mti_fsname, "-client");
3635 rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, logname, &mgi->mgi_bufs,
3636 fsdb->fsdb_clilov, ptr);
3637 name_destroy(&logname);
3641 /* All osc., mdc., llite. params in proc */
3642 if ((class_match_param(ptr, PARAM_OSC, NULL) == 0) ||
3643 (class_match_param(ptr, PARAM_MDC, NULL) == 0) ||
3644 (class_match_param(ptr, PARAM_LLITE, NULL) == 0)) {
3647 if (test_bit(FSDB_OLDLOG14, &fsdb->fsdb_flags)) {
3648 LCONSOLE_ERROR_MSG(0x148, "Upgraded client logs for %s"
3649 " cannot be modified. Consider"
3650 " updating the configuration with"
3653 GOTO(end, rc = -EINVAL);
3655 if (memcmp(ptr, PARAM_LLITE, strlen(PARAM_LLITE)) == 0) {
3656 rc = name_create(&cname, mti->mti_fsname, "-client");
3657 /* Add the client type to match the obdname in
3658 class_config_llog_handler */
3659 } else if (mti->mti_flags & LDD_F_SV_TYPE_MDT) {
3660 rc = name_create(&cname, mti->mti_svname, "-mdc");
3661 } else if (mti->mti_flags & LDD_F_SV_TYPE_OST) {
3662 rc = name_create(&cname, mti->mti_svname, "-osc");
3664 GOTO(end, rc = -EINVAL);
3669 /* Forbid direct update of llite root squash parameters.
3670 * These parameters are indirectly set via the MDT settings.
 */
3672 if ((class_match_param(ptr, PARAM_LLITE, &tmp) == 0) &&
3673 ((memcmp(tmp, "root_squash=", 12) == 0) ||
3674 (memcmp(tmp, "nosquash_nids=", 14) == 0))) {
3675 LCONSOLE_ERROR("%s: root squash parameters can only "
3676 "be updated through MDT component\n",
3678 name_destroy(&cname);
3679 GOTO(end, rc = -EINVAL);
3682 CDEBUG(D_MGS, "%.3s param %s\n", ptr, ptr + 4);
3685 rc = name_create(&logname, mti->mti_fsname, "-client");
3687 name_destroy(&cname);
3690 rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, logname, &mgi->mgi_bufs,
3693 /* osc params affect the MDT as well */
3694 if (!rc && (mti->mti_flags & LDD_F_SV_TYPE_OST)) {
3697 for (i = 0; i < INDEX_MAP_SIZE * 8; i++){
3698 if (!test_bit(i, fsdb->fsdb_mdt_index_map))
3700 name_destroy(&cname);
3701 rc = name_create_mdt_osc(&cname, mti->mti_svname,
3703 name_destroy(&logname);
3706 rc = name_create_mdt(&logname,
3707 mti->mti_fsname, i);
3710 if (!mgs_log_is_empty(env, mgs, logname)) {
3711 rc = mgs_wlp_lcfg(env, mgs, fsdb,
3721 /* For mdc activate/deactivate, it affects OSP on MDT as well */
3722 if (class_match_param(ptr, PARAM_MDC PARAM_ACTIVE, &tmp) == 0 &&
3725 char *lodname = NULL;
3726 char *param_str = NULL;
3730 /* replace mdc with osp */
3731 memcpy(ptr, PARAM_OSP, strlen(PARAM_OSP));
3732 rc = server_name2index(mti->mti_svname, &index, NULL);
/* Undo the in-place "mdc" -> "osp" rewrite of the caller's string. */
3734 memcpy(ptr, PARAM_MDC, strlen(PARAM_MDC));
3738 for (i = 0; i < INDEX_MAP_SIZE * 8; i++) {
3739 if (!test_bit(i, fsdb->fsdb_mdt_index_map))
3745 name_destroy(&logname);
3746 rc = name_create_mdt(&logname, mti->mti_fsname,
3751 if (mgs_log_is_empty(env, mgs, logname))
3754 snprintf(suffix, sizeof(suffix), "-osp-MDT%04x",
3756 name_destroy(&cname);
3757 rc = name_create(&cname, mti->mti_svname,
3762 rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, logname,
3763 &mgi->mgi_bufs, cname, ptr);
3767 /* Add configuration log for notifying LOD
3768 * to activate/deactivate the OSP. */
3769 name_destroy(&param_str);
3770 rc = name_create(&param_str, cname,
3771 (*tmp == '0') ? ".active=0" :
3776 name_destroy(&lodname);
3777 rc = name_create(&lodname, logname, "-mdtlov");
3781 rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, logname,
3782 &mgi->mgi_bufs, lodname,
/* Restore the caller's string before the temporaries are released. */
3787 memcpy(ptr, PARAM_MDC, strlen(PARAM_MDC));
3788 name_destroy(&lodname);
3789 name_destroy(&param_str);
3792 name_destroy(&logname);
3793 name_destroy(&cname);
3797 /* All mdt. params in proc */
3798 if (class_match_param(ptr, PARAM_MDT, &tmp) == 0) {
3802 CDEBUG(D_MGS, "%.3s param %s\n", ptr, ptr + 4);
3803 if (strncmp(mti->mti_svname, mti->mti_fsname,
3804 MTI_NAME_MAXLEN) == 0)
3805 /* device is unspecified completely? */
3806 rc = LDD_F_SV_TYPE_MDT | LDD_F_SV_ALL;
3808 rc = server_name2index(mti->mti_svname, &idx, NULL);
3811 if ((rc & LDD_F_SV_TYPE_MDT) == 0)
3813 if (rc & LDD_F_SV_ALL) {
3814 for (i = 0; i < INDEX_MAP_SIZE * 8; i++) {
3816 fsdb->fsdb_mdt_index_map))
3818 rc = name_create_mdt(&logname,
3819 mti->mti_fsname, i);
3822 rc = mgs_wlp_lcfg(env, mgs, fsdb, mti,
3823 logname, &mgi->mgi_bufs,
3825 name_destroy(&logname);
3830 if ((memcmp(tmp, "root_squash=", 12) == 0) ||
3831 (memcmp(tmp, "nosquash_nids=", 14) == 0)) {
3832 LCONSOLE_ERROR("%s: root squash parameters "
3833 "cannot be applied to a single MDT\n",
3835 GOTO(end, rc = -EINVAL);
3837 rc = mgs_wlp_lcfg(env, mgs, fsdb, mti,
3838 mti->mti_svname, &mgi->mgi_bufs,
3839 mti->mti_svname, ptr);
3844 /* root squash settings are also applied to llite
3845 * config log (see LU-1778) */
3847 ((memcmp(tmp, "root_squash=", 12) == 0) ||
3848 (memcmp(tmp, "nosquash_nids=", 14) == 0))) {
3852 rc = name_create(&cname, mti->mti_fsname, "-client");
3855 rc = name_create(&logname, mti->mti_fsname, "-client");
3857 name_destroy(&cname);
3860 rc = name_create(&ptr2, PARAM_LLITE, tmp);
3862 name_destroy(&cname);
3863 name_destroy(&logname);
3866 rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, logname,
3867 &mgi->mgi_bufs, cname, ptr2);
3868 name_destroy(&ptr2);
3869 name_destroy(&logname);
3870 name_destroy(&cname);
3875 /* All mdd., ost. and osd. params in proc */
3876 if ((class_match_param(ptr, PARAM_MDD, NULL) == 0) ||
3877 (class_match_param(ptr, PARAM_OST, NULL) == 0) ||
3878 (class_match_param(ptr, PARAM_OSD, NULL) == 0)) {
3879 CDEBUG(D_MGS, "%.3s param %s\n", ptr, ptr + 4);
3880 if (mgs_log_is_empty(env, mgs, mti->mti_svname))
3881 GOTO(end, rc = -ENODEV);
3883 rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, mti->mti_svname,
3884 &mgi->mgi_bufs, mti->mti_svname, ptr);
3888 LCONSOLE_WARN("Ignoring unrecognized param '%s'\n", ptr);
3892 CERROR("err %d on param '%s'\n", rc, ptr);
/*
 * Write or update the configuration logs for a registering target.
 *
 * Visible flow:
 *  - mgs_set_index() sets/checks the target index.  A result of
 *    EALREADY (positive) clears LDD_F_UPDATE in mti->mti_flags so that
 *    the client/MDT logs are left alone, as the comment below explains.
 *  - Under fsdb->fsdb_mutex:
 *      with LDD_F_VIRGIN, LDD_F_UPGRADE14 or LDD_F_WRITECONF set, the
 *      log is generated from scratch by mgs_write_log_mdt() or
 *      mgs_write_log_ost() according to the target type; unknown types
 *      are reported with CERROR;
 *      otherwise LDD_F_PARAM is set in mti->mti_flags so that only the
 *      parameters are applied.
 *  - Each parameter in mti->mti_params is extracted into a temporary
 *    buffer by class_get_next_param() and passed to
 *    mgs_write_log_param().  The buffer is sized from the full parameter
 *    string and freed with the same size.
 *
 * NOTE(review): local declarations, rc checks, the out_up label and the
 * final RETURN are elided from this excerpt.
 */
3897 int mgs_write_log_target(const struct lu_env *env, struct mgs_device *mgs,
3898 struct mgs_target_info *mti, struct fs_db *fsdb)
3905 /* set/check the new target index */
3906 rc = mgs_set_index(env, mgs, mti);
3910 if (rc == EALREADY) {
3911 LCONSOLE_WARN("Found index %d for %s, updating log\n",
3912 mti->mti_stripe_index, mti->mti_svname);
3913 /* We would like to mark old log sections as invalid
3914 and add new log sections in the client and mdt logs.
3915 But if we add new sections, then live clients will
3916 get repeat setup instructions for already running
3917 osc's. So don't update the client/mdt logs. */
3918 mti->mti_flags &= ~LDD_F_UPDATE;
3922 OBD_FAIL_TIMEOUT(OBD_FAIL_MGS_WRITE_TARGET_DELAY, cfs_fail_val > 0 ?
3925 mutex_lock(&fsdb->fsdb_mutex);
3927 if (mti->mti_flags &
3928 (LDD_F_VIRGIN | LDD_F_UPGRADE14 | LDD_F_WRITECONF)) {
3929 /* Generate a log from scratch */
3930 if (mti->mti_flags & LDD_F_SV_TYPE_MDT) {
3931 rc = mgs_write_log_mdt(env, mgs, fsdb, mti);
3932 } else if (mti->mti_flags & LDD_F_SV_TYPE_OST) {
3933 rc = mgs_write_log_ost(env, mgs, fsdb, mti);
3935 CERROR("Unknown target type %#x, can't create log for "
3936 "%s\n", mti->mti_flags, mti->mti_svname);
3939 CERROR("Can't write logs for %s (%d)\n",
3940 mti->mti_svname, rc);
3944 /* Just update the params from tunefs in mgs_write_log_params */
3945 CDEBUG(D_MGS, "Update params for %s\n", mti->mti_svname);
3946 mti->mti_flags |= LDD_F_PARAM;
3949 /* allocate temporary buffer, where class_get_next_param will
3950 make copy of a current parameter */
3951 OBD_ALLOC(buf, strlen(mti->mti_params) + 1);
3953 GOTO(out_up, rc = -ENOMEM);
3954 params = mti->mti_params;
3955 while (params != NULL) {
3956 rc = class_get_next_param(&params, buf);
3959 /* there is no next parameter, that is
 * not an error */
3964 CDEBUG(D_MGS, "remaining string: '%s', param: '%s'\n",
3966 rc = mgs_write_log_param(env, mgs, fsdb, mti, buf);
/* Size must match the OBD_ALLOC() above; mti_params is not modified here. */
3971 OBD_FREE(buf, strlen(mti->mti_params) + 1);
3974 mutex_unlock(&fsdb->fsdb_mutex);
/*
 * Erase the config log called "name".
 *
 * The config llog context of the MGS obd is looked up; when it is
 * missing an error is logged.  Otherwise llog_erase() is called for the
 * named log and the context reference is dropped with llog_ctxt_put().
 * A remaining failure is reported with CERROR.
 *
 * NOTE(review): the declaration of rc, the NULL test on ctxt with its
 * error code, the special-casing announced by "llog may not exist" and
 * the final return are elided from this excerpt.
 */
3978 int mgs_erase_log(const struct lu_env *env, struct mgs_device *mgs, char *name)
3980 struct llog_ctxt *ctxt;
3983 ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
3985 CERROR("%s: MGS config context doesn't exist\n",
3986 mgs->mgs_obd->obd_name);
3989 rc = llog_erase(env, ctxt, NULL, name);
3990 /* llog may not exist */
3993 llog_ctxt_put(ctxt);
3997 CERROR("%s: failed to clear log %s: %d\n",
3998 mgs->mgs_obd->obd_name, name, rc);
4003 /* erase all logs for the given fs */
/*
 * The CONFIGS directory is listed with class_dentry_readdir(), the
 * in-memory fsdb for "fsname" is removed under mgs->mgs_mutex, and then
 * every directory entry is unlinked from the list and freed.  An entry
 * is erased with mgs_erase_log() only when the text before its LAST '-'
 * is exactly "fsname" (same length and same bytes), so for example
 * "lustre-client" matches "lustre" but "lustre2-client" does not.
 *
 * The result of mgs_erase_log() is not checked in the code shown.
 *
 * NOTE(review): the fsname parameter, the declaration of suffix, the rc
 * check after the directory read and the final return are elided from
 * this excerpt.
 */
4004 int mgs_erase_logs(const struct lu_env *env, struct mgs_device *mgs,
4007 struct list_head log_list;
4008 struct mgs_direntry *dirent, *n;
4009 int rc, len = strlen(fsname);
4013 /* Find all the logs in the CONFIGS directory */
4014 rc = class_dentry_readdir(env, mgs, &log_list);
4018 mutex_lock(&mgs->mgs_mutex);
4019 /* Delete the fs db */
4020 mgs_remove_fsdb_by_name(mgs, fsname);
4021 mutex_unlock(&mgs->mgs_mutex);
/* The _safe iterator is needed because entries are unlinked and freed. */
4023 list_for_each_entry_safe(dirent, n, &log_list, mde_list) {
4024 list_del_init(&dirent->mde_list);
4025 suffix = strrchr(dirent->mde_name, '-');
4026 if (suffix != NULL) {
4027 if ((len == suffix - dirent->mde_name) &&
4028 (strncmp(fsname, dirent->mde_name, len) == 0)) {
4029 CDEBUG(D_MGS, "Removing log %s\n",
4031 mgs_erase_log(env, mgs, dirent->mde_name);
4034 mgs_direntry_free(dirent);
4040 /* list all logs for the given fs */
/*
 * The CONFIGS directory is listed with class_dentry_readdir().  Output
 * goes to data->ioc_bulk, with data->ioc_inllen1 as the space available.
 * Each entry is unlinked from the list and freed; an entry whose name
 * contains a '-' is formatted into the output buffer with snprintf().
 *
 * NOTE(review): the local declarations, the snprintf() argument, the
 * bookkeeping of "out"/"remains" after each entry and the return
 * statement are elided from this excerpt; how truncation is handled
 * cannot be determined from here.
 */
4041 int mgs_list_logs(const struct lu_env *env, struct mgs_device *mgs,
4042 struct obd_ioctl_data *data)
4044 struct list_head log_list;
4045 struct mgs_direntry *dirent, *n;
4051 /* Find all the logs in the CONFIGS directory */
4052 rc = class_dentry_readdir(env, mgs, &log_list);
4056 out = data->ioc_bulk;
4057 remains = data->ioc_inllen1;
4058 list_for_each_entry_safe(dirent, n, &log_list, mde_list) {
4059 list_del_init(&dirent->mde_list);
4060 suffix = strrchr(dirent->mde_name, '-');
4061 if (suffix != NULL) {
4062 l = snprintf(out, remains, "config log: $%s\n",
4067 mgs_direntry_free(dirent);
4074 /* from llog_swab */
/*
 * Dump the header fields of a lustre_cfg record at D_MGS debug level:
 * pointer, version, command, num, flags, nid (via libcfs_nid2str()) and
 * bufcount.  The per-buffer lengths and string contents are printed only
 * when lcfg_bufcount is below LUSTRE_CFG_MAX_BUFCOUNT.
 *
 * NOTE(review): the declaration of i and any ENTRY/EXIT lines are elided
 * from this excerpt.
 */
4075 static void print_lustre_cfg(struct lustre_cfg *lcfg)
4080 CDEBUG(D_MGS, "lustre_cfg: %p\n", lcfg);
4081 CDEBUG(D_MGS, "\tlcfg->lcfg_version: %#x\n", lcfg->lcfg_version);
4083 CDEBUG(D_MGS, "\tlcfg->lcfg_command: %#x\n", lcfg->lcfg_command);
4084 CDEBUG(D_MGS, "\tlcfg->lcfg_num: %#x\n", lcfg->lcfg_num);
4085 CDEBUG(D_MGS, "\tlcfg->lcfg_flags: %#x\n", lcfg->lcfg_flags);
4086 CDEBUG(D_MGS, "\tlcfg->lcfg_nid: %s\n", libcfs_nid2str(lcfg->lcfg_nid));
4088 CDEBUG(D_MGS, "\tlcfg->lcfg_bufcount: %d\n", lcfg->lcfg_bufcount);
4089 if (lcfg->lcfg_bufcount < LUSTRE_CFG_MAX_BUFCOUNT)
4090 for (i = 0; i < lcfg->lcfg_bufcount; i++) {
4091 CDEBUG(D_MGS, "\tlcfg->lcfg_buflens[%d]: %d %s\n",
4092 i, lcfg->lcfg_buflens[i],
4093 lustre_cfg_string(lcfg, i));
/*
 * Find or create the fsdb named MGSSELF_NAME with
 * mgs_find_or_make_fsdb(), then drop the reference with mgs_put_fsdb().
 *
 * NOTE(review): the declaration of rc, the condition guarding
 * mgs_put_fsdb() and the return statement are elided from this excerpt.
 */
4098 /* Setup _mgs fsdb and log
4100 int mgs__mgs_fsdb_setup(const struct lu_env *env, struct mgs_device *mgs)
4102 struct fs_db *fsdb = NULL;
4106 rc = mgs_find_or_make_fsdb(env, mgs, MGSSELF_NAME, &fsdb);
4108 mgs_put_fsdb(mgs, fsdb);
4113 /* Setup params fsdb and log
 *
 * Find or create the fsdb named PARAMS_FILENAME, then, under its
 * fsdb_mutex, open the PARAMS_FILENAME config log with
 * record_start_log() and close it again with record_end_log().  The
 * fsdb reference is dropped with mgs_put_fsdb() afterwards.
 *
 * NOTE(review): the declaration of rc, the rc checks between the calls
 * and the return statement are elided from this excerpt.
 */
4115 int mgs_params_fsdb_setup(const struct lu_env *env, struct mgs_device *mgs)
4117 struct fs_db *fsdb = NULL;
4118 struct llog_handle *params_llh = NULL;
4122 rc = mgs_find_or_make_fsdb(env, mgs, PARAMS_FILENAME, &fsdb);
4124 mutex_lock(&fsdb->fsdb_mutex);
4125 rc = record_start_log(env, mgs, &params_llh, PARAMS_FILENAME);
4127 rc = record_end_log(env, &params_llh);
4128 mutex_unlock(&fsdb->fsdb_mutex);
4129 mgs_put_fsdb(mgs, fsdb);
/*
 * Delegates to mgs_erase_logs() with PARAMS_FILENAME as the filesystem
 * name and returns its result unchanged.
 */
4135 /* Cleanup params fsdb and log
4137 int mgs_params_fsdb_cleanup(const struct lu_env *env, struct mgs_device *mgs)
4139 return mgs_erase_logs(env, mgs, PARAMS_FILENAME);
4142 /* Set a permanent (config log) param for a target or fs
4143 * \param lcfg buf0 may contain the device (testfs-MDT0000) name
4144 * buf1 contains the single parameter
/*
 * Write one parameter, taken from lcfg, into the configuration logs and
 * revoke the matching config lock afterwards.
 *
 * \param env     execution environment handed through to the helpers
 * \param mgs     MGS device
 * \param lcfg    buffer 0 is read as the device name, buffer 1 as the
 *                parameter string; lcfg_command selects between the
 *                LCFG_SET_PARAM path and the per-filesystem path
 * \param fsname  output buffer: filled by mgs_parse_devname() or by a
 *                strlcpy() bounded to MTI_NAME_MAXLEN bytes
 *
 * NOTE(review): several local declarations (ptr, tmp, index, rc), branch
 * headers, the allocation of mti, the "out" label and the return statement
 * are not visible in this excerpt; comments below describe only the lines
 * that are shown.
 */
4146 int mgs_setparam(const struct lu_env *env, struct mgs_device *mgs,
4147 struct lustre_cfg *lcfg, char *fsname)
4149 struct fs_db *fsdb = NULL;
4150 struct mgs_target_info *mti = NULL;
4151 char *devname, *param;
4159 print_lustre_cfg(lcfg);
4161 /* lustre, lustre-mdtlov, lustre-client, lustre-MDT0000 */
4162 devname = lustre_cfg_string(lcfg, 0);
4163 param = lustre_cfg_string(lcfg, 1);
4165 /* Assume device name embedded in param:
4166 lustre-OST0000.osc.max_dirty_mb=32 */
/* NOTE(review): the statements that consume ptr are not visible here */
4167 ptr = strchr(param, '.');
4175 LCONSOLE_ERROR_MSG(0x14d, "No target specified: %s\n", param);
/*
 * The first call asks only for the fsname, the second only for the index;
 * a zero return from the second call selects the llite check below.
 */
4179 rc = mgs_parse_devname(devname, fsname, NULL);
4180 if (rc == 0 && !mgs_parse_devname(devname, NULL, &index)) {
4181 /* llite parameters may not be set through an OST or MDT device name */
/* NOTE(review): rc == 0 is already implied by the enclosing condition */
4182 if (rc == 0 && strncmp(param, PARAM_LLITE,
4183 sizeof(PARAM_LLITE) - 1) == 0)
4186 /* assume devname is the fsname */
4187 strlcpy(fsname, devname, MTI_NAME_MAXLEN);
4189 CDEBUG(D_MGS, "setparam fs='%s' device='%s'\n", fsname, devname);
/*
 * LCFG_SET_PARAM is stored under the PARAMS_FILENAME fsdb; every other
 * command uses the fsdb named after the file system.
 */
4191 rc = mgs_find_or_make_fsdb(env, mgs,
4192 lcfg->lcfg_command == LCFG_SET_PARAM ?
4193 PARAMS_FILENAME : fsname, &fsdb);
/*
 * For commands other than LCFG_SET_PARAM, fail with -EINVAL when the fsdb
 * is not flagged FSDB_MGS_SELF and is flagged FSDB_LOG_EMPTY.
 */
4197 if (lcfg->lcfg_command != LCFG_SET_PARAM &&
4198 !test_bit(FSDB_MGS_SELF, &fsdb->fsdb_flags) &&
4199 test_bit(FSDB_LOG_EMPTY, &fsdb->fsdb_flags)) {
4200 CERROR("No filesystem targets for %s. cfg_device from lctl "
4201 "is '%s'\n", fsname, devname);
4203 GOTO(out, rc = -EINVAL);
4206 /* Create a fake mti to hold everything */
4209 GOTO(out, rc = -ENOMEM);
/*
 * Each copy is checked for truncation: a strlcpy() result that is not
 * smaller than the destination size leads to -E2BIG.
 */
4210 if (strlcpy(mti->mti_fsname, fsname, sizeof(mti->mti_fsname))
4211 >= sizeof(mti->mti_fsname))
4212 GOTO(out, rc = -E2BIG);
4213 if (strlcpy(mti->mti_svname, devname, sizeof(mti->mti_svname))
4214 >= sizeof(mti->mti_svname))
4215 GOTO(out, rc = -E2BIG);
4216 if (strlcpy(mti->mti_params, param, sizeof(mti->mti_params))
4217 >= sizeof(mti->mti_params))
4218 GOTO(out, rc = -E2BIG);
4219 rc = server_name2index(mti->mti_svname, &mti->mti_stripe_index, &tmp);
4221 /* Not a valid server; may be only fsname */
4224 /* Strip -osc or -mdc suffix from svname */
4225 if (server_make_name(rc, mti->mti_stripe_index, mti->mti_fsname,
4227 GOTO(out, rc = -EINVAL);
/* NOTE(review): the delimiters of the comment text below are not visible in this excerpt */
4229 * Revoke lock so everyone updates. Should be alright if
4230 * someone was already reading while we were updating the logs,
4231 * so we don't really need to hold the lock while we're
/*
 * In both branches rc, as left by the code above, is OR-ed into mti_flags,
 * the log is written under fsdb_mutex, and the lock is revoked only after
 * the mutex has been released.
 */
4234 if (lcfg->lcfg_command == LCFG_SET_PARAM) {
4235 mti->mti_flags = rc | LDD_F_PARAM2;
4236 mutex_lock(&fsdb->fsdb_mutex);
4237 rc = mgs_write_log_param2(env, mgs, fsdb, mti, mti->mti_params);
4238 mutex_unlock(&fsdb->fsdb_mutex);
4239 mgs_revoke_lock(mgs, fsdb, CONFIG_T_PARAMS);
4241 mti->mti_flags = rc | LDD_F_PARAM;
4242 mutex_lock(&fsdb->fsdb_mutex);
4243 rc = mgs_write_log_param(env, mgs, fsdb, mti, mti->mti_params);
4244 mutex_unlock(&fsdb->fsdb_mutex);
4245 mgs_revoke_lock(mgs, fsdb, CONFIG_T_CONFIG);
/* NOTE(review): any condition guarding the unlink below is not visible here */
4254 mgs_unlink_fsdb(mgs, fsdb);
4255 mgs_put_fsdb(mgs, fsdb);
/*
 * Append one pool command to the log called logname, bracketed by a
 * CM_START and a CM_END marker that both carry tgtname and comment.
 *
 * \param env       execution environment handed through to the helpers
 * \param mgs       MGS device
 * \param logname   name of the log to open with record_start_log()
 * \param fsdb      database passed to record_marker()
 * \param tgtname   target name used in both markers and in the record
 * \param cmd       command stored in the record
 * \param fsname    first string stored in the record
 * \param poolname  second string stored in the record
 * \param ostname   third string stored in the record
 * \param comment   text attached to both markers
 *
 * NOTE(review): the declaration of rc, the rc checks between the steps, any
 * goto label and the return statement are not visible in this excerpt.
 */
4261 static int mgs_write_log_pool(const struct lu_env *env,
4262 struct mgs_device *mgs, char *logname,
4263 struct fs_db *fsdb, char *tgtname,
4264 enum lcfg_command_type cmd,
4265 char *fsname, char *poolname,
4266 char *ostname, char *comment)
4268 struct llog_handle *llh = NULL;
4271 rc = record_start_log(env, mgs, &llh, logname);
4274 rc = record_marker(env, llh, fsdb, CM_START, tgtname, comment);
4277 rc = record_base(env, llh, tgtname, 0, cmd,
4278 fsname, poolname, ostname, NULL);
4281 rc = record_marker(env, llh, fsdb, CM_END, tgtname, comment);
/* the result of record_end_log() is not stored in rc here */
4283 record_end_log(env, &llh);
/*
 * Dispatch a nodemap configuration command: each case below forwards
 * nodemap_name, together with a value derived from param where one is
 * needed, to the matching nodemap_*() helper and stores the result in rc.
 *
 * \param env           execution environment
 * \param mgs           MGS device
 * \param cmd           LCFG_NODEMAP_* command selecting the case
 * \param nodemap_name  name of the nodemap the command applies to
 *
 * NOTE(review): the rest of the parameter list (param is used below), the
 * local declarations (rc, nid, idmap, bool_switch, int_id), the switch
 * header, the break statements, the rc checks after the parse helpers and
 * the return statement are not visible in this excerpt.
 */
4287 int mgs_nodemap_cmd(const struct lu_env *env, struct mgs_device *mgs,
4288 enum lcfg_command_type cmd, const char *nodemap_name,
4299 case LCFG_NODEMAP_ADD:
4300 rc = nodemap_add(nodemap_name);
4302 case LCFG_NODEMAP_DEL:
4303 rc = nodemap_del(nodemap_name);
/* range commands: param is parsed into nid first, then nid is applied */
4305 case LCFG_NODEMAP_ADD_RANGE:
4306 rc = nodemap_parse_range(param, nid);
4309 rc = nodemap_add_range(nodemap_name, nid);
4311 case LCFG_NODEMAP_DEL_RANGE:
4312 rc = nodemap_parse_range(param, nid);
4315 rc = nodemap_del_range(nodemap_name, nid);
/*
 * Switch and id values are read from param as base-10 numbers with
 * simple_strtoul(); no end pointer is requested, so non-numeric or
 * trailing text in param is not detected at this point.
 */
4317 case LCFG_NODEMAP_ADMIN:
4318 bool_switch = simple_strtoul(param, NULL, 10);
4319 rc = nodemap_set_allow_root(nodemap_name, bool_switch);
4321 case LCFG_NODEMAP_DENY_UNKNOWN:
4322 bool_switch = simple_strtoul(param, NULL, 10);
4323 rc = nodemap_set_deny_unknown(nodemap_name, bool_switch);
4325 case LCFG_NODEMAP_TRUSTED:
4326 bool_switch = simple_strtoul(param, NULL, 10);
4327 rc = nodemap_set_trust_client_ids(nodemap_name, bool_switch);
4329 case LCFG_NODEMAP_SQUASH_UID:
4330 int_id = simple_strtoul(param, NULL, 10);
4331 rc = nodemap_set_squash_uid(nodemap_name, int_id);
4333 case LCFG_NODEMAP_SQUASH_GID:
4334 int_id = simple_strtoul(param, NULL, 10);
4335 rc = nodemap_set_squash_gid(nodemap_name, int_id);
/* the UID and GID variants share one case and differ only in the NODEMAP_UID or NODEMAP_GID argument */
4337 case LCFG_NODEMAP_ADD_UIDMAP:
4338 case LCFG_NODEMAP_ADD_GIDMAP:
4339 rc = nodemap_parse_idmap(param, idmap);
4342 if (cmd == LCFG_NODEMAP_ADD_UIDMAP)
4343 rc = nodemap_add_idmap(nodemap_name, NODEMAP_UID,
4346 rc = nodemap_add_idmap(nodemap_name, NODEMAP_GID,
4349 case LCFG_NODEMAP_DEL_UIDMAP:
4350 case LCFG_NODEMAP_DEL_GIDMAP:
4351 rc = nodemap_parse_idmap(param, idmap);
4354 if (cmd == LCFG_NODEMAP_DEL_UIDMAP)
4355 rc = nodemap_del_idmap(nodemap_name, NODEMAP_UID,
4358 rc = nodemap_del_idmap(nodemap_name, NODEMAP_GID,
/* the fileset is handed over as the raw param string */
4361 case LCFG_NODEMAP_SET_FILESET:
4362 rc = nodemap_set_fileset(nodemap_name, param);
/*
 * Record a pool command for file system fsname in the log of every MDT set
 * in fsdb_mdt_index_map and in the "<fsname>-client" log, then revoke the
 * CONFIG_T_CONFIG lock.
 *
 * \param env       execution environment handed through to the helpers
 * \param mgs       MGS device
 * \param cmd       pool command, passed on to mgs_write_log_pool()
 * \param fsname    file system whose fsdb and logs are used
 * \param poolname  pool the command refers to
 * \param ostname   OST the command refers to; may be NULL, in which case
 *                  the OST-name check and its share of label_sz are skipped
 *
 * NOTE(review): the remaining local declarations, the case labels of the
 * command switch, the heads of the sprintf() calls that fill label, the
 * allocation of mti, the goto labels and the return statement are not
 * visible in this excerpt; comments below describe only the lines shown.
 */
4371 int mgs_pool_cmd(const struct lu_env *env, struct mgs_device *mgs,
4372 enum lcfg_command_type cmd, char *fsname,
4373 char *poolname, char *ostname)
4378 char *label = NULL, *canceled_label = NULL;
4380 struct mgs_target_info *mti = NULL;
4381 bool checked = false;
4382 bool locked = false;
4387 rc = mgs_find_or_make_fsdb(env, mgs, fsname, &fsdb);
4389 CERROR("Can't get db for %s\n", fsname);
4392 if (test_bit(FSDB_LOG_EMPTY, &fsdb->fsdb_flags)) {
4393 CERROR("%s is not defined\n", fsname);
4395 GOTO(out_fsdb, rc = -EINVAL);
/*
 * Both labels are allocated with this size. Beyond the names themselves the
 * longest format visible below ("add %s.%s.%s") needs 7 bytes including the
 * terminating NUL, which fits into the 10 bytes reserved here.
 */
4398 label_sz = 10 + strlen(fsname) + strlen(poolname);
4400 /* check if ostname match fsname */
4401 if (ostname != NULL) {
/*
 * NOTE(review): only the first (ptr - ostname) characters of fsname are
 * compared with the part of ostname before its last '-', so an fsname that
 * merely starts with that prefix also compares equal -- confirm that this
 * is intended.
 */
4404 ptr = strrchr(ostname, '-');
4405 if ((ptr == NULL) ||
4406 (strncmp(fsname, ostname, ptr-ostname) != 0))
4408 label_sz += strlen(ostname);
4411 OBD_ALLOC(label, label_sz);
4413 GOTO(out_fsdb, rc = -ENOMEM);
4418 "new %s.%s", fsname, poolname);
4422 "add %s.%s.%s", fsname, poolname, ostname);
/*
 * The two branches that allocate canceled_label pair the new label with the
 * label of the record it refers to: "rem" with "add", and "del" with "new".
 */
4425 OBD_ALLOC(canceled_label, label_sz);
4426 if (canceled_label == NULL)
4427 GOTO(out_label, rc = -ENOMEM);
4429 "rem %s.%s.%s", fsname, poolname, ostname);
4430 sprintf(canceled_label,
4431 "add %s.%s.%s", fsname, poolname, ostname);
4434 OBD_ALLOC(canceled_label, label_sz);
4435 if (canceled_label == NULL)
4436 GOTO(out_label, rc = -ENOMEM);
4438 "del %s.%s", fsname, poolname);
4439 sprintf(canceled_label,
4440 "new %s.%s", fsname, poolname);
4448 GOTO(out_cancel, rc = -ENOMEM);
/* NOTE(review): relies on mti_svname being larger than the 8-character literal so that strncpy() NUL-terminates it -- size not visible here */
4449 strncpy(mti->mti_svname, "lov pool", sizeof(mti->mti_svname));
4451 mutex_lock(&fsdb->fsdb_mutex);
4453 /* write pool def to all MDT logs */
/* walk every bit of the MDT index map; only indices whose bit is set are processed */
4454 for (i = 0; i < INDEX_MAP_SIZE * 8; i++) {
4455 if (test_bit(i, fsdb->fsdb_mdt_index_map)) {
4456 rc = name_create_mdt_and_lov(&logname, &lovname,
/* the marker check runs only when there is no canceled_label and checked is still false */
4461 if (!checked && (canceled_label == NULL)) {
4462 rc = mgs_check_marker(env, mgs, fsdb, mti,
4463 logname, lovname, label);
4465 name_destroy(&logname);
4466 name_destroy(&lovname);
4468 rc = (rc == LLOG_PROC_BREAK ?
/* when a canceled_label exists, mgs_modify() is applied to it before the new record is written */
4473 if (canceled_label != NULL)
4474 rc = mgs_modify(env, mgs, fsdb, mti, logname,
4475 lovname, canceled_label,
4479 rc = mgs_write_log_pool(env, mgs, logname,
4483 name_destroy(&logname);
4484 name_destroy(&lovname);
/* same sequence for the client log, using fsdb_clilov as the target name */
4490 rc = name_create(&logname, fsname, "-client");
4494 if (!checked && (canceled_label == NULL)) {
4495 rc = mgs_check_marker(env, mgs, fsdb, mti, logname,
4496 fsdb->fsdb_clilov, label);
4498 name_destroy(&logname);
4499 GOTO(out_mti, rc = (rc == LLOG_PROC_BREAK ?
4503 if (canceled_label != NULL) {
4504 rc = mgs_modify(env, mgs, fsdb, mti, logname,
4505 fsdb->fsdb_clilov, canceled_label, CM_SKIP);
4507 name_destroy(&logname);
4512 rc = mgs_write_log_pool(env, mgs, logname, fsdb, fsdb->fsdb_clilov,
4513 cmd, fsname, poolname, ostname, label);
/* the mutex is released before the lock is revoked */
4514 mutex_unlock(&fsdb->fsdb_mutex);
4516 name_destroy(&logname);
4517 /* request for update */
4518 mgs_revoke_lock(mgs, fsdb, CONFIG_T_CONFIG);
/* NOTE(review): the label and the condition guarding this second unlock are not visible here */
4524 mutex_unlock(&fsdb->fsdb_mutex);
/* both labels are freed with the same label_sz they were allocated with */
4528 if (canceled_label != NULL)
4529 OBD_FREE(canceled_label, label_sz);
4531 OBD_FREE(label, label_sz);
/* NOTE(review): any condition guarding the unlink below is not visible here */
4534 mgs_unlink_fsdb(mgs, fsdb);
4535 mgs_put_fsdb(mgs, fsdb);