4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2012, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lustre/mgs/mgs_llog.c
38 * Lustre Management Server (mgs) config llog creation
40 * Author: Nathan Rutman <nathan@clusterfs.com>
41 * Author: Alex Zhuravlev <bzzz@whamcloud.com>
42 * Author: Mikhail Pershin <tappro@whamcloud.com>
45 #define DEBUG_SUBSYSTEM S_MGS
46 #define D_MGS D_CONFIG
50 #include <lustre_param.h>
51 #include <lustre_sec.h>
52 #include <lustre_quota.h>
54 #include "mgs_internal.h"
56 /********************** Class functions ********************/
/**
 * Collect the entries of the MGS configs directory into \a list.
 *
 * \a list is initialised first.  The directory object mgs->mgs_configs_dir
 * is then walked through its index iterator operations (init, load, key,
 * key_size, next).  For each key that is kept, a struct mgs_direntry is
 * allocated with room for key_sz + 1 bytes, the key bytes are copied into
 * de->name, a terminating 0 is stored, and the entry is added to \a list.
 *
 * rc is set to -ENOTDIR when dt_try_as_dir() rejects the object; key and
 * iteration failures are reported with CERROR together with the
 * MOUNT_CONFIGS_DIR name.
 *
 * \param[in]  env   environment passed through to the dt and iterator calls
 * \param[in]  mgs   MGS device whose configs directory is listed
 * \param[out] list  list head that receives the mgs_direntry items
 *
 * NOTE(review): the entries added to \a list are presumably owned by the
 * caller afterwards -- confirm against the callers.
 */
58 int class_dentry_readdir(const struct lu_env *env,
59 struct mgs_device *mgs, cfs_list_t *list)
61 struct dt_object *dir = mgs->mgs_configs_dir;
62 const struct dt_it_ops *iops;
64 struct mgs_direntry *de;
68 CFS_INIT_LIST_HEAD(list);
70 if (!dt_try_as_dir(env, dir))
71 GOTO(out, rc = -ENOTDIR);
74 LASSERT(dir->do_index_ops);
76 iops = &dir->do_index_ops->dio_it;
77 it = iops->init(env, dir, LUDA_64BITHASH, BYPASS_CAPA);
81 rc = iops->load(env, it, 0);
87 key = (void *)iops->key(env, it);
89 CERROR("%s: key failed when listing %s: rc = %d\n",
90 mgs->mgs_obd->obd_name, MOUNT_CONFIGS_DIR,
94 key_sz = iops->key_size(env, it);
97 /* filter out "." and ".." entries */
101 if (key_sz == 2 && key[1] == '.')
105 de = mgs_direntry_alloc(key_sz + 1);
111 memcpy(de->name, key, key_sz);
112 de->name[key_sz] = 0;
114 cfs_list_add(&de->list, list);
117 rc = iops->next(env, it);
127 CERROR("%s: key failed when listing %s: rc = %d\n",
128 mgs->mgs_obd->obd_name, MOUNT_CONFIGS_DIR, rc);
132 /******************** DB functions *********************/
/**
 * Build a freshly allocated string made of \a prefix followed by \a suffix.
 *
 * The buffer is sized strlen(prefix) + strlen(suffix) + 1 and stored in
 * \a *newname.  name_destroy() releases strlen(name) + 1 bytes, which is
 * the same size once the string has been formatted.
 *
 * \param[out] newname  receives the allocated string
 * \param[in]  prefix   first part of the name
 * \param[in]  suffix   second part of the name
 */
134 static inline int name_create(char **newname, char *prefix, char *suffix)
137 OBD_ALLOC(*newname, strlen(prefix) + strlen(suffix) + 1);
140 sprintf(*newname, "%s%s", prefix, suffix);
/**
 * Release a string of the kind built by name_create().
 *
 * The freed size is computed from the current string length
 * (strlen(*name) + 1), so the string must not have been shortened or
 * lengthened since it was allocated.
 */
144 static inline void name_destroy(char **name)
147 OBD_FREE(*name, strlen(*name) + 1);
/*
 * Context handed to mgs_fsdb_handler() through the llog_process() data
 * pointer.  The handler reads and updates its fsdb and ver members.
 */
151 struct mgs_fsdb_handler_data
157 /* from the (client) config log, figure out:
158 1. which ost's/mdt's are configured (by index)
159 2. what the last config step is
160 3. COMPAT_18 osc name
162 /* It might be better to have a separate db file, instead of parsing the info
163 out of the client log. This is slow and potentially error-prone. */
/**
 * llog_process() callback that rebuilds fs_db state from one config record.
 *
 * \a data is a struct mgs_fsdb_handler_data.  Records whose lrh_type is not
 * OBD_CFG_REC, or that fail lustre_cfg_sanity_check(), are reported with
 * CERROR.  Otherwise the record's lcfg_command is examined:
 *  - LCFG_LOV_ADD_OBD / LCFG_LOV_DEL_OBD: buffer 2 is parsed as the OST
 *    index and that bit is set in fsdb_ost_index_map;
 *  - LCFG_ATTACH with type LUSTRE_MDC_NAME: the MDT index is derived from
 *    the device name with server_name2index(), set in fsdb_mdt_index_map,
 *    and fsdb_mdt_count is incremented;
 *  - LCFG_ATTACH with type LUSTRE_OSC_NAME: FSDB_OSCNAME18 is set when it
 *    is not set yet and the last marker version seen (d->ver) is 1.x with
 *    x <= 8;
 *  - LCFG_MARKER: cm_vers is remembered in d->ver and fsdb_gen is raised to
 *    the largest cm_step seen so far.
 * A fsdb_gen of 0 is reported as an unsupported old config format.
 *
 * \param[in] env   execution environment (not used by the visible code)
 * \param[in] llh   log handle being processed (not used by the visible code)
 * \param[in] rec   record header; the lustre_cfg payload follows it
 * \param[in] data  struct mgs_fsdb_handler_data supplied by the caller
 */
164 static int mgs_fsdb_handler(const struct lu_env *env, struct llog_handle *llh,
165 struct llog_rec_hdr *rec, void *data)
167 struct mgs_fsdb_handler_data *d = data;
168 struct fs_db *fsdb = d->fsdb;
169 int cfg_len = rec->lrh_len;
170 char *cfg_buf = (char*) (rec + 1);
171 struct lustre_cfg *lcfg;
176 if (rec->lrh_type != OBD_CFG_REC) {
177 CERROR("unhandled lrh_type: %#x\n", rec->lrh_type);
181 rc = lustre_cfg_sanity_check(cfg_buf, cfg_len);
183 CERROR("Insane cfg\n");
187 lcfg = (struct lustre_cfg *)cfg_buf;
189 CDEBUG(D_INFO, "cmd %x %s %s\n", lcfg->lcfg_command,
190 lustre_cfg_string(lcfg, 0), lustre_cfg_string(lcfg, 1));
192 /* Figure out ost indices */
193 /* lov_modify_tgts add 0:lov1 1:ost1_UUID 2(index):0 3(gen):1 */
194 if (lcfg->lcfg_command == LCFG_LOV_ADD_OBD ||
195 lcfg->lcfg_command == LCFG_LOV_DEL_OBD) {
196 index = simple_strtoul(lustre_cfg_string(lcfg, 2),
198 CDEBUG(D_MGS, "OST index for %s is %u (%s)\n",
199 lustre_cfg_string(lcfg, 1), index,
200 lustre_cfg_string(lcfg, 2));
/* NOTE(review): index is parsed from record data; no check against the
 * size of the index map is visible before set_bit() -- confirm. */
201 set_bit(index, fsdb->fsdb_ost_index_map);
204 /* Figure out mdt indices */
205 /* attach 0:MDC_uml1_mdsA_MNT_client 1:mdc 2:1d834_MNT_client_03f */
206 if ((lcfg->lcfg_command == LCFG_ATTACH) &&
207 (strcmp(lustre_cfg_string(lcfg, 1), LUSTRE_MDC_NAME) == 0)) {
208 rc = server_name2index(lustre_cfg_string(lcfg, 0),
210 if (rc != LDD_F_SV_TYPE_MDT) {
211 CWARN("Unparsable MDC name %s, assuming index 0\n",
212 lustre_cfg_string(lcfg, 0));
216 CDEBUG(D_MGS, "MDT index is %u\n", index);
217 set_bit(index, fsdb->fsdb_mdt_index_map);
218 fsdb->fsdb_mdt_count ++;
222 * figure out the old config. fsdb_gen = 0 means old log
223 * It is obsoleted and not supported anymore
225 if (fsdb->fsdb_gen == 0) {
226 CERROR("Old config format is not supported\n");
231 * compat to 1.8, check osc name used by MDT0 to OSTs, bz18548.
233 if (!test_bit(FSDB_OSCNAME18, &fsdb->fsdb_flags) &&
234 lcfg->lcfg_command == LCFG_ATTACH &&
235 strcmp(lustre_cfg_string(lcfg, 1), LUSTRE_OSC_NAME) == 0) {
236 if (OBD_OCD_VERSION_MAJOR(d->ver) == 1 &&
237 OBD_OCD_VERSION_MINOR(d->ver) <= 8) {
238 CWARN("MDT using 1.8 OSC name scheme\n");
239 set_bit(FSDB_OSCNAME18, &fsdb->fsdb_flags);
243 if (lcfg->lcfg_command == LCFG_MARKER) {
244 struct cfg_marker *marker;
245 marker = lustre_cfg_buf(lcfg, 1);
247 d->ver = marker->cm_vers;
249 /* Keep track of the latest marker step */
250 fsdb->fsdb_gen = max(fsdb->fsdb_gen, marker->cm_step);
256 /* fsdb->fsdb_mutex is already held by mgs_find_or_make_fsdb() */
/**
 * Populate an fs_db by replaying the "<fsname>-client" config log.
 *
 * The log name is built from fsdb->fsdb_name plus "-client".  The log is
 * opened (llog_open_create) and initialised as a plain log; FSDB_LOG_EMPTY
 * is set when llog_get_size() reports <= 1.  mgs_fsdb_handler() is then run
 * over the log with a mgs_fsdb_handler_data that starts as { fsdb, 0 }.
 * The handle is closed and the temporary log name is released afterwards.
 *
 * NOTE(review): no llog_ctxt_put() for the context obtained from
 * llog_get_context() is visible in this function, whereas
 * record_start_log() and mgs_log_is_empty() release theirs -- confirm the
 * reference is dropped.
 */
257 static int mgs_get_fsdb_from_llog(const struct lu_env *env,
258 struct mgs_device *mgs,
262 struct llog_handle *loghandle;
263 struct llog_ctxt *ctxt;
264 struct mgs_fsdb_handler_data d = { fsdb, 0 };
269 ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
270 LASSERT(ctxt != NULL);
271 rc = name_create(&logname, fsdb->fsdb_name, "-client");
274 rc = llog_open_create(env, ctxt, &loghandle, NULL, logname);
278 rc = llog_init_handle(env, loghandle, LLOG_F_IS_PLAIN, NULL);
282 if (llog_get_size(loghandle) <= 1)
283 set_bit(FSDB_LOG_EMPTY, &fsdb->fsdb_flags);
285 rc = llog_process(env, loghandle, mgs_fsdb_handler, (void *)&d, NULL);
286 CDEBUG(D_INFO, "get_db = %d\n", rc);
288 llog_close(env, loghandle);
290 name_destroy(&logname);
/**
 * Free all sptlrpc rule state attached to \a fsdb.
 *
 * The target-specific configurations form a singly linked list headed by
 * fsdb->fsdb_srpc_tgt and chained through mtsc_next.  Each node is unlinked
 * from the head, then its rule set, its target name string and the node
 * itself are freed.  The general rule set fsdb_srpc_gen is freed last.
 */
297 static void mgs_free_fsdb_srpc(struct fs_db *fsdb)
299 struct mgs_tgt_srpc_conf *tgtconf;
301 /* free target-specific rules */
302 while (fsdb->fsdb_srpc_tgt) {
303 tgtconf = fsdb->fsdb_srpc_tgt;
304 fsdb->fsdb_srpc_tgt = tgtconf->mtsc_next;
306 LASSERT(tgtconf->mtsc_tgt);
308 sptlrpc_rule_set_free(&tgtconf->mtsc_rset);
309 OBD_FREE(tgtconf->mtsc_tgt, strlen(tgtconf->mtsc_tgt) + 1);
310 OBD_FREE_PTR(tgtconf);
313 /* free general rules */
314 sptlrpc_rule_set_free(&fsdb->fsdb_srpc_gen);
/**
 * Look up a filesystem database by name.
 *
 * Walks mgs->mgs_fs_db_list and compares each entry's fsdb_name with
 * \a fsname using strcmp(), i.e. an exact, case-sensitive match.
 *
 * NOTE(review): mgs_find_or_make_fsdb() calls this with mgs->mgs_mutex
 * held; other callers presumably need the same protection -- confirm.
 */
317 struct fs_db *mgs_find_fsdb(struct mgs_device *mgs, char *fsname)
322 cfs_list_for_each(tmp, &mgs->mgs_fs_db_list) {
323 fsdb = cfs_list_entry(tmp, struct fs_db, fsdb_list);
324 if (strcmp(fsdb->fsdb_name, fsname) == 0)
/**
 * Create and register a new filesystem database for \a fsname.
 *
 * \a fsname is rejected with a CERROR when it does not fit in
 * fsdb->fsdb_name (strlen(fsname) >= sizeof(fsdb->fsdb_name)); this check is
 * what makes the later strcpy() safe.  The new fsdb gets its mutex
 * initialised and FSDB_UDESC set.  For the MGSSELF_NAME pseudo filesystem
 * FSDB_MGS_SELF is set.  The OST/MDT index maps (INDEX_MAP_SIZE bytes each)
 * and the "<fsname>-clilov" / "<fsname>-clilmv" names are allocated
 * (presumably only for regular filesystems -- confirm).  Imperative
 * recovery state and the lproc entry are then set up and the fsdb is added
 * to mgs->mgs_fs_db_list.
 *
 * The error path frees whichever index maps were allocated and destroys
 * both names.
 *
 * NOTE(review): the comment below names mgs->mgs_fs_db_lock, but
 * mgs_find_or_make_fsdb() takes mgs->mgs_mutex around this call; the lock
 * name may be stale -- confirm.
 */
330 /* caller must hold the mgs->mgs_fs_db_lock */
331 static struct fs_db *mgs_new_fsdb(const struct lu_env *env,
332 struct mgs_device *mgs, char *fsname)
338 if (strlen(fsname) >= sizeof(fsdb->fsdb_name)) {
339 CERROR("fsname %s is too long\n", fsname);
347 strcpy(fsdb->fsdb_name, fsname);
348 mutex_init(&fsdb->fsdb_mutex);
349 set_bit(FSDB_UDESC, &fsdb->fsdb_flags);
352 if (strcmp(fsname, MGSSELF_NAME) == 0) {
353 set_bit(FSDB_MGS_SELF, &fsdb->fsdb_flags);
355 OBD_ALLOC(fsdb->fsdb_ost_index_map, INDEX_MAP_SIZE);
356 OBD_ALLOC(fsdb->fsdb_mdt_index_map, INDEX_MAP_SIZE);
357 if (!fsdb->fsdb_ost_index_map || !fsdb->fsdb_mdt_index_map) {
358 CERROR("No memory for index maps\n");
359 GOTO(err, rc = -ENOMEM);
362 rc = name_create(&fsdb->fsdb_clilov, fsname, "-clilov");
365 rc = name_create(&fsdb->fsdb_clilmv, fsname, "-clilmv");
369 /* initialise data for NID table */
370 mgs_ir_init_fs(env, mgs, fsdb);
372 lproc_mgs_add_live(mgs, fsdb);
375 cfs_list_add(&fsdb->fsdb_list, &mgs->mgs_fs_db_list);
379 if (fsdb->fsdb_ost_index_map)
380 OBD_FREE(fsdb->fsdb_ost_index_map, INDEX_MAP_SIZE);
381 if (fsdb->fsdb_mdt_index_map)
382 OBD_FREE(fsdb->fsdb_mdt_index_map, INDEX_MAP_SIZE);
383 name_destroy(&fsdb->fsdb_clilov);
384 name_destroy(&fsdb->fsdb_clilmv);
/**
 * Tear down a filesystem database and unlink it from the MGS.
 *
 * fsdb_mutex is taken first so that any current holder finishes.  While it
 * is held the lproc entry is removed, the fsdb is deleted from its list,
 * imperative recovery state is finalised, the index maps and the
 * clilov/clilmv names are freed, and the sptlrpc rules are released.  The
 * mutex is dropped at the end.
 */
389 static void mgs_free_fsdb(struct mgs_device *mgs, struct fs_db *fsdb)
391 /* wait for anyone with the sem */
392 mutex_lock(&fsdb->fsdb_mutex);
393 lproc_mgs_del_live(mgs, fsdb);
394 cfs_list_del(&fsdb->fsdb_list);
396 /* deinitialize fsr */
397 mgs_ir_fini_fs(mgs, fsdb);
399 if (fsdb->fsdb_ost_index_map)
400 OBD_FREE(fsdb->fsdb_ost_index_map, INDEX_MAP_SIZE);
401 if (fsdb->fsdb_mdt_index_map)
402 OBD_FREE(fsdb->fsdb_mdt_index_map, INDEX_MAP_SIZE);
403 name_destroy(&fsdb->fsdb_clilov);
404 name_destroy(&fsdb->fsdb_clilmv);
405 mgs_free_fsdb_srpc(fsdb);
406 mutex_unlock(&fsdb->fsdb_mutex);
/** Initialise the (empty) list of filesystem databases of \a mgs. */
410 int mgs_init_fsdb_list(struct mgs_device *mgs)
412 CFS_INIT_LIST_HEAD(&mgs->mgs_fs_db_list);
/**
 * Free every filesystem database registered with \a mgs.
 *
 * The list is walked under mgs->mgs_mutex with the _safe iterator because
 * mgs_free_fsdb() unlinks the current entry.
 */
416 int mgs_cleanup_fsdb_list(struct mgs_device *mgs)
419 cfs_list_t *tmp, *tmp2;
420 mutex_lock(&mgs->mgs_mutex);
421 cfs_list_for_each_safe(tmp, tmp2, &mgs->mgs_fs_db_list) {
422 fsdb = cfs_list_entry(tmp, struct fs_db, fsdb_list);
423 mgs_free_fsdb(mgs, fsdb);
425 mutex_unlock(&mgs->mgs_mutex);
/**
 * Return the filesystem database for \a name, creating and loading it if
 * it does not exist yet.
 *
 * The lookup runs under mgs->mgs_mutex.  When a new database is created,
 * its fsdb_mutex is taken before mgs_mutex is released, so other threads
 * that find the new entry block on fsdb_mutex until it has been loaded.
 * Loading replays the client llog (skipped when FSDB_MGS_SELF is set) and
 * then the params llog for sptlrpc rules.  fsdb_mutex is released once
 * loading is done.  The failure path also releases fsdb_mutex and then
 * frees the database with mgs_free_fsdb().
 */
429 int mgs_find_or_make_fsdb(const struct lu_env *env,
430 struct mgs_device *mgs, char *name,
437 mutex_lock(&mgs->mgs_mutex);
438 fsdb = mgs_find_fsdb(mgs, name);
440 mutex_unlock(&mgs->mgs_mutex);
445 CDEBUG(D_MGS, "Creating new db\n");
446 fsdb = mgs_new_fsdb(env, mgs, name);
447 /* lock fsdb_mutex until the db is loaded from llogs */
449 mutex_lock(&fsdb->fsdb_mutex);
450 mutex_unlock(&mgs->mgs_mutex);
454 if (!test_bit(FSDB_MGS_SELF, &fsdb->fsdb_flags)) {
455 /* populate the db from the client llog */
456 rc = mgs_get_fsdb_from_llog(env, mgs, fsdb);
458 CERROR("Can't get db from client log %d\n", rc);
463 /* populate srpc rules from params llog */
464 rc = mgs_get_fsdb_srpc_from_llog(env, mgs, fsdb);
466 CERROR("Can't get db from params log %d\n", rc);
470 mutex_unlock(&fsdb->fsdb_mutex);
476 mutex_unlock(&fsdb->fsdb_mutex);
477 mgs_free_fsdb(mgs, fsdb);
483 -1= empty client log */
/**
 * Check whether the index requested in \a mti is already recorded as used.
 *
 * The target must already carry an index (LDD_F_NEED_INDEX is asserted to
 * be clear).  The filesystem database for mti->mti_fsname is found or
 * created, FSDB_LOG_EMPTY is tested, the OST or MDT index map is selected
 * from the LDD_F_SV_TYPE_* flag in mti->mti_flags, and the bit for
 * mti->mti_stripe_index is tested.
 */
484 int mgs_check_index(const struct lu_env *env,
485 struct mgs_device *mgs,
486 struct mgs_target_info *mti)
493 LASSERT(!(mti->mti_flags & LDD_F_NEED_INDEX));
495 rc = mgs_find_or_make_fsdb(env, mgs, mti->mti_fsname, &fsdb);
497 CERROR("Can't get db for %s\n", mti->mti_fsname);
501 if (test_bit(FSDB_LOG_EMPTY, &fsdb->fsdb_flags))
504 if (mti->mti_flags & LDD_F_SV_TYPE_OST)
505 imap = fsdb->fsdb_ost_index_map;
506 else if (mti->mti_flags & LDD_F_SV_TYPE_MDT)
507 imap = fsdb->fsdb_mdt_index_map;
511 if (test_bit(mti->mti_stripe_index, imap))
/**
 * Scan \a index_map for the first clear bit.
 *
 * \a map_len is a length in bytes: bits 0 .. map_len * 8 - 1 are examined
 * in ascending order.  When the loop finds no clear bit, "max index
 * exceeded" is logged with CERROR.
 */
516 static __inline__ int next_index(void *index_map, int map_len)
519 for (i = 0; i < map_len * 8; i++)
520 if (!test_bit(i, index_map)) {
523 CERROR("max index %d exceeded.\n", i);
528 0 newly marked as in use
530 +EALREADY for update of an old index */
/**
 * Reserve an index for the target described by \a mti.
 *
 * Under fsdb_mutex the OST or MDT index map is chosen from the
 * LDD_F_SV_TYPE_* flag (rc = -EINVAL for any other type).  With
 * LDD_F_NEED_INDEX the next free index from next_index() is stored in
 * mti->mti_stripe_index (rc = -ERANGE when none is available) and
 * fsdb_mdt_count is incremented for MDTs.
 *
 * An index at or beyond INDEX_MAP_SIZE * 8 gives rc = -ERANGE.  When the
 * bit is already set:
 *  - a target flagged LDD_F_VIRGIN without LDD_F_WRITECONF gets
 *    rc = -EADDRINUSE;
 *  - any other target gets the positive rc = EALREADY (update of an old
 *    index).
 *
 * Otherwise the bit is set, FSDB_LOG_EMPTY is cleared, the mutex is
 * released and mti->mti_svname is regenerated with server_make_name()
 * from the flags (minus VIRGIN/WRITECONF), the index and the fsname.
 */
531 static int mgs_set_index(const struct lu_env *env,
532 struct mgs_device *mgs,
533 struct mgs_target_info *mti)
540 rc = mgs_find_or_make_fsdb(env, mgs, mti->mti_fsname, &fsdb);
542 CERROR("Can't get db for %s\n", mti->mti_fsname);
546 mutex_lock(&fsdb->fsdb_mutex);
547 if (mti->mti_flags & LDD_F_SV_TYPE_OST) {
548 imap = fsdb->fsdb_ost_index_map;
549 } else if (mti->mti_flags & LDD_F_SV_TYPE_MDT) {
550 imap = fsdb->fsdb_mdt_index_map;
552 GOTO(out_up, rc = -EINVAL);
555 if (mti->mti_flags & LDD_F_NEED_INDEX) {
556 rc = next_index(imap, INDEX_MAP_SIZE);
558 GOTO(out_up, rc = -ERANGE);
559 mti->mti_stripe_index = rc;
560 if (mti->mti_flags & LDD_F_SV_TYPE_MDT)
561 fsdb->fsdb_mdt_count ++;
564 if (mti->mti_stripe_index >= INDEX_MAP_SIZE * 8) {
565 LCONSOLE_ERROR_MSG(0x13f, "Server %s requested index %d, "
566 "but the max index is %d.\n",
567 mti->mti_svname, mti->mti_stripe_index,
569 GOTO(out_up, rc = -ERANGE);
572 if (test_bit(mti->mti_stripe_index, imap)) {
573 if ((mti->mti_flags & LDD_F_VIRGIN) &&
574 !(mti->mti_flags & LDD_F_WRITECONF)) {
575 LCONSOLE_ERROR_MSG(0x140, "Server %s requested index "
576 "%d, but that index is already in "
577 "use. Use --writeconf to force\n",
579 mti->mti_stripe_index);
580 GOTO(out_up, rc = -EADDRINUSE);
582 CDEBUG(D_MGS, "Server %s updating index %d\n",
583 mti->mti_svname, mti->mti_stripe_index);
584 GOTO(out_up, rc = EALREADY);
588 set_bit(mti->mti_stripe_index, imap);
589 clear_bit(FSDB_LOG_EMPTY, &fsdb->fsdb_flags);
590 mutex_unlock(&fsdb->fsdb_mutex);
591 server_make_name(mti->mti_flags & ~(LDD_F_VIRGIN | LDD_F_WRITECONF),
592 mti->mti_stripe_index, mti->mti_fsname, mti->mti_svname);
594 CDEBUG(D_MGS, "Set index for %s to %d\n", mti->mti_svname,
595 mti->mti_stripe_index);
599 mutex_unlock(&fsdb->fsdb_mutex);
/*
 * Search key handed to mgs_modify_handler(): mml_marker carries the comment
 * and target name to match, plus the flags and cancel time to write into
 * the matching marker record.
 */
603 struct mgs_modify_lookup {
604 struct cfg_marker mml_marker;
/**
 * llog_process() callback that rewrites matching marker records in place.
 *
 * \a data is a struct mgs_modify_lookup.  Only LCFG_MARKER records are of
 * interest.  A marker matches when its cm_comment and cm_tgtname equal
 * those in mml->mml_marker and it is not already flagged CM_SKIP.  For a
 * match CM_EXCLUDE is cleared, the requested flags are OR-ed in, the
 * cancel time is copied, rec->lrh_len is reset to the payload length and
 * the record is written back with llog_write().
 */
608 static int mgs_modify_handler(const struct lu_env *env,
609 struct llog_handle *llh,
610 struct llog_rec_hdr *rec, void *data)
612 struct mgs_modify_lookup *mml = data;
613 struct cfg_marker *marker;
614 struct lustre_cfg *lcfg = REC_DATA(rec);
615 int cfg_len = REC_DATA_LEN(rec);
619 if (rec->lrh_type != OBD_CFG_REC) {
620 CERROR("unhandled lrh_type: %#x\n", rec->lrh_type);
624 rc = lustre_cfg_sanity_check(lcfg, cfg_len);
626 CERROR("Insane cfg\n");
630 /* We only care about markers */
631 if (lcfg->lcfg_command != LCFG_MARKER)
634 marker = lustre_cfg_buf(lcfg, 1);
635 if ((strcmp(mml->mml_marker.cm_comment, marker->cm_comment) == 0) &&
636 (strcmp(mml->mml_marker.cm_tgtname, marker->cm_tgtname) == 0) &&
637 !(marker->cm_flags & CM_SKIP)) {
638 /* Found a non-skipped marker match */
639 CDEBUG(D_MGS, "Changing rec %u marker %d %x->%x: %s %s\n",
640 rec->lrh_index, marker->cm_step,
641 marker->cm_flags, mml->mml_marker.cm_flags,
642 marker->cm_tgtname, marker->cm_comment);
643 /* Overwrite the old marker llog entry */
644 marker->cm_flags &= ~CM_EXCLUDE; /* in case we're unexcluding */
645 marker->cm_flags |= mml->mml_marker.cm_flags;
646 marker->cm_canceltime = mml->mml_marker.cm_canceltime;
647 /* Header and tail are added back to lrh_len in
648 llog_lvfs_write_rec */
649 rec->lrh_len = cfg_len;
650 rc = llog_write(env, llh, rec, NULL, 0, (void *)lcfg,
660 * Modify an existing config log record (for CM_SKIP or CM_EXCLUDE)
662 * 0 - modified successfully,
663 * 1 - no modification was done
/**
 * Update the marker records for \a devname / \a comment in \a logname.
 *
 * The caller must hold fsdb->fsdb_mutex (asserted).  The log must already
 * exist (LLOG_OPEN_EXISTS).  A log with llog_get_size() <= 1 is left alone
 * with rc = 0.  Otherwise a mgs_modify_lookup is filled with the comment,
 * device name and \a flags; the cancel time is the current time when
 * \a flags is non-zero and 0 otherwise.  mgs_modify_handler() is then run
 * over the log.  Failures are reported with CERROR.
 *
 * NOTE(review): \a comment and \a devname are copied with strcpy() into
 * the fixed-size cm_comment / cm_tgtname fields; no length check is
 * visible here -- confirm callers bound them.
 * NOTE(review): no llog_ctxt_put() for the context obtained from
 * llog_get_context() is visible in this function -- confirm the reference
 * is dropped.
 */
666 static int mgs_modify(const struct lu_env *env, struct mgs_device *mgs,
667 struct fs_db *fsdb, struct mgs_target_info *mti,
668 char *logname, char *devname, char *comment, int flags)
670 struct llog_handle *loghandle;
671 struct llog_ctxt *ctxt;
672 struct mgs_modify_lookup *mml;
677 LASSERT(mutex_is_locked(&fsdb->fsdb_mutex));
678 CDEBUG(D_MGS, "modify %s/%s/%s fl=%x\n", logname, devname, comment,
681 ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
682 LASSERT(ctxt != NULL);
683 rc = llog_open(env, ctxt, &loghandle, NULL, logname, LLOG_OPEN_EXISTS);
690 rc = llog_init_handle(env, loghandle, LLOG_F_IS_PLAIN, NULL);
694 if (llog_get_size(loghandle) <= 1)
695 GOTO(out_close, rc = 0);
699 GOTO(out_close, rc = -ENOMEM);
700 strcpy(mml->mml_marker.cm_comment, comment);
701 strcpy(mml->mml_marker.cm_tgtname, devname);
702 /* Modify mostly means cancel */
703 mml->mml_marker.cm_flags = flags;
704 mml->mml_marker.cm_canceltime = flags ? cfs_time_current_sec() : 0;
705 mml->mml_modified = 0;
706 rc = llog_process(env, loghandle, mgs_modify_handler, (void *)mml,
708 if (!rc && !mml->mml_modified)
713 llog_close(env, loghandle);
716 CERROR("%s: modify %s/%s failed: rc = %d\n",
717 mgs->mgs_obd->obd_name, mti->mti_svname, comment, rc);
722 /**
 * State carried across records by mgs_replace_handler(), check_markers()
 * and process_command() while a config log is rewritten with new nids.
 * Only target.mti_svname and target.mti_params are filled in by
 * mgs_replace_nids_log().
 */
723 struct mgs_replace_uuid_lookup {
724 /* Nids are replaced for this target device */
725 struct mgs_target_info target;
726 /* Temporary modified llog */
727 struct llog_handle *temp_llh;
728 /* Flag is set if in target block */
729 int in_target_device;
730 /* Nids already added. Just skip (multiple nids) */
731 int device_nids_added;
732 /* Flag is set if this block should not be copied */
737 * Check: a) if block should be skipped
738 * b) is it target block
743 * \retval 0 should not be skipped
744 * \retval 1 should be skipped
/**
 * Track marker records while a log is being rewritten.
 *
 * Only LCFG_MARKER records are inspected.  Markers carrying both
 * CM_EXCLUDE and CM_START, or both CM_EXCLUDE and CM_END, are handled
 * first.  For a marker whose cm_tgtname equals mrul->target.mti_svname,
 * CM_START sets in_target_device and resets device_nids_added, and CM_END
 * clears in_target_device.  A marker with both CM_START and CM_END set
 * trips the LASSERT.
 */
746 static int check_markers(struct lustre_cfg *lcfg,
747 struct mgs_replace_uuid_lookup *mrul)
749 struct cfg_marker *marker;
751 /* Track markers. Find given device */
752 if (lcfg->lcfg_command == LCFG_MARKER) {
753 marker = lustre_cfg_buf(lcfg, 1);
754 /* Clean llog from records marked as CM_EXCLUDE.
755 CM_SKIP records are used for "active" command
756 and can be restored if needed */
757 if ((marker->cm_flags & (CM_EXCLUDE | CM_START)) ==
758 (CM_EXCLUDE | CM_START)) {
763 if ((marker->cm_flags & (CM_EXCLUDE | CM_END)) ==
764 (CM_EXCLUDE | CM_END)) {
769 if (strcmp(mrul->target.mti_svname, marker->cm_tgtname) == 0) {
770 LASSERT(!(marker->cm_flags & CM_START) ||
771 !(marker->cm_flags & CM_END));
772 if (marker->cm_flags & CM_START) {
773 mrul->in_target_device = 1;
774 mrul->device_nids_added = 0;
775 } else if (marker->cm_flags & CM_END)
776 mrul->in_target_device = 0;
/**
 * Append one lustre_cfg record to the config log \a llh.
 *
 * The payload length comes from lustre_cfg_len(); the record header is
 * filled with lrh_len = llog_data_len(buflen) and lrh_type = OBD_CFG_REC,
 * and the record is written with index -1, which means append.  A failure
 * is logged with CERROR.
 */
783 static int record_lcfg(const struct lu_env *env, struct llog_handle *llh,
784 struct lustre_cfg *lcfg)
786 struct llog_rec_hdr rec;
792 LASSERT(llh->lgh_ctxt);
794 buflen = lustre_cfg_len(lcfg->lcfg_bufcount,
796 rec.lrh_len = llog_data_len(buflen);
797 rec.lrh_type = OBD_CFG_REC;
799 /* idx = -1 means append */
800 rc = llog_write(env, llh, &rec, NULL, 0, (void *)lcfg, -1);
802 CERROR("failed %d\n", rc);
/**
 * Build a lustre_cfg command from up to four strings and append it to
 * \a llh.
 *
 * The per-thread buffer set (mgs_env_info(env)->mgi_bufs) is reset with
 * \a cfgname as buffer 0, and \a s1 .. \a s4 go into buffers 1 .. 4.  The
 * resulting lcfg gets \a cmd and \a nid, is written with record_lcfg() and
 * then freed.  A failure is logged together with all arguments.
 *
 * NOTE(review): the wrappers in this file pass 0 for unused strings;
 * whether each buffer is set only for a non-NULL string cannot be
 * confirmed from the lines shown here.
 */
806 static int record_base(const struct lu_env *env, struct llog_handle *llh,
807 char *cfgname, lnet_nid_t nid, int cmd,
808 char *s1, char *s2, char *s3, char *s4)
810 struct mgs_thread_info *mgi = mgs_env_info(env);
811 struct lustre_cfg *lcfg;
814 CDEBUG(D_MGS, "lcfg %s %#x %s %s %s %s\n", cfgname,
815 cmd, s1, s2, s3, s4);
817 lustre_cfg_bufs_reset(&mgi->mgi_bufs, cfgname);
819 lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 1, s1);
821 lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 2, s2);
823 lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 3, s3);
825 lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 4, s4);
827 lcfg = lustre_cfg_new(cmd, &mgi->mgi_bufs);
830 lcfg->lcfg_nid = nid;
832 rc = record_lcfg(env, llh, lcfg);
834 lustre_cfg_free(lcfg);
837 CERROR("error %d: lcfg %s %#x %s %s %s %s\n", rc, cfgname,
838 cmd, s1, s2, s3, s4);
/**
 * Record an LCFG_ADD_UUID command that associates \a nid with \a uuid.
 * No device name is given (cfgname is NULL); \a uuid goes into buffer 1.
 */
843 static inline int record_add_uuid(const struct lu_env *env,
844 struct llog_handle *llh,
845 uint64_t nid, char *uuid)
847 return record_base(env, llh, NULL, nid, LCFG_ADD_UUID, uuid, 0, 0, 0);
/**
 * Record an LCFG_ADD_CONN command for device \a devname with \a uuid in
 * buffer 1.  The nid field is 0.
 */
850 static inline int record_add_conn(const struct lu_env *env,
851 struct llog_handle *llh,
852 char *devname, char *uuid)
854 return record_base(env, llh, devname, 0, LCFG_ADD_CONN, uuid, 0, 0, 0);
/**
 * Record an LCFG_ATTACH command for device \a devname: \a type goes into
 * buffer 1 and \a uuid into buffer 2.
 */
857 static inline int record_attach(const struct lu_env *env,
858 struct llog_handle *llh, char *devname,
859 char *type, char *uuid)
861 return record_base(env, llh,devname, 0, LCFG_ATTACH, type, uuid, 0, 0);
/**
 * Record an LCFG_SETUP command for device \a devname with \a s1 .. \a s4
 * passed through unchanged as buffers 1 .. 4.
 */
864 static inline int record_setup(const struct lu_env *env,
865 struct llog_handle *llh, char *devname,
866 char *s1, char *s2, char *s3, char *s4)
868 return record_base(env, llh, devname, 0, LCFG_SETUP, s1, s2, s3, s4);
872 * \retval <0 record processing error
873 * \retval n record is processed. No need to copy the original one.
874 * \retval 0 record is not processed.
/**
 * Rewrite one command that lies inside the target device block.
 *
 * LCFG_ADD_UUID: the original record is dropped.  Every nid that
 * class_parse_nid() can extract from mrul->target.mti_params is recorded
 * with record_add_uuid(), using the whole mti_params string as the uuid.
 * device_nids_added is set once nids have been written.
 *
 * LCFG_SETUP (only after nids were added): the command is re-recorded with
 * the same device name and the same buffers 1, 3 and 4, but with
 * mti_params as buffer 2.
 */
876 static int process_command(const struct lu_env *env, struct lustre_cfg *lcfg,
877 struct mgs_replace_uuid_lookup *mrul)
884 if (lcfg->lcfg_command == LCFG_ADD_UUID) {
885 /* LCFG_ADD_UUID command found. Let's skip original command
886 and add passed nids */
887 ptr = mrul->target.mti_params;
888 while (class_parse_nid(ptr, &nid, &ptr) == 0) {
889 CDEBUG(D_MGS, "add nid %s with uuid %s, "
890 "device %s\n", libcfs_nid2str(nid),
891 mrul->target.mti_params,
892 mrul->target.mti_svname);
893 rc = record_add_uuid(env,
895 mrul->target.mti_params);
/* NOTE(review): when no nid was parsed at all, nid may never have been
 * written by class_parse_nid(), yet it is formatted in the message below
 * -- confirm it is initialised. */
900 if (nids_added == 0) {
901 CERROR("No new nids were added, nid %s with uuid %s, "
902 "device %s\n", libcfs_nid2str(nid),
903 mrul->target.mti_params,
904 mrul->target.mti_svname);
907 mrul->device_nids_added = 1;
913 if (mrul->device_nids_added && lcfg->lcfg_command == LCFG_SETUP) {
914 /* LCFG_SETUP command found. UUID should be changed */
915 rc = record_setup(env,
917 /* devname the same */
918 lustre_cfg_string(lcfg, 0),
919 /* s1 is not changed */
920 lustre_cfg_string(lcfg, 1),
921 /* new uuid should be
923 mrul->target.mti_params,
924 /* s3 is not changed */
925 lustre_cfg_string(lcfg, 3),
926 /* s4 is not changed */
927 lustre_cfg_string(lcfg, 4));
931 /* Other commands in target device block */
936 * Handler that is called for every record in the llog.
937 * Records are processed in the order they are placed in the llog.
939 * \param[in] llh log to be processed
940 * \param[in] rec current record
941 * \param[in] data mgs_replace_uuid_lookup structure
/**
 * llog_process() callback that copies a backup log into the new log while
 * replacing the nids of one target.
 *
 * \a data is a struct mgs_replace_uuid_lookup.  Per record:
 *  - records that fail lustre_cfg_sanity_check() are not copied;
 *  - check_markers() updates the block tracking state; records it asks to
 *    skip, or that arrive while mrul->skip_it is set, are not copied;
 *  - records outside the target device block are copied unchanged;
 *  - inside the block, once nids have been added, further LCFG_ADD_UUID
 *    and LCFG_ADD_CONN records are dropped;
 *  - everything else goes through process_command().
 *
 * The copy path works on a local copy of the record header.  Its lrh_len
 * is reduced by the header and tail sizes before llog_write() into
 * mrul->temp_llh.
 */
945 static int mgs_replace_handler(const struct lu_env *env,
946 struct llog_handle *llh,
947 struct llog_rec_hdr *rec,
950 struct llog_rec_hdr local_rec = *rec;
951 struct mgs_replace_uuid_lookup *mrul;
952 struct lustre_cfg *lcfg = REC_DATA(rec);
953 int cfg_len = REC_DATA_LEN(rec);
957 mrul = (struct mgs_replace_uuid_lookup *)data;
959 if (rec->lrh_type != OBD_CFG_REC) {
960 CERROR("unhandled lrh_type: %#x, cmd %x %s %s\n",
961 rec->lrh_type, lcfg->lcfg_command,
962 lustre_cfg_string(lcfg, 0),
963 lustre_cfg_string(lcfg, 1));
967 rc = lustre_cfg_sanity_check(lcfg, cfg_len);
969 /* Do not copy any invalidated records */
970 GOTO(skip_out, rc = 0);
973 rc = check_markers(lcfg, mrul);
974 if (rc || mrul->skip_it)
975 GOTO(skip_out, rc = 0);
977 /* Write to new log all commands outside target device block */
978 if (!mrul->in_target_device)
979 GOTO(copy_out, rc = 0);
981 /* Skip all other LCFG_ADD_UUID and LCFG_ADD_CONN records
982 (failover nids) for this target, assuming that if the
983 primary is changing then so is the failover */
984 if (mrul->device_nids_added &&
985 (lcfg->lcfg_command == LCFG_ADD_UUID ||
986 lcfg->lcfg_command == LCFG_ADD_CONN))
987 GOTO(skip_out, rc = 0);
989 rc = process_command(env, lcfg, mrul);
996 /* Record is placed in temporary llog as is */
997 local_rec.lrh_len -= sizeof(*rec) + sizeof(struct llog_rec_tail);
998 rc = llog_write(env, mrul->temp_llh, &local_rec, NULL, 0,
1001 CDEBUG(D_MGS, "Copied idx=%d, rc=%d, len=%d, cmd %x %s %s\n",
1002 rec->lrh_index, rc, rec->lrh_len, lcfg->lcfg_command,
1003 lustre_cfg_string(lcfg, 0), lustre_cfg_string(lcfg, 1));
1007 CDEBUG(D_MGS, "Skipped idx=%d, rc=%d, len=%d, cmd %x %s %s\n",
1008 rec->lrh_index, rc, rec->lrh_len, lcfg->lcfg_command,
1009 lustre_cfg_string(lcfg, 0), lustre_cfg_string(lcfg, 1));
/**
 * Copy the config log named \a fsname into a log named \a backup.
 *
 * Despite its name, \a fsname is used as the name of the source log:
 * mgs_replace_nids_log() passes a log name, and also calls this function
 * with the two names swapped to restore from a backup.
 *
 * Any existing \a backup log is erased (-ENOENT is tolerated).  A new
 * backup log is created and initialised as a plain log whose header uuid
 * is derived from \a backup.  The source log is opened and initialised,
 * then every record is copied with llog_copy_handler().  Both handles are
 * closed, the llog context is released, and a failure is logged.
 *
 * \param[in] mgs     MGS obd device that provides the llog context
 * \param[in] fsname  name of the log to copy from
 * \param[in] backup  name of the log to copy to
 */
1013 static int mgs_backup_llog(const struct lu_env *env,
1014 struct obd_device *mgs,
1015 char *fsname, char *backup)
1017 struct obd_uuid *uuid;
1018 struct llog_handle *orig_llh, *bak_llh;
1019 struct llog_ctxt *lctxt;
1023 lctxt = llog_get_context(mgs, LLOG_CONFIG_ORIG_CTXT);
1025 CERROR("%s: missing llog context\n", mgs->obd_name);
1026 GOTO(out, rc = -EINVAL);
1029 /* Make sure there's no old backup log */
1030 rc = llog_erase(env, lctxt, NULL, backup);
1031 if (rc < 0 && rc != -ENOENT)
1034 /* open backup log */
1035 rc = llog_open_create(env, lctxt, &bak_llh, NULL, backup);
1037 CERROR("%s: backup logfile open %s: rc = %d\n",
1038 mgs->obd_name, backup, rc);
1042 /* set the log header uuid */
1043 OBD_ALLOC_PTR(uuid);
1045 GOTO(out_put, rc = -ENOMEM);
1046 obd_str2uuid(uuid, backup);
1047 rc = llog_init_handle(env, bak_llh, LLOG_F_IS_PLAIN, uuid);
1050 GOTO(out_close1, rc);
1052 /* open original log */
1053 rc = llog_open(env, lctxt, &orig_llh, NULL, fsname,
1058 GOTO(out_close1, rc);
1061 rc = llog_init_handle(env, orig_llh, LLOG_F_IS_PLAIN, NULL);
1063 GOTO(out_close2, rc);
1065 /* Copy remote log */
1066 rc = llog_process(env, orig_llh, llog_copy_handler,
1067 (void *)bak_llh, NULL);
1070 rc2 = llog_close(env, orig_llh);
1074 rc2 = llog_close(env, bak_llh);
1079 llog_ctxt_put(lctxt);
1082 CERROR("%s: Failed to backup log %s: rc = %d\n",
1083 mgs->obd_name, fsname, rc);
1087 static int mgs_log_is_empty(const struct lu_env *env, struct mgs_device *mgs,
/**
 * Rewrite one config log so that \a devname uses the nids in \a nids.
 *
 * An empty log needs no work (rc = 0).  Otherwise:
 *  1. the log is copied to "<logname>.bak" with mgs_backup_llog();
 *  2. the original log is erased (-ENOENT tolerated) and re-created;
 *  3. the backup is opened and replayed through mgs_replace_handler(),
 *     which writes the modified records into the new log.
 * The restore path copies the backup back over \a logname with
 * mgs_backup_llog() and reports it with CERROR.
 *
 * \param[in] mgs      MGS obd device
 * \param[in] fsdb     filesystem database (not used by the visible code)
 * \param[in] logname  name of the config log to rewrite
 * \param[in] devname  target device whose nids are replaced
 * \param[in] nids     new nid list, copied into mrul->target.mti_params
 */
1090 static int mgs_replace_nids_log(const struct lu_env *env,
1091 struct obd_device *mgs, struct fs_db *fsdb,
1092 char *logname, char *devname, char *nids)
1094 struct llog_handle *orig_llh, *backup_llh;
1095 struct llog_ctxt *ctxt;
1096 struct mgs_replace_uuid_lookup *mrul;
1097 struct mgs_device *mgs_dev = lu2mgs_dev(mgs->obd_lu_dev);
1102 CDEBUG(D_MGS, "Replace nids for %s in %s\n", devname, logname);
1104 ctxt = llog_get_context(mgs, LLOG_CONFIG_ORIG_CTXT);
1105 LASSERT(ctxt != NULL);
1107 if (mgs_log_is_empty(env, mgs_dev, logname)) {
1108 /* Log is empty. Nothing to replace */
1109 GOTO(out_put, rc = 0);
1112 OBD_ALLOC(backup, strlen(logname) + 5);
1114 GOTO(out_put, rc = -ENOMEM);
1116 sprintf(backup, "%s.bak", logname);
1118 rc = mgs_backup_llog(env, mgs, logname, backup);
1120 CERROR("%s: can't make backup for %s: rc = %d\n",
1121 mgs->obd_name, logname, rc);
1125 /* Now erase original log file. Connections are not allowed.
1126 Backup is already saved */
1127 rc = llog_erase(env, ctxt, NULL, logname);
1128 if (rc < 0 && rc != -ENOENT)
1131 /* open local log */
1132 rc = llog_open_create(env, ctxt, &orig_llh, NULL, logname);
1134 GOTO(out_restore, rc);
1136 rc = llog_init_handle(env, orig_llh, LLOG_F_IS_PLAIN, NULL);
1138 GOTO(out_closel, rc);
1140 /* open backup llog */
1141 rc = llog_open(env, ctxt, &backup_llh, NULL, backup,
1144 GOTO(out_closel, rc);
1146 rc = llog_init_handle(env, backup_llh, LLOG_F_IS_PLAIN, NULL);
1148 GOTO(out_close, rc);
1150 if (llog_get_size(backup_llh) <= 1)
1151 GOTO(out_close, rc = 0);
1153 OBD_ALLOC_PTR(mrul);
1155 GOTO(out_close, rc = -ENOMEM);
/* NOTE(review): strncpy() with the full field size does not terminate the
 * destination when the source is that long or longer; this presumably
 * relies on OBD_ALLOC_PTR() zeroing and on bounded inputs -- confirm. */
1156 /* devname is only needed information to replace UUID records */
1157 strncpy(mrul->target.mti_svname, devname, MTI_NAME_MAXLEN);
1158 /* parse nids later */
1159 strncpy(mrul->target.mti_params, nids, MTI_PARAM_MAXLEN);
1160 /* Copy records to this temporary llog */
1161 mrul->temp_llh = orig_llh;
1163 rc = llog_process(env, backup_llh, mgs_replace_handler,
1164 (void *)mrul, NULL);
/* NOTE(review): both closes below pass NULL as the environment, while
 * every other llog_close() call in this file passes env -- confirm this is
 * intended. */
1167 rc2 = llog_close(NULL, backup_llh);
1171 rc2 = llog_close(NULL, orig_llh);
1177 CERROR("%s: llog should be restored: rc = %d\n",
1179 rc2 = mgs_backup_llog(env, mgs, backup, logname);
1181 CERROR("%s: can't restore backup %s: rc = %d\n",
1182 mgs->obd_name, logname, rc2);
/* NOTE(review): backup was allocated with strlen(logname) + 5 bytes and
 * then filled with "<logname>.bak", so strlen(backup) is already
 * strlen(logname) + 4.  The size freed below, strlen(backup) + 5, is
 * therefore 4 bytes larger than the size allocated; strlen(backup) + 1
 * would match -- confirm and fix. */
1186 OBD_FREE(backup, strlen(backup) + 5);
1189 llog_ctxt_put(ctxt);
1192 CERROR("%s: failed to replace nids in log %s: rc = %d\n",
1193 mgs->obd_name, logname, rc);
1199 * Parse device name and get file system name and/or device index
1201 * \param[in] devname device name (ex. lustre-MDT0000)
1202 * \param[out] fsname file system name(optional)
1203 * \param[out] index device index(optional)
/**
 * Split a device name such as "lustre-MDT0000" at its last '-'.
 *
 * The part before the last '-' is copied into \a fsname, which is treated
 * as a buffer of MTI_NAME_MAXLEN bytes (it is cleared with that size and
 * terminated at MTI_NAME_MAXLEN - 1).  The device index is obtained by
 * passing the pointer returned by strrchr() to server_name2index().
 */
1207 static int mgs_parse_devname(char *devname, char *fsname, __u32 *index)
1212 /* Extract fsname */
1213 ptr = strrchr(devname, '-');
1217 CDEBUG(D_MGS, "Device name %s without fsname\n",
1221 memset(fsname, 0, MTI_NAME_MAXLEN);
/* NOTE(review): the copy length is ptr - devname and is not capped at
 * MTI_NAME_MAXLEN in the visible code, so a long prefix could write past
 * the buffer before the terminator below is stored -- confirm. */
1222 strncpy(fsname, devname, ptr - devname);
1223 fsname[MTI_NAME_MAXLEN - 1] = 0;
1227 if (server_name2index(ptr, index, NULL) < 0) {
1228 CDEBUG(D_MGS, "Device name with wrong index\n");
/**
 * Heuristic check that nothing but the MGS is active on this node.
 *
 * Returns non-zero when at most 3 obd devices exist and \a mgs_obd has at
 * most 2 exports; the thresholds are the ones explained in the comment
 * inside the function.
 */
1236 static int only_mgs_is_running(struct obd_device *mgs_obd)
1238 /* TBD: does a global variable with the device count exist? */
1239 int num_devices = get_devices_count();
1240 /* osd, MGS and MGC + self_export
1241 (wc -l /proc/fs/lustre/devices <= 2) && (num_exports <= 2) */
1242 return (num_devices <= 3) && (mgs_obd->obd_num_exports <= 2);
/**
 * Build the MDT log name "<fsname>-MDTxxxx" for MDT index \a i.
 *
 * The index is formatted as at least four lowercase hex digits and the
 * result is allocated by name_create(); release it with name_destroy().
 */
1245 static int name_create_mdt(char **logname, char *fsname, int i)
1249 sprintf(mdt_index, "-MDT%04x", i);
1250 return name_create(logname, fsname, mdt_index);
1254 * Replace the nids of \a devname with the values given in \a nids
1256 * \param obd MGS obd device
1257 * \param devname nids need to be replaced for this device
1258 * (ex. lustre-OST0000)
1259 * \param nids nids list (ex. nid1,nid2,nid3)
/**
 * Entry point for replacing the nids of \a devname in all config logs.
 *
 * New connections are blocked by setting obd_no_conn under obd_dev_lock;
 * the previous value is saved and put back at the end.  The operation is
 * refused with -EINPROGRESS unless only_mgs_is_running().  The fsname and
 * index are parsed from \a devname and the filesystem database is found
 * or created.  The "<fsname>-client" log is rewritten first, followed by
 * the log of every MDT whose bit is set in fsdb_mdt_index_map.
 */
1263 int mgs_replace_nids(const struct lu_env *env,
1264 struct mgs_device *mgs,
1265 char *devname, char *nids)
1267 /* Assume fsname is part of device name */
1268 char fsname[MTI_NAME_MAXLEN];
1275 struct obd_device *mgs_obd = mgs->mgs_obd;
1278 /* We can only change NIDs if no other nodes are connected */
1279 spin_lock(&mgs_obd->obd_dev_lock);
1280 conn_state = mgs_obd->obd_no_conn;
1281 mgs_obd->obd_no_conn = 1;
1282 spin_unlock(&mgs_obd->obd_dev_lock);
1284 /* We can not change nids if not only MGS is started */
1285 if (!only_mgs_is_running(mgs_obd)) {
1286 CERROR("Only MGS is allowed to be started\n");
1287 GOTO(out, rc = -EINPROGRESS);
1290 /* Get fsname and index*/
1291 rc = mgs_parse_devname(devname, fsname, &index);
1295 rc = mgs_find_or_make_fsdb(env, mgs, fsname, &fsdb);
1297 CERROR("%s: can't find fsdb: rc = %d\n", fsname, rc);
/* NOTE(review): the result of name_create() is ignored here and in the MDT
 * loop below, while logname is used right afterwards; other callers store
 * the result in rc -- confirm an allocation failure cannot reach
 * mgs_replace_nids_log() or name_destroy(). */
1301 /* Process client llogs */
1302 name_create(&logname, fsname, "-client");
1303 rc = mgs_replace_nids_log(env, mgs_obd, fsdb, logname, devname, nids);
1304 name_destroy(&logname);
1306 CERROR("%s: error while replacing NIDs for %s: rc = %d\n",
1307 fsname, devname, rc);
1311 /* Process MDT llogs */
1312 for (i = 0; i < INDEX_MAP_SIZE * 8; i++) {
1313 if (!test_bit(i, fsdb->fsdb_mdt_index_map))
1315 name_create_mdt(&logname, fsname, i);
1316 rc = mgs_replace_nids_log(env, mgs_obd, fsdb, logname, devname, nids);
1317 name_destroy(&logname);
1323 spin_lock(&mgs_obd->obd_dev_lock);
1324 mgs_obd->obd_no_conn = conn_state;
1325 spin_unlock(&mgs_obd->obd_dev_lock);
/**
 * Record an LCFG_SETUP command for LOV device \a devname.
 *
 * The binary lov_desc is stored as buffer 1 (sizeof(*desc) bytes).  The
 * lcfg is written with record_lcfg() and then freed.
 */
1330 static int record_lov_setup(const struct lu_env *env, struct llog_handle *llh,
1331 char *devname, struct lov_desc *desc)
1333 struct mgs_thread_info *mgi = mgs_env_info(env);
1334 struct lustre_cfg *lcfg;
1337 lustre_cfg_bufs_reset(&mgi->mgi_bufs, devname);
1338 lustre_cfg_bufs_set(&mgi->mgi_bufs, 1, desc, sizeof(*desc));
1339 lcfg = lustre_cfg_new(LCFG_SETUP, &mgi->mgi_bufs);
1342 rc = record_lcfg(env, llh, lcfg);
1344 lustre_cfg_free(lcfg);
/**
 * Record an LCFG_SETUP command for LMV device \a devname.
 *
 * The binary lmv_desc is stored as buffer 1 (sizeof(*desc) bytes).  The
 * lcfg is written with record_lcfg() and then freed.
 */
1348 static int record_lmv_setup(const struct lu_env *env, struct llog_handle *llh,
1349 char *devname, struct lmv_desc *desc)
1351 struct mgs_thread_info *mgi = mgs_env_info(env);
1352 struct lustre_cfg *lcfg;
1355 lustre_cfg_bufs_reset(&mgi->mgi_bufs, devname);
1356 lustre_cfg_bufs_set(&mgi->mgi_bufs, 1, desc, sizeof(*desc));
1357 lcfg = lustre_cfg_new(LCFG_SETUP, &mgi->mgi_bufs);
1359 rc = record_lcfg(env, llh, lcfg);
1361 lustre_cfg_free(lcfg);
/**
 * Record an LCFG_ADD_MDC command for device \a logname.
 * Buffer order is: 1 = mdtuuid, 2 = index, 3 = gen, 4 = mdcuuid.
 */
1365 static inline int record_mdc_add(const struct lu_env *env,
1366 struct llog_handle *llh,
1367 char *logname, char *mdcuuid,
1368 char *mdtuuid, char *index,
1371 return record_base(env,llh,logname,0,LCFG_ADD_MDC,
1372 mdtuuid,index,gen,mdcuuid);
/**
 * Record an LCFG_LOV_ADD_OBD command for LOV device \a lov_name.
 * Buffer order is: 1 = ost_uuid, 2 = index, 3 = gen.
 */
1375 static inline int record_lov_add(const struct lu_env *env,
1376 struct llog_handle *llh,
1377 char *lov_name, char *ost_uuid,
1378 char *index, char *gen)
1380 return record_base(env,llh,lov_name,0,LCFG_LOV_ADD_OBD,
1381 ost_uuid, index, gen, 0);
/**
 * Record an LCFG_MOUNTOPT command (no device name).
 * Buffer order is: 1 = profile, 2 = lov_name, 3 = mdc_name.
 */
1384 static inline int record_mount_opt(const struct lu_env *env,
1385 struct llog_handle *llh,
1386 char *profile, char *lov_name,
1389 return record_base(env,llh,NULL,0,LCFG_MOUNTOPT,
1390 profile,lov_name,mdc_name,0);
/**
 * Append an LCFG_MARKER record for \a tgtname / \a comment to \a llh.
 *
 * The marker is built in the per-thread mgi_marker: cm_step comes from
 * fsdb->fsdb_gen, cm_flags from \a flags, cm_vers is LUSTRE_VERSION_CODE,
 * the create time is the current time and the cancel time is 0.  The
 * marker is stored as buffer 1 of the lcfg, which is written with
 * record_lcfg() and then freed.
 *
 * NOTE(review): cm_tgtname and cm_comment are filled with strncpy() using
 * the full field size, which leaves no terminator when the source is at
 * least that long -- confirm callers bound both strings.
 */
1393 static int record_marker(const struct lu_env *env,
1394 struct llog_handle *llh,
1395 struct fs_db *fsdb, __u32 flags,
1396 char *tgtname, char *comment)
1398 struct mgs_thread_info *mgi = mgs_env_info(env);
1399 struct lustre_cfg *lcfg;
1402 if (flags & CM_START)
1404 mgi->mgi_marker.cm_step = fsdb->fsdb_gen;
1405 mgi->mgi_marker.cm_flags = flags;
1406 mgi->mgi_marker.cm_vers = LUSTRE_VERSION_CODE;
1407 strncpy(mgi->mgi_marker.cm_tgtname, tgtname,
1408 sizeof(mgi->mgi_marker.cm_tgtname));
1409 strncpy(mgi->mgi_marker.cm_comment, comment,
1410 sizeof(mgi->mgi_marker.cm_comment));
1411 mgi->mgi_marker.cm_createtime = cfs_time_current_sec();
1412 mgi->mgi_marker.cm_canceltime = 0;
1413 lustre_cfg_bufs_reset(&mgi->mgi_bufs, NULL);
1414 lustre_cfg_bufs_set(&mgi->mgi_bufs, 1, &mgi->mgi_marker,
1415 sizeof(mgi->mgi_marker));
1416 lcfg = lustre_cfg_new(LCFG_MARKER, &mgi->mgi_bufs);
1419 rc = record_lcfg(env, llh, lcfg);
1421 lustre_cfg_free(lcfg);
/*
 * Open (creating if necessary) the config llog @name and prepare it for
 * appending records.
 *
 * The log is looked up through the LLOG_CONFIG_ORIG_CTXT context of the MGS
 * obd, opened with llog_open_create() and initialised as a plain log
 * (LLOG_F_IS_PLAIN) tagged with the static "config_uuid".  The context
 * reference is dropped with llog_ctxt_put() before returning.
 *
 * Error codes visible here: -EBUSY and -ENODEV (the guarding conditions are
 * not visible in this listing; presumably "*llh already set" and "no
 * context" respectively).  Failures are reported through CERROR.
 */
1425 static int record_start_log(const struct lu_env *env, struct mgs_device *mgs,
1426 struct llog_handle **llh, char *name)
1428 static struct obd_uuid cfg_uuid = { .uuid = "config_uuid" };
1429 struct llog_ctxt *ctxt;
1433 GOTO(out, rc = -EBUSY);
1435 ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
1437 GOTO(out, rc = -ENODEV);
1438 LASSERT(ctxt->loc_obd == mgs->mgs_obd);
1440 rc = llog_open_create(env, ctxt, llh, NULL, name);
1443 rc = llog_init_handle(env, *llh, LLOG_F_IS_PLAIN, &cfg_uuid);
1445 llog_close(env, *llh);
1447 llog_ctxt_put(ctxt);
1450 CERROR("%s: can't start log %s: rc = %d\n",
1451 mgs->mgs_obd->obd_name, name, rc);
/*
 * Finish writing to a config llog: closes *llh with llog_close().
 * NOTE(review): no NULL check on *llh is visible here, so callers should
 * only call this after a successful record_start_log() - confirm.
 */
1457 static int record_end_log(const struct lu_env *env, struct llog_handle **llh)
1461 rc = llog_close(env, *llh);
1467 static int mgs_log_is_empty(const struct lu_env *env,
1468 struct mgs_device *mgs, char *name)
1470 struct llog_handle *llh;
1471 struct llog_ctxt *ctxt;
1474 ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
1475 LASSERT(ctxt != NULL);
1476 rc = llog_open(env, ctxt, &llh, NULL, name, LLOG_OPEN_EXISTS);
1483 llog_init_handle(env, llh, LLOG_F_IS_PLAIN, NULL);
1485 GOTO(out_close, rc);
1486 rc = llog_get_size(llh);
1489 llog_close(env, llh);
1491 llog_ctxt_put(ctxt);
1492 /* header is record 1 */
1496 /******************** config "macros" *********************/
/*
 * Append @lcfg to the config llog @logname, bracketed by a CM_START and a
 * CM_END marker carrying @devname and @comment.  The log is opened with
 * record_start_log() and closed with record_end_log().
 */
1498 /* write an lcfg directly into a log (with markers) */
1499 static int mgs_write_log_direct(const struct lu_env *env,
1500 struct mgs_device *mgs, struct fs_db *fsdb,
1501 char *logname, struct lustre_cfg *lcfg,
1502 char *devname, char *comment)
1504 struct llog_handle *llh = NULL;
1511 rc = record_start_log(env, mgs, &llh, logname);
1515 /* FIXME These should be a single journal transaction */
1516 rc = record_marker(env, llh, fsdb, CM_START, devname, comment);
1519 rc = record_lcfg(env, llh, lcfg);
1522 rc = record_marker(env, llh, fsdb, CM_END, devname, comment);
1526 record_end_log(env, &llh);
1530 /* write the lcfg in all logs for the given fs */
1531 int mgs_write_log_direct_all(const struct lu_env *env,
1532 struct mgs_device *mgs,
1534 struct mgs_target_info *mti,
1535 struct lustre_cfg *lcfg,
1536 char *devname, char *comment,
1540 struct mgs_direntry *dirent, *n;
1541 char *fsname = mti->mti_fsname;
1543 int rc = 0, len = strlen(fsname);
1546 /* We need to set params for any future logs
1547 as well. FIXME Append this file to every new log.
1548 Actually, we should store as params (text), not llogs. Or
1550 rc = name_create(&logname, fsname, "-params");
1553 if (mgs_log_is_empty(env, mgs, logname)) {
1554 struct llog_handle *llh = NULL;
1555 rc = record_start_log(env, mgs, &llh, logname);
1556 record_end_log(env, &llh);
1558 name_destroy(&logname);
1562 /* Find all the logs in the CONFIGS directory */
1563 rc = class_dentry_readdir(env, mgs, &list);
1567 /* Could use fsdb index maps instead of directory listing */
1568 cfs_list_for_each_entry_safe(dirent, n, &list, list) {
1569 cfs_list_del(&dirent->list);
1570 /* don't write to sptlrpc rule log */
1571 if (strstr(dirent->name, "-sptlrpc") != NULL)
1574 /* caller wants write server logs only */
1575 if (server_only && strstr(dirent->name, "-client") != NULL)
1578 if (strncmp(fsname, dirent->name, len) == 0) {
1579 CDEBUG(D_MGS, "Changing log %s\n", dirent->name);
1580 /* Erase any old settings of this same parameter */
1581 rc = mgs_modify(env, mgs, fsdb, mti, dirent->name,
1582 devname, comment, CM_SKIP);
1584 CERROR("%s: Can't modify llog %s: rc = %d\n",
1585 mgs->mgs_obd->obd_name, dirent->name,rc);
1586 /* Write the new one */
1588 rc = mgs_write_log_direct(env, mgs, fsdb,
1593 CERROR("%s: writing log %s: rc = %d\n",
1594 mgs->mgs_obd->obd_name,
1599 mgs_direntry_free(dirent);
/*
 * Forward declarations: mgs_steal_llog_handler() below calls these three
 * helpers before their definitions appear in the file.
 */
1605 static int mgs_write_log_osp_to_mdt(const struct lu_env *env,
1606 struct mgs_device *mgs,
1608 struct mgs_target_info *mti,
1609 int index, char *logname);
1610 static int mgs_write_log_osc_to_lov(const struct lu_env *env,
1611 struct mgs_device *mgs,
1613 struct mgs_target_info *mti,
1614 char *logname, char *suffix, char *lovname,
1615 enum lustre_sec_part sec_part, int flags);
1616 static int name_create_mdt_and_lov(char **logname, char **lovname,
1617 struct fs_db *fsdb, int i);
/*
 * Append " <key><val>" to the NUL-terminated parameter string @params.
 *
 * The available space is taken to be sizeof(mti_params) of struct
 * mgs_target_info, i.e. @params is assumed to be that field.  If the new
 * text would not fit, an error is logged via CERROR and nothing is appended.
 * NOTE(review): the CERROR and sprintf calls tolerate a NULL @key, so the
 * strlen(key) below is presumably guarded by a check not visible in this
 * listing - confirm.
 */
1619 static int add_param(char *params, char *key, char *val)
1621 char *start = params + strlen(params);
1622 char *end = params + sizeof(((struct mgs_target_info *)0)->mti_params);
1626 keylen = strlen(key);
1627 if (start + 1 + keylen + strlen(val) >= end) {
1628 CERROR("params are too long: %s %s%s\n",
1629 params, key != NULL ? key : "", val);
1633 sprintf(start, " %s%s", key != NULL ? key : "", val);
/*
 * Per-record callback used while replaying the client config llog in order
 * to copy ("steal") OSC/MDC configuration into a new MDT's log.
 *
 * @data is a struct temp_comp carrying the new target (comp_mti), a scratch
 * target description (comp_tmti), the fsdb and the MGS obd.
 *
 * Visible processing:
 *  - records that are not OBD_CFG_REC, or fail lustre_cfg_sanity_check(),
 *    are reported with CERROR;
 *  - LCFG_MARKER: "add osc" / "add mdc" START and END markers (not CM_SKIP)
 *    switch the got_an_osc_or_mdc state (1 = osc, 2 = mdc, 0 = none) and
 *    remember the marker step; for "add osc" a matching "add osc(copied)"
 *    marker is written to the MDT log;
 *  - LCFG_ADD_UUID: the NID is stored as a target NID while tmti->mti_uuid
 *    is still empty, otherwise appended as a PARAM_FAILNODE parameter;
 *  - LCFG_SETUP: buffer 1 is copied into tmti->mti_uuid;
 *  - LCFG_ADD_MDC: index parsed from buffer 2, then
 *    mgs_write_log_osp_to_mdt() is called and tmti is zeroed;
 *  - LCFG_LOV_ADD_OBD: index parsed from buffer 2, then
 *    mgs_write_log_osc_to_lov() is called.
 *
 * NOTE(review): got_an_osc_or_mdc and last_step are function-static, so the
 * parser state is shared by every invocation; this looks safe only if calls
 * are serialised by the caller - confirm.
 * NOTE(review): the memcpy(..., strlen(src)) copies do not copy a NUL
 * terminator and the strncpy into mti_svname may leave it unterminated;
 * both appear to rely on tmti having been zeroed beforehand - confirm.
 * NOTE(review): no bound check on tmti->mti_nid_count is visible before the
 * store into tmti->mti_nids[] - confirm the array cannot overflow.
 */
1637 static int mgs_steal_llog_handler(const struct lu_env *env,
1638 struct llog_handle *llh,
1639 struct llog_rec_hdr *rec, void *data)
1641 struct mgs_device *mgs;
1642 struct obd_device *obd;
1643 struct mgs_target_info *mti, *tmti;
1645 int cfg_len = rec->lrh_len;
1646 char *cfg_buf = (char*) (rec + 1);
1647 struct lustre_cfg *lcfg;
1649 struct llog_handle *mdt_llh = NULL;
1650 static int got_an_osc_or_mdc = 0;
1651 /* 0: not found any osc/mdc;
1655 static int last_step = -1;
1659 mti = ((struct temp_comp*)data)->comp_mti;
1660 tmti = ((struct temp_comp*)data)->comp_tmti;
1661 fsdb = ((struct temp_comp*)data)->comp_fsdb;
1662 obd = ((struct temp_comp *)data)->comp_obd;
1663 mgs = lu2mgs_dev(obd->obd_lu_dev);
1666 if (rec->lrh_type != OBD_CFG_REC) {
1667 CERROR("unhandled lrh_type: %#x\n", rec->lrh_type);
1671 rc = lustre_cfg_sanity_check(cfg_buf, cfg_len);
1673 CERROR("Insane cfg\n");
1677 lcfg = (struct lustre_cfg *)cfg_buf;
1679 if (lcfg->lcfg_command == LCFG_MARKER) {
1680 struct cfg_marker *marker;
1681 marker = lustre_cfg_buf(lcfg, 1);
1682 if (!strncmp(marker->cm_comment,"add osc",7) &&
1683 (marker->cm_flags & CM_START) &&
1684 !(marker->cm_flags & CM_SKIP)) {
1685 got_an_osc_or_mdc = 1;
1686 strncpy(tmti->mti_svname, marker->cm_tgtname,
1687 sizeof(tmti->mti_svname));
1688 rc = record_start_log(env, mgs, &mdt_llh,
1692 rc = record_marker(env, mdt_llh, fsdb, CM_START,
1693 mti->mti_svname,"add osc(copied)");
1694 record_end_log(env, &mdt_llh);
1695 last_step = marker->cm_step;
1698 if (!strncmp(marker->cm_comment,"add osc",7) &&
1699 (marker->cm_flags & CM_END) &&
1700 !(marker->cm_flags & CM_SKIP)) {
1701 LASSERT(last_step == marker->cm_step);
1703 got_an_osc_or_mdc = 0;
1704 rc = record_start_log(env, mgs, &mdt_llh,
1708 rc = record_marker(env, mdt_llh, fsdb, CM_END,
1709 mti->mti_svname,"add osc(copied)");
1710 record_end_log(env, &mdt_llh);
1713 if (!strncmp(marker->cm_comment,"add mdc",7) &&
1714 (marker->cm_flags & CM_START) &&
1715 !(marker->cm_flags & CM_SKIP)) {
1716 got_an_osc_or_mdc = 2;
1717 last_step = marker->cm_step;
1718 memcpy(tmti->mti_svname, marker->cm_tgtname,
1719 strlen(marker->cm_tgtname));
1723 if (!strncmp(marker->cm_comment,"add mdc",7) &&
1724 (marker->cm_flags & CM_END) &&
1725 !(marker->cm_flags & CM_SKIP)) {
1726 LASSERT(last_step == marker->cm_step);
1728 got_an_osc_or_mdc = 0;
1733 if (got_an_osc_or_mdc == 0 || last_step < 0)
1736 if (lcfg->lcfg_command == LCFG_ADD_UUID) {
1737 uint64_t nodenid = lcfg->lcfg_nid;
1739 if (strlen(tmti->mti_uuid) == 0) {
1740 /* target uuid not set, this config record is before
1741 * LCFG_SETUP, this nid is one of target node nid.
1743 tmti->mti_nids[tmti->mti_nid_count] = nodenid;
1744 tmti->mti_nid_count++;
1746 /* failover node nid */
1747 rc = add_param(tmti->mti_params, PARAM_FAILNODE,
1748 libcfs_nid2str(nodenid));
1754 if (lcfg->lcfg_command == LCFG_SETUP) {
1757 target = lustre_cfg_string(lcfg, 1);
1758 memcpy(tmti->mti_uuid, target, strlen(target));
1762 /* ignore client side sptlrpc_conf_log */
1763 if (lcfg->lcfg_command == LCFG_SPTLRPC_CONF)
1766 if (lcfg->lcfg_command == LCFG_ADD_MDC) {
1769 if (sscanf(lustre_cfg_buf(lcfg, 2), "%d", &index) != 1)
1772 memcpy(tmti->mti_fsname, mti->mti_fsname,
1773 strlen(mti->mti_fsname));
1774 tmti->mti_stripe_index = index;
1776 rc = mgs_write_log_osp_to_mdt(env, mgs, fsdb, tmti,
1777 mti->mti_stripe_index,
1779 memset(tmti, 0, sizeof(*tmti));
1783 if (lcfg->lcfg_command == LCFG_LOV_ADD_OBD) {
1786 char *logname, *lovname;
1788 rc = name_create_mdt_and_lov(&logname, &lovname, fsdb,
1789 mti->mti_stripe_index);
1792 sprintf(mdt_index, "-MDT%04x", mti->mti_stripe_index);
1794 if (sscanf(lustre_cfg_buf(lcfg, 2), "%d", &index) != 1) {
1795 name_destroy(&logname);
1796 name_destroy(&lovname);
1800 tmti->mti_stripe_index = index;
1801 rc = mgs_write_log_osc_to_lov(env, mgs, fsdb, tmti, logname,
1804 name_destroy(&logname);
1805 name_destroy(&lovname);
/*
 * Replay the client config llog @client_name through
 * mgs_steal_llog_handler() so that its OSC/MDC entries are copied into the
 * log of the MDT described by @comp.
 *
 * A scratch mgs_target_info is allocated and published through
 * comp->comp_tmti, and comp->comp_obd is set to the MGS obd, before the log
 * is opened, initialised as a plain log and processed with
 * llog_process_or_fork().  The log is closed and the llog context released
 * before returning.  Returns -ENOMEM if the scratch target info cannot be
 * allocated.
 */
1811 /* fsdb->fsdb_mutex is already held in mgs_write_log_target*/
1812 /* stealed from mgs_get_fsdb_from_llog*/
1813 static int mgs_steal_llog_for_mdt_from_client(const struct lu_env *env,
1814 struct mgs_device *mgs,
1816 struct temp_comp* comp)
1818 struct llog_handle *loghandle;
1819 struct mgs_target_info *tmti;
1820 struct llog_ctxt *ctxt;
1825 ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
1826 LASSERT(ctxt != NULL);
1828 OBD_ALLOC_PTR(tmti);
1830 GOTO(out_ctxt, rc = -ENOMEM);
1832 comp->comp_tmti = tmti;
1833 comp->comp_obd = mgs->mgs_obd;
1835 rc = llog_open(env, ctxt, &loghandle, NULL, client_name,
1843 rc = llog_init_handle(env, loghandle, LLOG_F_IS_PLAIN, NULL);
1845 GOTO(out_close, rc);
1847 rc = llog_process_or_fork(env, loghandle, mgs_steal_llog_handler,
1848 (void *)comp, NULL, false);
1849 CDEBUG(D_MGS, "steal llog re = %d\n", rc);
1851 llog_close(env, loghandle);
1855 llog_ctxt_put(ctxt);
/*
 * Write the "lmv setup" stanza for @lmvname into the log @logname.
 *
 * A temporary lmv_desc is allocated with both target counts set to 0 and
 * its uuid set to "<lmvname>_UUID".  The stanza written is: CM_START
 * marker, attach (type "lmv"), LMV setup, CM_END marker.  The descriptor is
 * freed before returning.
 */
1859 /* lmv is the second thing for client logs */
1860 /* copied from mgs_write_log_lov. Please refer to that. */
1861 static int mgs_write_log_lmv(const struct lu_env *env,
1862 struct mgs_device *mgs,
1864 struct mgs_target_info *mti,
1865 char *logname, char *lmvname)
1867 struct llog_handle *llh = NULL;
1868 struct lmv_desc *lmvdesc;
1873 CDEBUG(D_MGS, "Writing lmv(%s) log for %s\n", lmvname,logname);
1875 OBD_ALLOC_PTR(lmvdesc);
1876 if (lmvdesc == NULL)
1878 lmvdesc->ld_active_tgt_count = 0;
1879 lmvdesc->ld_tgt_count = 0;
1880 sprintf((char*)lmvdesc->ld_uuid.uuid, "%s_UUID", lmvname);
1881 uuid = (char *)lmvdesc->ld_uuid.uuid;
1883 rc = record_start_log(env, mgs, &llh, logname);
1886 rc = record_marker(env, llh, fsdb, CM_START, lmvname, "lmv setup");
1889 rc = record_attach(env, llh, lmvname, "lmv", uuid);
1892 rc = record_lmv_setup(env, llh, lmvname, lmvdesc);
1895 rc = record_marker(env, llh, fsdb, CM_END, lmvname, "lmv setup");
1899 record_end_log(env, &llh);
1901 OBD_FREE_PTR(lmvdesc);
/*
 * Write the "lov setup" stanza for @lovname into the log @logname.
 *
 * A temporary lov_desc is allocated and filled with defaults: magic
 * LOV_DESC_MAGIC, no targets, stripe count 1, RAID0 pattern, 1 MiB stripe
 * size, stripe offset -1, QOS_DEFAULT_MAXAGE, uuid "<lovname>_UUID".  The
 * stanza written is: CM_START marker, attach (type "lov"), LOV setup,
 * CM_END marker.  The descriptor is freed before returning.
 */
1905 /* lov is the first thing in the mdt and client logs */
1906 static int mgs_write_log_lov(const struct lu_env *env, struct mgs_device *mgs,
1907 struct fs_db *fsdb, struct mgs_target_info *mti,
1908 char *logname, char *lovname)
1910 struct llog_handle *llh = NULL;
1911 struct lov_desc *lovdesc;
1916 CDEBUG(D_MGS, "Writing lov(%s) log for %s\n", lovname, logname);
1919 #01 L attach 0:lov_mdsA 1:lov 2:71ccb_lov_mdsA_19f961a9e1
1920 #02 L lov_setup 0:lov_mdsA 1:(struct lov_desc)
1921 uuid=lov1_UUID, stripe count=1, size=1048576, offset=0, pattern=0
1924 /* FIXME just make lov_setup accept empty desc (put uuid in buf 2) */
1925 OBD_ALLOC_PTR(lovdesc);
1926 if (lovdesc == NULL)
1928 lovdesc->ld_magic = LOV_DESC_MAGIC;
1929 lovdesc->ld_tgt_count = 0;
1930 /* Defaults. Can be changed later by lcfg config_param */
1931 lovdesc->ld_default_stripe_count = 1;
1932 lovdesc->ld_pattern = LOV_PATTERN_RAID0;
1933 lovdesc->ld_default_stripe_size = 1024 * 1024;
1934 lovdesc->ld_default_stripe_offset = -1;
1935 lovdesc->ld_qos_maxage = QOS_DEFAULT_MAXAGE;
1936 sprintf((char*)lovdesc->ld_uuid.uuid, "%s_UUID", lovname);
1937 /* can these be the same? */
1938 uuid = (char *)lovdesc->ld_uuid.uuid;
1940 /* This should always be the first entry in a log.
1941 rc = mgs_clear_log(obd, logname); */
1942 rc = record_start_log(env, mgs, &llh, logname);
1945 /* FIXME these should be a single journal transaction */
1946 rc = record_marker(env, llh, fsdb, CM_START, lovname, "lov setup");
1949 rc = record_attach(env, llh, lovname, "lov", uuid);
1952 rc = record_lov_setup(env, llh, lovname, lovdesc);
1955 rc = record_marker(env, llh, fsdb, CM_END, lovname, "lov setup");
1960 record_end_log(env, &llh);
1962 OBD_FREE_PTR(lovdesc);
/*
 * Record the failover NIDs found in mti->mti_params into the already open
 * log @llh, on behalf of the client device @cliname.
 *
 * Every PARAM_FAILNODE entry in the parameter string is scanned for NIDs.
 * The first NID of a group also serves as the failover node uuid; each NID
 * is recorded with record_add_uuid() and the uuid is then attached to
 * @cliname with record_add_conn().  The temporary uuid string is released
 * with name_destroy().
 */
1966 /* add failnids to open log */
1967 static int mgs_write_log_failnids(const struct lu_env *env,
1968 struct mgs_target_info *mti,
1969 struct llog_handle *llh,
1972 char *failnodeuuid = NULL;
1973 char *ptr = mti->mti_params;
1978 #03 L add_uuid nid=uml1@tcp(0x20000c0a80201) nal=90 0: 1:uml1_UUID
1979 #04 L add_uuid nid=1@elan(0x1000000000001) nal=90 0: 1:uml1_UUID
1980 #05 L setup 0:OSC_uml1_ost1_mdsA 1:ost1_UUID 2:uml1_UUID
1981 #06 L add_uuid nid=uml2@tcp(0x20000c0a80202) nal=90 0: 1:uml2_UUID
1982 #0x L add_uuid nid=2@elan(0x1000000000002) nal=90 0: 1:uml2_UUID
1983 #07 L add_conn 0:OSC_uml1_ost1_mdsA 1:uml2_UUID
1986 /* Pull failnid info out of params string */
1987 while (class_find_param(ptr, PARAM_FAILNODE, &ptr) == 0) {
1988 while (class_parse_nid(ptr, &nid, &ptr) == 0) {
1989 if (failnodeuuid == NULL) {
1990 /* We don't know the failover node name,
1991 so just use the first nid as the uuid */
1992 rc = name_create(&failnodeuuid,
1993 libcfs_nid2str(nid), "");
1997 CDEBUG(D_MGS, "add nid %s for failover uuid %s, "
1998 "client %s\n", libcfs_nid2str(nid),
1999 failnodeuuid, cliname);
2000 rc = record_add_uuid(env, llh, nid, failnodeuuid);
2003 rc = record_add_conn(env, llh, cliname, failnodeuuid);
2006 name_destroy(&failnodeuuid);
/*
 * Add an MDC for the MDT described by @mti to the LMV @lmvname in the log
 * @logname.
 *
 * The log must not be empty (an error is logged otherwise).  Names derived
 * here: node uuid from the first NID, "<svname>-mdc", "<mdcname>_UUID" and
 * "<lmvname>_UUID".  Records written between the markers: one add_uuid per
 * target NID, attach (LUSTRE_MDC_NAME), setup, failover NIDs, and the
 * add-MDC record carrying the stripe index.  All temporary names are
 * destroyed before returning.
 */
2010 static int mgs_write_log_mdc_to_lmv(const struct lu_env *env,
2011 struct mgs_device *mgs,
2013 struct mgs_target_info *mti,
2014 char *logname, char *lmvname)
2016 struct llog_handle *llh = NULL;
2017 char *mdcname = NULL;
2018 char *nodeuuid = NULL;
2019 char *mdcuuid = NULL;
2020 char *lmvuuid = NULL;
2025 if (mgs_log_is_empty(env, mgs, logname)) {
2026 CERROR("log is empty! Logical error\n");
2030 CDEBUG(D_MGS, "adding mdc for %s to log %s:lmv(%s)\n",
2031 mti->mti_svname, logname, lmvname);
2033 rc = name_create(&nodeuuid, libcfs_nid2str(mti->mti_nids[0]), "");
2036 rc = name_create(&mdcname, mti->mti_svname, "-mdc");
2039 rc = name_create(&mdcuuid, mdcname, "_UUID");
2042 rc = name_create(&lmvuuid, lmvname, "_UUID");
2046 rc = record_start_log(env, mgs, &llh, logname);
2049 rc = record_marker(env, llh, fsdb, CM_START, mti->mti_svname,
2053 for (i = 0; i < mti->mti_nid_count; i++) {
2054 CDEBUG(D_MGS, "add nid %s for mdt\n",
2055 libcfs_nid2str(mti->mti_nids[i]));
2057 rc = record_add_uuid(env, llh, mti->mti_nids[i], nodeuuid);
2062 rc = record_attach(env, llh, mdcname, LUSTRE_MDC_NAME, lmvuuid);
2065 rc = record_setup(env, llh, mdcname, mti->mti_uuid, nodeuuid, 0, 0);
2068 rc = mgs_write_log_failnids(env, mti, llh, mdcname);
2071 snprintf(index, sizeof(index), "%d", mti->mti_stripe_index);
2072 rc = record_mdc_add(env, llh, lmvname, mdcuuid, mti->mti_uuid,
2076 rc = record_marker(env, llh, fsdb, CM_END, mti->mti_svname,
2081 record_end_log(env, &llh);
2083 name_destroy(&lmvuuid);
2084 name_destroy(&mdcuuid);
2085 name_destroy(&mdcname);
2086 name_destroy(&nodeuuid);
/*
 * Build the name of an MDT's LOV into *lovname.
 * For index 0 on a filesystem flagged FSDB_OSCNAME18 the name is
 * "<fsname>-mdtlov"; otherwise it is "<mdtname>-mdtlov".
 * Returns the result of name_create().
 */
2090 static inline int name_create_lov(char **lovname, char *mdtname,
2091 struct fs_db *fsdb, int index)
2094 if (index == 0 && test_bit(FSDB_OSCNAME18, &fsdb->fsdb_flags))
2095 return name_create(lovname, fsdb->fsdb_name, "-mdtlov");
2097 return name_create(lovname, mdtname, "-mdtlov");
/*
 * Build both the MDT log name (*logname, via name_create_mdt()) and the
 * matching LOV name (*lovname) for MDT index @i.
 * The LOV name is "<fsname>-mdtlov" for index 0 on a FSDB_OSCNAME18
 * filesystem and "<logname>-mdtlov" otherwise.  *logname is destroyed again
 * on one path - presumably when creating the LOV name fails; the guarding
 * condition is not visible in this listing.
 */
2100 static int name_create_mdt_and_lov(char **logname, char **lovname,
2101 struct fs_db *fsdb, int i)
2105 rc = name_create_mdt(logname, fsdb->fsdb_name, i);
2109 if (i == 0 && test_bit(FSDB_OSCNAME18, &fsdb->fsdb_flags))
2110 rc = name_create(lovname, fsdb->fsdb_name, "-mdtlov");
2112 rc = name_create(lovname, *logname, "-mdtlov");
2114 name_destroy(logname);
/*
 * Build the name of the OSC that MDT index @i uses for the OST @ostname.
 * The suffix is "-osc" for index 0 on a FSDB_OSCNAME18 filesystem and
 * "-osc-MDT%04x" (hex index) otherwise.  Returns the result of
 * name_create().
 */
2120 static inline int name_create_mdt_osc(char **oscname, char *ostname,
2121 struct fs_db *fsdb, int i)
2125 if (i == 0 && test_bit(FSDB_OSCNAME18, &fsdb->fsdb_flags))
2126 sprintf(suffix, "-osc");
2128 sprintf(suffix, "-osc-MDT%04x", i);
2129 return name_create(oscname, ostname, suffix);
2132 /* add new mdc to already existent MDS */
2133 static int mgs_write_log_osp_to_mdt(const struct lu_env *env,
2134 struct mgs_device *mgs,
2136 struct mgs_target_info *mti,
2137 int mdt_index, char *logname)
2139 struct llog_handle *llh = NULL;
2140 char *nodeuuid = NULL;
2141 char *ospname = NULL;
2142 char *lovuuid = NULL;
2143 char *mdtuuid = NULL;
2144 char *svname = NULL;
2145 char *mdtname = NULL;
2146 char *lovname = NULL;
2151 if (mgs_log_is_empty(env, mgs, mti->mti_svname)) {
2152 CERROR("log is empty! Logical error\n");
2156 CDEBUG(D_MGS, "adding osp index %d to %s\n", mti->mti_stripe_index,
2159 rc = name_create_mdt(&mdtname, fsdb->fsdb_name, mti->mti_stripe_index);
2163 rc = name_create(&nodeuuid, libcfs_nid2str(mti->mti_nids[0]), "");
2165 GOTO(out_destory, rc);
2167 rc = name_create(&svname, mdtname, "-osp");
2169 GOTO(out_destory, rc);
2171 sprintf(index_str, "-MDT%04x", mdt_index);
2172 rc = name_create(&ospname, svname, index_str);
2174 GOTO(out_destory, rc);
2176 rc = name_create_lov(&lovname, logname, fsdb, mdt_index);
2178 GOTO(out_destory, rc);
2180 rc = name_create(&lovuuid, lovname, "_UUID");
2182 GOTO(out_destory, rc);
2184 rc = name_create(&mdtuuid, mdtname, "_UUID");
2186 GOTO(out_destory, rc);
2188 rc = record_start_log(env, mgs, &llh, logname);
2190 GOTO(out_destory, rc);
2192 rc = record_marker(env, llh, fsdb, CM_START, mti->mti_svname,
2195 GOTO(out_destory, rc);
2197 for (i = 0; i < mti->mti_nid_count; i++) {
2198 CDEBUG(D_MGS, "add nid %s for mdt\n",
2199 libcfs_nid2str(mti->mti_nids[i]));
2200 rc = record_add_uuid(env, llh, mti->mti_nids[i], nodeuuid);
2205 rc = record_attach(env, llh, ospname, LUSTRE_OSP_NAME, lovuuid);
2209 rc = record_setup(env, llh, ospname, mti->mti_uuid, nodeuuid,
2214 rc = mgs_write_log_failnids(env, mti, llh, ospname);
2218 /* Add mdc(osp) to lod */
2219 snprintf(index_str, sizeof(mti->mti_stripe_index), "%d",
2220 mti->mti_stripe_index);
2221 rc = record_base(env, llh, lovname, 0, LCFG_ADD_MDC, mti->mti_uuid,
2222 index_str, "1", NULL);
2226 rc = record_marker(env, llh, fsdb, CM_END, mti->mti_svname, "add osp");
2231 record_end_log(env, &llh);
2234 name_destroy(&mdtuuid);
2235 name_destroy(&lovuuid);
2236 name_destroy(&lovname);
2237 name_destroy(&ospname);
2238 name_destroy(&svname);
2239 name_destroy(&nodeuuid);
2240 name_destroy(&mdtname);
/*
 * Write the MDT's own startup log (named after mti->mti_svname).
 *
 * If the log is still empty, the "<svname>-mdtlov" LOV setup is written
 * first.  Then an "add mdt" stanza is appended: attach (LUSTRE_MDT_NAME,
 * uuid "<svname>_UUID"), mount option, and setup with the stripe index, the
 * LOV name and the failure mode ("n" when the failmode parameter starts
 * with "failout", "f" otherwise).  The temporary uuid buffer and LOV name
 * are released before returning.
 */
2244 static int mgs_write_log_mdt0(const struct lu_env *env,
2245 struct mgs_device *mgs,
2247 struct mgs_target_info *mti)
2249 char *log = mti->mti_svname;
2250 struct llog_handle *llh = NULL;
2251 char *uuid, *lovname;
2253 char *ptr = mti->mti_params;
2254 int rc = 0, failout = 0;
2257 OBD_ALLOC(uuid, sizeof(struct obd_uuid));
2261 if (class_find_param(ptr, PARAM_FAILMODE, &ptr) == 0)
2262 failout = (strncmp(ptr, "failout", 7) == 0);
2264 rc = name_create(&lovname, log, "-mdtlov");
2267 if (mgs_log_is_empty(env, mgs, log)) {
2268 rc = mgs_write_log_lov(env, mgs, fsdb, mti, log, lovname);
2273 sprintf(mdt_index, "%d", mti->mti_stripe_index);
2275 rc = record_start_log(env, mgs, &llh, log);
2279 /* add MDT itself */
2281 /* FIXME this whole fn should be a single journal transaction */
2282 sprintf(uuid, "%s_UUID", log);
2283 rc = record_marker(env, llh, fsdb, CM_START, log, "add mdt");
2286 rc = record_attach(env, llh, log, LUSTRE_MDT_NAME, uuid);
2289 rc = record_mount_opt(env, llh, log, lovname, NULL);
2292 rc = record_setup(env, llh, log, uuid, mdt_index, lovname,
2293 failout ? "n" : "f");
2296 rc = record_marker(env, llh, fsdb, CM_END, log, "add mdt");
2300 record_end_log(env, &llh);
2302 name_destroy(&lovname);
2304 OBD_FREE(uuid, sizeof(struct obd_uuid));
/*
 * Write every log affected by the registration of a new MDT.
 *
 * Visible steps, in order:
 *  1. make up "<svname>_UUID" when the target supplied no uuid;
 *  2. write the MDT's own log (mgs_write_log_mdt0());
 *  3. on an empty "<fsname>-client" log, write the LOV and LMV setup;
 *  4. copy the client's OSC/MDC entries into the new MDT log
 *     (mgs_steal_llog_for_mdt_from_client());
 *  5. add this MDT's MDC to the client LMV;
 *  6. record a mount option stanza in the client log;
 *  7. for every other MDT set in fsdb_mdt_index_map, add an OSP for the
 *     new MDT to that MDT's log.
 */
2308 /* envelope method for all layers log */
2309 static int mgs_write_log_mdt(const struct lu_env *env,
2310 struct mgs_device *mgs,
2312 struct mgs_target_info *mti)
2314 struct mgs_thread_info *mgi = mgs_env_info(env);
2315 struct llog_handle *llh = NULL;
2320 CDEBUG(D_MGS, "writing new mdt %s\n", mti->mti_svname);
2322 if (mti->mti_uuid[0] == '\0') {
2323 /* Make up our own uuid */
2324 snprintf(mti->mti_uuid, sizeof(mti->mti_uuid),
2325 "%s_UUID", mti->mti_svname);
2329 rc = mgs_write_log_mdt0(env, mgs, fsdb, mti);
2332 /* Append the mdt info to the client log */
2333 rc = name_create(&cliname, mti->mti_fsname, "-client");
2337 if (mgs_log_is_empty(env, mgs, cliname)) {
2338 /* Start client log */
2339 rc = mgs_write_log_lov(env, mgs, fsdb, mti, cliname,
2343 rc = mgs_write_log_lmv(env, mgs, fsdb, mti, cliname,
2350 #09 L add_uuid nid=uml1@tcp(0x20000c0a80201) 0: 1:uml1_UUID
2351 #10 L attach 0:MDC_uml1_mdsA_MNT_client 1:mdc 2:1d834_MNT_client_03f
2352 #11 L setup 0:MDC_uml1_mdsA_MNT_client 1:mdsA_UUID 2:uml1_UUID
2353 #12 L add_uuid nid=uml2@tcp(0x20000c0a80202) 0: 1:uml2_UUID
2354 #13 L add_conn 0:MDC_uml1_mdsA_MNT_client 1:uml2_UUID
2355 #14 L mount_option 0: 1:client 2:lov1 3:MDC_uml1_mdsA_MNT_client
2358 /* copy client info about lov/lmv */
2359 mgi->mgi_comp.comp_mti = mti;
2360 mgi->mgi_comp.comp_fsdb = fsdb;
2362 rc = mgs_steal_llog_for_mdt_from_client(env, mgs, cliname,
2366 rc = mgs_write_log_mdc_to_lmv(env, mgs, fsdb, mti, cliname,
2372 rc = record_start_log(env, mgs, &llh, cliname);
2376 rc = record_marker(env, llh, fsdb, CM_START, cliname,
2380 rc = record_mount_opt(env, llh, cliname, fsdb->fsdb_clilov,
2384 rc = record_marker(env, llh, fsdb, CM_END, cliname,
2390 /* for_all_existing_mdt except current one */
2391 for (i = 0; i < INDEX_MAP_SIZE * 8; i++) {
2392 if (i != mti->mti_stripe_index &&
2393 test_bit(i, fsdb->fsdb_mdt_index_map)) {
2396 rc = name_create_mdt(&logname, fsdb->fsdb_name, i);
2400 rc = mgs_write_log_osp_to_mdt(env, mgs, fsdb, mti,
2402 name_destroy(&logname);
2408 record_end_log(env, &llh);
2410 name_destroy(&cliname);
/*
 * Append an OSC for the OST described by @mti to the LOV @lovname in the
 * log @logname (a client or MDT log).
 *
 * The log must not be empty (an error is logged otherwise).  The OSC is
 * named "<svname>-osc<suffix>", or plain "<svname>-osc" on FSDB_OSCNAME18
 * filesystems.  Records written between the markers (which carry @flags in
 * addition to CM_START/CM_END): one add_uuid per target NID, attach
 * (LUSTRE_OSC_NAME), setup, failover NIDs and the lov-add record carrying
 * the stripe index.  @sec_part is not referenced in the visible lines.
 */
2414 /* Add the ost info to the client/mdt lov */
2415 static int mgs_write_log_osc_to_lov(const struct lu_env *env,
2416 struct mgs_device *mgs, struct fs_db *fsdb,
2417 struct mgs_target_info *mti,
2418 char *logname, char *suffix, char *lovname,
2419 enum lustre_sec_part sec_part, int flags)
2421 struct llog_handle *llh = NULL;
2422 char *nodeuuid = NULL;
2423 char *oscname = NULL;
2424 char *oscuuid = NULL;
2425 char *lovuuid = NULL;
2426 char *svname = NULL;
2431 CDEBUG(D_INFO, "adding osc for %s to log %s\n",
2432 mti->mti_svname, logname);
2434 if (mgs_log_is_empty(env, mgs, logname)) {
2435 CERROR("log is empty! Logical error\n");
2439 rc = name_create(&nodeuuid, libcfs_nid2str(mti->mti_nids[0]), "");
2442 rc = name_create(&svname, mti->mti_svname, "-osc");
2446 /* for the system upgraded from old 1.8, keep using the old osc naming
2447 * style for mdt, see name_create_mdt_osc(). LU-1257 */
2448 if (test_bit(FSDB_OSCNAME18, &fsdb->fsdb_flags))
2449 rc = name_create(&oscname, svname, "");
2451 rc = name_create(&oscname, svname, suffix);
2455 rc = name_create(&oscuuid, oscname, "_UUID");
2458 rc = name_create(&lovuuid, lovname, "_UUID");
2464 #03 L add_uuid nid=uml1@tcp(0x20000c0a80201) 0: 1:uml1_UUID
2466 #04 L add_uuid nid=1@elan(0x1000000000001) nal=90 0: 1:uml1_UUID
2467 #04 L attach 0:OSC_uml1_ost1_MNT_client 1:osc 2:89070_lov1_a41dff51a
2468 #05 L setup 0:OSC_uml1_ost1_MNT_client 1:ost1_UUID 2:uml1_UUID
2470 #06 L add_uuid nid=uml2@tcp(0x20000c0a80202) 0: 1:uml2_UUID
2471 #07 L add_conn 0:OSC_uml1_ost1_MNT_client 1:uml2_UUID
2472 #08 L lov_modify_tgts add 0:lov1 1:ost1_UUID 2(index):0 3(gen):1
2475 rc = record_start_log(env, mgs, &llh, logname);
2479 /* FIXME these should be a single journal transaction */
2480 rc = record_marker(env, llh, fsdb, CM_START | flags, mti->mti_svname,
2485 /* NB: don't change record order, because upon MDT steal OSC config
2486 * from client, it treats all nids before LCFG_SETUP as target nids
2487 * (multiple interfaces), while nids after as failover node nids.
2488 * See mgs_steal_llog_handler() LCFG_ADD_UUID.
2490 for (i = 0; i < mti->mti_nid_count; i++) {
2491 CDEBUG(D_MGS, "add nid %s\n", libcfs_nid2str(mti->mti_nids[i]));
2492 rc = record_add_uuid(env, llh, mti->mti_nids[i], nodeuuid);
2496 rc = record_attach(env, llh, oscname, LUSTRE_OSC_NAME, lovuuid);
2499 rc = record_setup(env, llh, oscname, mti->mti_uuid, nodeuuid, 0, 0);
2502 rc = mgs_write_log_failnids(env, mti, llh, oscname);
2506 snprintf(index, sizeof(index), "%d", mti->mti_stripe_index);
2508 rc = record_lov_add(env, llh, lovname, mti->mti_uuid, index, "1");
2511 rc = record_marker(env, llh, fsdb, CM_END | flags, mti->mti_svname,
2516 record_end_log(env, &llh);
2518 name_destroy(&lovuuid);
2519 name_destroy(&oscuuid);
2520 name_destroy(&oscname);
2521 name_destroy(&svname);
2522 name_destroy(&nodeuuid);
/*
 * Write every log affected by the registration of a new OST.
 *
 * Visible steps, in order:
 *  1. if a log for this OST already exists, report it on the console
 *     (message 0x141) - the OST was probably reformatted or re-indexed;
 *  2. write the OST's own startup log: "add ost" markers around an attach
 *     ("obdfilter", uuid defaulting to "<svname>_UUID") and a setup record
 *     whose failure mode is "n" for failout and "f" otherwise;
 *  3. on FSDB_OLDLOG14 filesystems the remaining entries are written with
 *     CM_SKIP | CM_UPGRADE146; otherwise, if LDD_F_UPDATE is not fully set,
 *     a console warning says the client log was not updated;
 *  4. add the OSC to the LOV of every MDT set in fsdb_mdt_index_map;
 *  5. add the OSC to the client LOV, creating the client log's LOV/LMV
 *     setup first when that log is empty.
 */
2526 static int mgs_write_log_ost(const struct lu_env *env,
2527 struct mgs_device *mgs, struct fs_db *fsdb,
2528 struct mgs_target_info *mti)
2530 struct llog_handle *llh = NULL;
2531 char *logname, *lovname;
2532 char *ptr = mti->mti_params;
2533 int rc, flags = 0, failout = 0, i;
2536 CDEBUG(D_MGS, "writing new ost %s\n", mti->mti_svname);
2538 /* The ost startup log */
2540 /* If the ost log already exists, that means that someone reformatted
2541 the ost and it called target_add again. */
2542 if (!mgs_log_is_empty(env, mgs, mti->mti_svname)) {
2543 LCONSOLE_ERROR_MSG(0x141, "The config log for %s already "
2544 "exists, yet the server claims it never "
2545 "registered. It may have been reformatted, "
2546 "or the index changed. writeconf the MDT to "
2547 "regenerate all logs.\n", mti->mti_svname);
2552 attach obdfilter ost1 ost1_UUID
2553 setup /dev/loop2 ldiskfs f|n errors=remount-ro,user_xattr
2555 if (class_find_param(ptr, PARAM_FAILMODE, &ptr) == 0)
2556 failout = (strncmp(ptr, "failout", 7) == 0);
2557 rc = record_start_log(env, mgs, &llh, mti->mti_svname);
2560 /* FIXME these should be a single journal transaction */
2561 rc = record_marker(env, llh, fsdb, CM_START, mti->mti_svname,"add ost");
2564 if (*mti->mti_uuid == '\0')
2565 snprintf(mti->mti_uuid, sizeof(mti->mti_uuid),
2566 "%s_UUID", mti->mti_svname);
2567 rc = record_attach(env, llh, mti->mti_svname,
2568 "obdfilter"/*LUSTRE_OST_NAME*/, mti->mti_uuid);
2571 rc = record_setup(env, llh, mti->mti_svname,
2572 "dev"/*ignored*/, "type"/*ignored*/,
2573 failout ? "n" : "f", 0/*options*/);
2576 rc = record_marker(env, llh, fsdb, CM_END, mti->mti_svname, "add ost");
2580 record_end_log(env, &llh);
2583 /* We also have to update the other logs where this osc is part of
2586 if (test_bit(FSDB_OLDLOG14, &fsdb->fsdb_flags)) {
2587 /* If we're upgrading, the old mdt log already has our
2588 entry. Let's do a fake one for fun. */
2589 /* Note that we can't add any new failnids, since we don't
2590 know the old osc names. */
2591 flags = CM_SKIP | CM_UPGRADE146;
2593 } else if ((mti->mti_flags & LDD_F_UPDATE) != LDD_F_UPDATE) {
2594 /* If the update flag isn't set, don't update client/mdt
2597 LCONSOLE_WARN("Client log for %s was not updated; writeconf "
2598 "the MDT first to regenerate it.\n",
2602 /* Add ost to all MDT lov defs */
2603 for (i = 0; i < INDEX_MAP_SIZE * 8; i++){
2604 if (test_bit(i, fsdb->fsdb_mdt_index_map)) {
2607 rc = name_create_mdt_and_lov(&logname, &lovname, fsdb,
2611 sprintf(mdt_index, "-MDT%04x", i);
2612 rc = mgs_write_log_osc_to_lov(env, mgs, fsdb, mti,
2614 lovname, LUSTRE_SP_MDT,
2616 name_destroy(&logname);
2617 name_destroy(&lovname);
2623 /* Append ost info to the client log */
2624 rc = name_create(&logname, mti->mti_fsname, "-client");
2627 if (mgs_log_is_empty(env, mgs, logname)) {
2628 /* Start client log */
2629 rc = mgs_write_log_lov(env, mgs, fsdb, mti, logname,
2633 rc = mgs_write_log_lmv(env, mgs, fsdb, mti, logname,
2638 rc = mgs_write_log_osc_to_lov(env, mgs, fsdb, mti, logname, "",
2639 fsdb->fsdb_clilov, LUSTRE_SP_CLI, 0);
2641 name_destroy(&logname);
/*
 * Test whether the "key=value" parameter string @ptr has an empty value,
 * i.e. the first '=' is immediately followed by the terminating NUL.
 * Callers treat a non-zero result as "parameter is being removed".
 */
2645 static __inline__ int mgs_param_empty(char *ptr)
2649 if ((tmp = strchr(ptr, '=')) && (*(++tmp) == '\0'))
/*
 * Apply a failover-NID change for the client device @cliname in the log
 * @logname.
 *
 * An empty parameter value removes all failnids: the existing
 * "add failnid" stanzas of this target are marked CM_SKIP via mgs_modify()
 * and only a negative result is propagated.  Otherwise the new failover
 * NIDs are appended inside a fresh "add failnid" marker pair.
 */
2654 static int mgs_write_log_failnid_internal(const struct lu_env *env,
2655 struct mgs_device *mgs,
2657 struct mgs_target_info *mti,
2658 char *logname, char *cliname)
2661 struct llog_handle *llh = NULL;
2663 if (mgs_param_empty(mti->mti_params)) {
2664 /* Remove _all_ failnids */
2665 rc = mgs_modify(env, mgs, fsdb, mti, logname,
2666 mti->mti_svname, "add failnid", CM_SKIP);
2667 return rc < 0 ? rc : 0;
2670 /* Otherwise failover nids are additive */
2671 rc = record_start_log(env, mgs, &llh, logname);
2674 /* FIXME this should be a single journal transaction */
2675 rc = record_marker(env, llh, fsdb, CM_START, mti->mti_svname,
2679 rc = mgs_write_log_failnids(env, mti, llh, cliname);
2682 rc = record_marker(env, llh, fsdb, CM_END,
2683 mti->mti_svname, "add failnid");
2685 record_end_log(env, &llh);
/*
 * Entry point for adding failover NIDs to a registered target.
 *
 * The target must already have a log (console error 0x142 otherwise).  The
 * client-side device name is "<svname>-mdc" for an MDT and "<svname>-osc"
 * for an OST.  The failnids are written to the "<fsname>-client" log and,
 * for an OST, also to the log of every MDT set in fsdb_mdt_index_map, using
 * that MDT's OSC name from name_create_mdt_osc().
 */
2690 /* Add additional failnids to an existing log.
2691 The mdc/osc must have been added to logs first */
2692 /* tcp nids must be in dotted-quad ascii -
2693 we can't resolve hostnames from the kernel. */
2694 static int mgs_write_log_add_failnid(const struct lu_env *env,
2695 struct mgs_device *mgs,
2697 struct mgs_target_info *mti)
2699 char *logname, *cliname;
2703 /* FIXME we currently can't erase the failnids
2704 * given when a target first registers, since they aren't part of
2705 * an "add uuid" stanza */
2707 /* Verify that we know about this target */
2708 if (mgs_log_is_empty(env, mgs, mti->mti_svname)) {
2709 LCONSOLE_ERROR_MSG(0x142, "The target %s has not registered "
2710 "yet. It must be started before failnids "
2711 "can be added.\n", mti->mti_svname);
2715 /* Create mdc/osc client name (e.g. lustre-OST0001-osc) */
2716 if (mti->mti_flags & LDD_F_SV_TYPE_MDT) {
2717 rc = name_create(&cliname, mti->mti_svname, "-mdc");
2718 } else if (mti->mti_flags & LDD_F_SV_TYPE_OST) {
2719 rc = name_create(&cliname, mti->mti_svname, "-osc");
2725 /* Add failover nids to the client log */
2726 rc = name_create(&logname, mti->mti_fsname, "-client");
2728 name_destroy(&cliname);
2731 rc = mgs_write_log_failnid_internal(env, mgs, fsdb,mti,logname,cliname);
2732 name_destroy(&logname);
2733 name_destroy(&cliname);
2737 if (mti->mti_flags & LDD_F_SV_TYPE_OST) {
2738 /* Add OST failover nids to the MDT logs as well */
2741 for (i = 0; i < INDEX_MAP_SIZE * 8; i++) {
2742 if (!test_bit(i, fsdb->fsdb_mdt_index_map))
2744 rc = name_create_mdt(&logname, mti->mti_fsname, i);
2747 rc = name_create_mdt_osc(&cliname, mti->mti_svname,
2750 name_destroy(&logname);
2753 rc = mgs_write_log_failnid_internal(env, mgs, fsdb,
2756 name_destroy(&cliname);
2757 name_destroy(&logname);
/*
 * Write one "key=value" parameter @ptr for target @tgtname into the log
 * @logname as an LCFG_PARAM record.
 *
 * The marker comment is taken from a copy of @ptr (the copy is searched
 * for '=' so that the value does not take part in matching).  Earlier
 * settings with that comment are marked CM_SKIP through mgs_modify(), a
 * console message reports whether the parameter is being disabled, set or
 * modified, and the new record is written with mgs_write_log_direct().
 *
 * NOTE(review): the memcpy reads a fixed MTI_NAME_MAXLEN bytes from @ptr
 * regardless of its length, which may read past the end of a shorter
 * string - confirm the source buffer is always at least that large.
 */
2766 static int mgs_wlp_lcfg(const struct lu_env *env,
2767 struct mgs_device *mgs, struct fs_db *fsdb,
2768 struct mgs_target_info *mti,
2769 char *logname, struct lustre_cfg_bufs *bufs,
2770 char *tgtname, char *ptr)
2772 char comment[MTI_NAME_MAXLEN];
2774 struct lustre_cfg *lcfg;
2777 /* Erase any old settings of this same parameter */
2778 memcpy(comment, ptr, MTI_NAME_MAXLEN);
2779 comment[MTI_NAME_MAXLEN - 1] = 0;
2780 /* But don't try to match the value. */
2781 if ((tmp = strchr(comment, '=')))
2783 /* FIXME we should skip settings that are the same as old values */
2784 rc = mgs_modify(env, mgs, fsdb, mti, logname, tgtname, comment,CM_SKIP);
2787 del = mgs_param_empty(ptr);
2789 LCONSOLE_INFO("%sing parameter %s.%s in log %s\n", del ? "Disabl" : rc ?
2790 "Sett" : "Modify", tgtname, comment, logname);
2794 lustre_cfg_bufs_reset(bufs, tgtname);
2795 lustre_cfg_bufs_set_string(bufs, 1, ptr);
2796 lcfg = lustre_cfg_new(LCFG_PARAM, bufs);
2799 rc = mgs_write_log_direct(env, mgs, fsdb, logname,lcfg,tgtname,comment);
2800 lustre_cfg_free(lcfg);
2804 /* write global variable settings into log */
2805 static int mgs_write_log_sys(const struct lu_env *env,
2806 struct mgs_device *mgs, struct fs_db *fsdb,
2807 struct mgs_target_info *mti, char *sys, char *ptr)
2809 struct mgs_thread_info *mgi = mgs_env_info(env);
2810 struct lustre_cfg *lcfg;
2812 int rc, cmd, convert = 1;
2814 if (class_match_param(ptr, PARAM_TIMEOUT, &tmp) == 0) {
2815 cmd = LCFG_SET_TIMEOUT;
2816 } else if (class_match_param(ptr, PARAM_LDLM_TIMEOUT, &tmp) == 0) {
2817 cmd = LCFG_SET_LDLM_TIMEOUT;
2818 /* Check for known params here so we can return error to lctl */
2819 } else if ((class_match_param(ptr, PARAM_AT_MIN, &tmp) == 0) ||
2820 (class_match_param(ptr, PARAM_AT_MAX, &tmp) == 0) ||
2821 (class_match_param(ptr, PARAM_AT_EXTRA, &tmp) == 0) ||
2822 (class_match_param(ptr, PARAM_AT_EARLY_MARGIN, &tmp) == 0) ||
2823 (class_match_param(ptr, PARAM_AT_HISTORY, &tmp) == 0)) {
2825 } else if (class_match_param(ptr, PARAM_JOBID_VAR, &tmp) == 0) {
2826 convert = 0; /* Don't convert string value to integer */
2832 if (mgs_param_empty(ptr))
2833 CDEBUG(D_MGS, "global '%s' removed\n", sys);
2835 CDEBUG(D_MGS, "global '%s' val=%s\n", sys, tmp);
2837 lustre_cfg_bufs_reset(&mgi->mgi_bufs, NULL);
2838 lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 1, sys);
2839 if (!convert && *tmp != '\0')
2840 lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 2, tmp);
2841 lcfg = lustre_cfg_new(cmd, &mgi->mgi_bufs);
2842 lcfg->lcfg_num = convert ? simple_strtoul(tmp, NULL, 0) : 0;
2843 /* truncate the comment to the parameter name */
2847 /* modify all servers and clients */
2848 rc = mgs_write_log_direct_all(env, mgs, fsdb, mti,
2849 *tmp == '\0' ? NULL : lcfg,
2850 mti->mti_fsname, sys, 0);
2851 if (rc == 0 && *tmp != '\0') {
2853 case LCFG_SET_TIMEOUT:
2854 if (!obd_timeout_set || lcfg->lcfg_num > obd_timeout)
2855 class_process_config(lcfg);
2857 case LCFG_SET_LDLM_TIMEOUT:
2858 if (!ldlm_timeout_set || lcfg->lcfg_num > ldlm_timeout)
2859 class_process_config(lcfg);
2866 lustre_cfg_free(lcfg);
/*
 * Record a quota enable setting in the server config logs of a filesystem.
 *
 * \a ptr must match QUOTA_METAPOOL_NAME or QUOTA_DATAPOOL_NAME, otherwise the
 * "isn't supported" error is logged.  A value is accepted when it contains
 * 'u' or 'g' or equals "none"; anything else is reported as an unsupported
 * enable option.  The setting is written as an LCFG_PARAM record (fsname in
 * buffer 0, \a quota in buffer 1) through mgs_write_log_direct_all(); when
 * the value is empty, NULL is passed instead of the record.
 *
 * Returns the negative error from mgs_write_log_direct_all(), 0 otherwise.
 *
 * NOTE(review): the return statements of the two rejection paths and the
 * lines that truncate the comment are not part of this listing.
 */
2870 /* write quota settings into log */
2871 static int mgs_write_log_quota(const struct lu_env *env, struct mgs_device *mgs,
2872 struct fs_db *fsdb, struct mgs_target_info *mti,
2873 char *quota, char *ptr)
2875 struct mgs_thread_info *mgi = mgs_env_info(env);
2876 struct lustre_cfg *lcfg;
2879 int rc, cmd = LCFG_PARAM;
2881 /* support only 'meta' and 'data' pools so far */
2882 if (class_match_param(ptr, QUOTA_METAPOOL_NAME, &tmp) != 0 &&
2883 class_match_param(ptr, QUOTA_DATAPOOL_NAME, &tmp) != 0) {
2884 CERROR("parameter quota.%s isn't supported (only quota.mdt "
2885 "& quota.ost are)\n", ptr);
2890 CDEBUG(D_MGS, "global '%s' removed\n", quota);
2892 CDEBUG(D_MGS, "global '%s'\n", quota);
/* the value has to name user ('u') and/or group ('g') quota, or be "none" */
2894 if (strchr(tmp, 'u') == NULL && strchr(tmp, 'g') == NULL &&
2895 strcmp(tmp, "none") != 0) {
2896 CERROR("enable option(%s) isn't supported\n", tmp);
2901 lustre_cfg_bufs_reset(&mgi->mgi_bufs, mti->mti_fsname);
2902 lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 1, quota);
/* NOTE(review): no check of the lustre_cfg_new() result is visible in this
 * listing, yet lcfg is passed on and freed below -- confirm. */
2903 lcfg = lustre_cfg_new(cmd, &mgi->mgi_bufs);
2904 /* truncate the comment to the parameter name */
2909 /* XXX we duplicated quota enable information in all server
2910 * config logs, it should be moved to a separate config
2911 * log once we cleanup the config log for global param. */
2912 /* modify all servers */
2913 rc = mgs_write_log_direct_all(env, mgs, fsdb, mti,
2914 *tmp == '\0' ? NULL : lcfg,
2915 mti->mti_fsname, quota, 1);
2917 lustre_cfg_free(lcfg);
/* positive results from the log writer are folded into success */
2918 return rc < 0 ? rc : 0;
/*
 * Store one sptlrpc parameter in the "<fsname>-sptlrpc" configuration log.
 *
 * The first len characters of \a param are copied into a freshly allocated
 * comment string (len is computed on a line missing from this listing;
 * presumably the offset of the '=' found by strchr()).  The record written
 * is an LCFG_SPTLRPC_CONF with mti_svname in buffer 0 and the whole
 * \a param in buffer 1.  If the log is still empty it is started and ended
 * once first; mgs_modify() is then called on the existing records (see
 * "obsolete old one") and the new record is appended with
 * mgs_write_log_direct().
 *
 * The log name, the record and the comment are released at the end.
 */
2921 static int mgs_srpc_set_param_disk(const struct lu_env *env,
2922 struct mgs_device *mgs,
2924 struct mgs_target_info *mti,
2927 struct mgs_thread_info *mgi = mgs_env_info(env);
2928 struct llog_handle *llh = NULL;
2930 char *comment, *ptr;
2931 struct lustre_cfg *lcfg;
2936 ptr = strchr(param, '=');
2940 OBD_ALLOC(comment, len + 1);
2941 if (comment == NULL)
2943 strncpy(comment, param, len);
2944 comment[len] = '\0';
2947 lustre_cfg_bufs_reset(&mgi->mgi_bufs, mti->mti_svname);
2948 lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 1, param);
2949 lcfg = lustre_cfg_new(LCFG_SPTLRPC_CONF, &mgi->mgi_bufs);
2951 GOTO(out_comment, rc = -ENOMEM);
2953 /* construct log name */
2954 rc = name_create(&logname, mti->mti_fsname, "-sptlrpc");
/* an empty log is opened and closed once before it is modified */
2958 if (mgs_log_is_empty(env, mgs, logname)) {
2959 rc = record_start_log(env, mgs, &llh, logname);
2962 record_end_log(env, &llh);
2965 /* obsolete old one */
2966 rc = mgs_modify(env, mgs, fsdb, mti, logname, mti->mti_svname,
2970 /* write the new one */
2971 rc = mgs_write_log_direct(env, mgs, fsdb, logname, lcfg,
2972 mti->mti_svname, comment);
2974 CERROR("err %d writing log %s\n", rc, logname);
2976 name_destroy(&logname);
2978 lustre_cfg_free(lcfg);
2980 OBD_FREE(comment, len + 1);
/*
 * Update the in-memory "user descriptor" flag of a filesystem database.
 *
 * The name part of \a param has to equal PARAM_SRPC_UDESC.  A value of "yes"
 * sets FSDB_UDESC in fsdb->fsdb_flags, "no" clears it; each change is
 * announced with CWARN.  Other input ends at the "Invalid param" error.
 *
 * NOTE(review): according to the comment below, the statement that follows
 * it (missing from this listing) currently disables this setting.
 */
2984 static int mgs_srpc_set_param_udesc_mem(struct fs_db *fsdb,
2989 /* disable the adjustable udesc parameter for now, i.e. use default
2990 * setting that client always ship udesc to MDT if possible. to enable
2991 * it simply remove the following line */
2994 ptr = strchr(param, '=');
2999 if (strcmp(param, PARAM_SRPC_UDESC))
3002 if (strcmp(ptr, "yes") == 0) {
3003 set_bit(FSDB_UDESC, &fsdb->fsdb_flags);
3004 CWARN("Enable user descriptor shipping from client to MDT\n");
3005 } else if (strcmp(ptr, "no") == 0) {
3006 clear_bit(FSDB_UDESC, &fsdb->fsdb_flags);
3007 CWARN("Disable user descriptor shipping from client to MDT\n");
3015 CERROR("Invalid param: %s\n", param);
/*
 * Merge one sptlrpc parameter into the in-memory rule sets of a filesystem.
 *
 * \a param must start with PARAM_SRPC.  A PARAM_SRPC_UDESC parameter is
 * passed on to mgs_srpc_set_param_udesc_mem(); everything else must start
 * with PARAM_SRPC_FLVR, and the text after that prefix is parsed by
 * sptlrpc_parse_rule().  When the database carries FSDB_MGS_SELF, the rule's
 * source is checked against LUSTRE_SP_MGC / LUSTRE_SP_ANY and its
 * destination against LUSTRE_SP_MGS / LUSTRE_SP_ANY.
 *
 * The rule set that receives the rule depends on \a svname: a name that
 * contains '-' selects the matching entry of the fsdb->fsdb_srpc_tgt list
 * (a new entry holding a copy of the name is allocated and put at the head
 * of the list when none matches); any other name selects
 * fsdb->fsdb_srpc_gen.  The rule is added with sptlrpc_rule_set_merge().
 *
 * NOTE(review): the return statements of the error branches are missing
 * from this listing.
 */
3019 static int mgs_srpc_set_param_mem(struct fs_db *fsdb,
3023 struct sptlrpc_rule rule;
3024 struct sptlrpc_rule_set *rset;
/* prefix tests compare sizeof(...) - 1 bytes, i.e. without the final NUL */
3028 if (strncmp(param, PARAM_SRPC, sizeof(PARAM_SRPC) - 1) != 0) {
3029 CERROR("Invalid sptlrpc parameter: %s\n", param);
3033 if (strncmp(param, PARAM_SRPC_UDESC,
3034 sizeof(PARAM_SRPC_UDESC) - 1) == 0) {
3035 RETURN(mgs_srpc_set_param_udesc_mem(fsdb, param));
3038 if (strncmp(param, PARAM_SRPC_FLVR, sizeof(PARAM_SRPC_FLVR) - 1) != 0) {
3039 CERROR("Invalid sptlrpc flavor parameter: %s\n", param);
/* step over the flavor prefix; the rest is the rule text */
3043 param += sizeof(PARAM_SRPC_FLVR) - 1;
3045 rc = sptlrpc_parse_rule(param, &rule);
3049 /* mgs rules implies must be mgc->mgs */
3050 if (test_bit(FSDB_MGS_SELF, &fsdb->fsdb_flags)) {
3051 if ((rule.sr_from != LUSTRE_SP_MGC &&
3052 rule.sr_from != LUSTRE_SP_ANY) ||
3053 (rule.sr_to != LUSTRE_SP_MGS &&
3054 rule.sr_to != LUSTRE_SP_ANY))
3058 /* preapre room for this coming rule. svcname format should be:
3059 * - fsname: general rule
3060 * - fsname-tgtname: target-specific rule
3062 if (strchr(svname, '-')) {
3063 struct mgs_tgt_srpc_conf *tgtconf;
3066 for (tgtconf = fsdb->fsdb_srpc_tgt; tgtconf != NULL;
3067 tgtconf = tgtconf->mtsc_next) {
3068 if (!strcmp(tgtconf->mtsc_tgt, svname)) {
3077 OBD_ALLOC_PTR(tgtconf);
3078 if (tgtconf == NULL)
3081 name_len = strlen(svname);
3083 OBD_ALLOC(tgtconf->mtsc_tgt, name_len + 1);
3084 if (tgtconf->mtsc_tgt == NULL) {
3085 OBD_FREE_PTR(tgtconf);
3088 memcpy(tgtconf->mtsc_tgt, svname, name_len);
3090 tgtconf->mtsc_next = fsdb->fsdb_srpc_tgt;
3091 fsdb->fsdb_srpc_tgt = tgtconf;
3094 rset = &tgtconf->mtsc_rset;
3096 rset = &fsdb->fsdb_srpc_gen;
3099 rc = sptlrpc_rule_set_merge(rset, &rule);
/*
 * Apply one sptlrpc parameter both in memory and on disk.
 *
 * A private copy of \a param (strlen + 1 bytes) is taken first.  The
 * original string is given to mgs_srpc_set_param_mem() together with
 * mti_svname; the copy is what mgs_srpc_set_param_disk() writes to the log.
 * For a database flagged FSDB_MGS_SELF the function asserts that no
 * target-specific rules exist and pushes fsdb_srpc_gen to the MGS obd with
 * sptlrpc_target_update_exp_flavor().  The copy is freed before returning.
 *
 * NOTE(review): the checks of rc between the steps are missing from this
 * listing.
 */
3104 static int mgs_srpc_set_param(const struct lu_env *env,
3105 struct mgs_device *mgs,
3107 struct mgs_target_info *mti,
3117 /* keep a copy of original param, which could be destroied
3119 copy_size = strlen(param) + 1;
3120 OBD_ALLOC(copy, copy_size);
3123 memcpy(copy, param, copy_size);
3125 rc = mgs_srpc_set_param_mem(fsdb, mti->mti_svname, param);
3129 /* previous steps guaranteed the syntax is correct */
3130 rc = mgs_srpc_set_param_disk(env, mgs, fsdb, mti, copy);
3134 if (test_bit(FSDB_MGS_SELF, &fsdb->fsdb_flags)) {
3136 * for mgs rules, make them effective immediately.
3138 LASSERT(fsdb->fsdb_srpc_tgt == NULL);
3139 sptlrpc_target_update_exp_flavor(mgs->mgs_obd,
3140 &fsdb->fsdb_srpc_gen);
3144 OBD_FREE(copy, copy_size);
/*
 * Cursor state handed to mgs_srpc_read_handler() while the sptlrpc log is
 * replayed: msrd_fsdb is the database that receives the rules.  The handler
 * also keeps a skip flag (msrd_skip) here; its declaration is missing from
 * this listing.
 */
3148 struct mgs_srpc_read_data {
3149 struct fs_db *msrd_fsdb;
/*
 * llog_process() callback that loads one record of the sptlrpc log.
 *
 * \a data is a struct mgs_srpc_read_data.  Records that are not OBD_CFG_REC
 * or that fail lustre_cfg_sanity_check() are reported as errors.  Marker
 * records drive the skip flag: a marker with both CM_START and CM_SKIP sets
 * it, a marker with CM_END clears it.  Of the remaining records only
 * LCFG_SPTLRPC_CONF is accepted; its buffer 0 is the service name and its
 * buffer 1 the parameter, and both are fed to mgs_srpc_set_param_mem().
 *
 * NOTE(review): the return statements are missing from this listing; it is
 * presumed that records seen while the skip flag is set are ignored.
 */
3153 static int mgs_srpc_read_handler(const struct lu_env *env,
3154 struct llog_handle *llh,
3155 struct llog_rec_hdr *rec, void *data)
3157 struct mgs_srpc_read_data *msrd = data;
3158 struct cfg_marker *marker;
3159 struct lustre_cfg *lcfg = REC_DATA(rec);
3160 char *svname, *param;
3164 if (rec->lrh_type != OBD_CFG_REC) {
3165 CERROR("unhandled lrh_type: %#x\n", rec->lrh_type);
/* payload size = record length minus the llog header and tail */
3169 cfg_len = rec->lrh_len - sizeof(struct llog_rec_hdr) -
3170 sizeof(struct llog_rec_tail);
3172 rc = lustre_cfg_sanity_check(lcfg, cfg_len);
3174 CERROR("Insane cfg\n");
3178 if (lcfg->lcfg_command == LCFG_MARKER) {
3179 marker = lustre_cfg_buf(lcfg, 1);
/* a start marker flagged CM_SKIP opens a skipped section, CM_END closes it */
3181 if (marker->cm_flags & CM_START &&
3182 marker->cm_flags & CM_SKIP)
3183 msrd->msrd_skip = 1;
3184 if (marker->cm_flags & CM_END)
3185 msrd->msrd_skip = 0;
3190 if (msrd->msrd_skip)
3193 if (lcfg->lcfg_command != LCFG_SPTLRPC_CONF) {
3194 CERROR("invalid command (%x)\n", lcfg->lcfg_command);
3198 svname = lustre_cfg_string(lcfg, 0);
3199 if (svname == NULL) {
3200 CERROR("svname is empty\n");
3204 param = lustre_cfg_string(lcfg, 1);
3205 if (param == NULL) {
3206 CERROR("param is empty\n");
3210 rc = mgs_srpc_set_param_mem(msrd->msrd_fsdb, svname, param);
3212 CERROR("read sptlrpc record error (%d): %s\n", rc, param);
/*
 * Load the sptlrpc rules of a filesystem from its "<fsdb_name>-sptlrpc" log.
 *
 * The log is looked up in the LLOG_CONFIG_ORIG_CTXT context of the MGS obd.
 * It is tested with mgs_log_is_empty() before being opened; once opened it
 * is initialised as a plain llog, and a log whose llog_get_size() is 1 or
 * less is closed with rc = 0.  Otherwise every record is passed to
 * mgs_srpc_read_handler() with a struct mgs_srpc_read_data pointing at
 * \a fsdb.  The handle, the context reference and the log name are released
 * on the way out and a failure is logged with CERROR.
 *
 * NOTE(review): the jump taken for an empty log and the other rc checks are
 * missing from this listing.
 */
3217 int mgs_get_fsdb_srpc_from_llog(const struct lu_env *env,
3218 struct mgs_device *mgs,
3221 struct llog_handle *llh = NULL;
3222 struct llog_ctxt *ctxt;
3224 struct mgs_srpc_read_data msrd;
3228 /* construct log name */
3229 rc = name_create(&logname, fsdb->fsdb_name, "-sptlrpc");
3233 ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
3234 LASSERT(ctxt != NULL);
3236 if (mgs_log_is_empty(env, mgs, logname))
3239 rc = llog_open(env, ctxt, &llh, NULL, logname,
3247 rc = llog_init_handle(env, llh, LLOG_F_IS_PLAIN, NULL);
3249 GOTO(out_close, rc);
/* a size of 1 or less is treated as "nothing to read" and is not an error */
3251 if (llog_get_size(llh) <= 1)
3252 GOTO(out_close, rc = 0);
3254 msrd.msrd_fsdb = fsdb;
3257 rc = llog_process(env, llh, mgs_srpc_read_handler, (void *)&msrd,
3261 llog_close(env, llh);
3263 llog_ctxt_put(ctxt);
3264 name_destroy(&logname);
3267 CERROR("failed to read sptlrpc config database: %d\n", rc);
/*
 * Dispatch one parameter string to the configuration logs that care about
 * it.
 *
 * \a ptr is compared with the known parameter prefixes in the order of the
 * branches below.  Depending on the prefix the setting is handed to a
 * dedicated helper (sptlrpc, failover nids, global and quota settings) or
 * written with mgs_wlp_lcfg() into the client log, the per-MDT logs and/or
 * the target's own log.  A parameter that matches no prefix is reported as
 * "Ignoring unrecognized param"; a failure is logged together with the
 * parameter text.
 *
 * NOTE(review): the embedded line numbers skip, i.e. short lines (braces,
 * local declarations, GOTO/continue statements) are missing from this
 * listing, so the exit path of each branch cannot be confirmed from here.
 */
3271 /* Permanent settings of all parameters by writing into the appropriate
3272 * configuration logs.
3273 * A parameter with null value ("<param>='\0'") means to erase it out of
3276 static int mgs_write_log_param(const struct lu_env *env,
3277 struct mgs_device *mgs, struct fs_db *fsdb,
3278 struct mgs_target_info *mti, char *ptr)
3280 struct mgs_thread_info *mgi = mgs_env_info(env);
3283 int rc = 0, rc2 = 0;
3286 /* For various parameter settings, we have to figure out which logs
3287 care about them (e.g. both mdt and client for lov settings) */
3288 CDEBUG(D_MGS, "next param '%s'\n", ptr);
3290 /* The params are stored in MOUNT_DATA_FILE and modified via
3291 tunefs.lustre, or set using lctl conf_param */
3293 /* Processed in lustre_start_mgc */
3294 if (class_match_param(ptr, PARAM_MGSNODE, NULL) == 0)
3297 /* Processed in ost/mdt */
3298 if (class_match_param(ptr, PARAM_NETWORK, NULL) == 0)
3301 /* Processed in mgs_write_log_ost */
3302 if (class_match_param(ptr, PARAM_FAILMODE, NULL) == 0) {
3303 if (mti->mti_flags & LDD_F_PARAM) {
/* NOTE(review): the adjacent literals join as "tunefs.lustreand" -- a space
 * appears to be missing in the message. */
3304 LCONSOLE_ERROR_MSG(0x169, "%s can only be "
3305 "changed with tunefs.lustre"
3306 "and --writeconf\n", ptr);
/* sptlrpc rules are applied by mgs_srpc_set_param() */
3312 if (class_match_param(ptr, PARAM_SRPC, NULL) == 0) {
3313 rc = mgs_srpc_set_param(env, mgs, fsdb, mti, ptr);
3317 if (class_match_param(ptr, PARAM_FAILNODE, NULL) == 0) {
3318 /* Add a failover nidlist */
3320 /* We already processed failovers params for new
3321 targets in mgs_write_log_target */
3322 if (mti->mti_flags & LDD_F_PARAM) {
3323 CDEBUG(D_MGS, "Adding failnode\n");
3324 rc = mgs_write_log_add_failnid(env, mgs, fsdb, mti);
/* global and quota settings: tmp, filled in by class_match_param(), is
 * handed on together with the whole parameter */
3329 if (class_match_param(ptr, PARAM_SYS, &tmp) == 0) {
3330 rc = mgs_write_log_sys(env, mgs, fsdb, mti, ptr, tmp);
3334 if (class_match_param(ptr, PARAM_QUOTA, &tmp) == 0) {
3335 rc = mgs_write_log_quota(env, mgs, fsdb, mti, ptr, tmp);
/* PARAM_OSC and PARAM_ACTIVE are joined into one prefix by string-literal
 * concatenation */
3339 if (class_match_param(ptr, PARAM_OSC""PARAM_ACTIVE, &tmp) == 0) {
3340 /* active=0 means off, anything else means on */
3341 int flag = (*tmp == '0') ? CM_EXCLUDE : 0;
3344 if (!(mti->mti_flags & LDD_F_SV_TYPE_OST)) {
3345 LCONSOLE_ERROR_MSG(0x144, "%s: Only OSCs can "
3346 "be (de)activated.\n",
3348 GOTO(end, rc = -EINVAL);
3350 LCONSOLE_WARN("Permanently %sactivating %s\n",
3351 flag ? "de": "re", mti->mti_svname);
/* flag the "add osc" section of this OST in the client log ... */
3353 rc = name_create(&logname, mti->mti_fsname, "-client");
3356 rc = mgs_modify(env, mgs, fsdb, mti, logname,
3357 mti->mti_svname, "add osc", flag);
3358 name_destroy(&logname);
3362 /* Add to all MDT logs for CMD */
3363 for (i = 0; i < INDEX_MAP_SIZE * 8; i++) {
3364 if (!test_bit(i, fsdb->fsdb_mdt_index_map))
3366 rc = name_create_mdt(&logname, mti->mti_fsname, i);
3369 rc = mgs_modify(env, mgs, fsdb, mti, logname,
3370 mti->mti_svname, "add osc", flag);
3371 name_destroy(&logname);
/* NOTE(review): the adjacent literals join as "inlog" -- a space appears to
 * be missing in the message. */
3377 LCONSOLE_ERROR_MSG(0x145, "Couldn't find %s in"
3378 "log (%d). No permanent "
3379 "changes were made to the "
3381 mti->mti_svname, rc);
3382 if (test_bit(FSDB_OLDLOG14, &fsdb->fsdb_flags))
3383 LCONSOLE_ERROR_MSG(0x146, "This may be"
3388 "update the logs.\n");
3391 /* Fall through to osc proc for deactivating live OSC
3392 on running MDT / clients. */
3394 /* Below here, let obd's XXX_process_config methods handle it */
3396 /* All lov. in proc */
3397 if (class_match_param(ptr, PARAM_LOV, NULL) == 0) {
3400 CDEBUG(D_MGS, "lov param %s\n", ptr);
3401 if (!(mti->mti_flags & LDD_F_SV_TYPE_MDT)) {
3402 LCONSOLE_ERROR_MSG(0x147, "LOV params must be "
3403 "set on the MDT, not %s. "
3410 if (mgs_log_is_empty(env, mgs, mti->mti_svname))
3411 GOTO(end, rc = -ENODEV);
/* first the MDT's own log, addressed to its lov device ... */
3413 rc = name_create_mdt_and_lov(&logname, &mdtlovname, fsdb,
3414 mti->mti_stripe_index);
3417 rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, mti->mti_svname,
3418 &mgi->mgi_bufs, mdtlovname, ptr);
3419 name_destroy(&logname);
3420 name_destroy(&mdtlovname);
/* ... then the client log, addressed to the client lov */
3425 rc = name_create(&logname, mti->mti_fsname, "-client");
3428 rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, logname, &mgi->mgi_bufs,
3429 fsdb->fsdb_clilov, ptr);
3430 name_destroy(&logname);
3434 /* All osc., mdc., llite. params in proc */
3435 if ((class_match_param(ptr, PARAM_OSC, NULL) == 0) ||
3436 (class_match_param(ptr, PARAM_MDC, NULL) == 0) ||
3437 (class_match_param(ptr, PARAM_LLITE, NULL) == 0)) {
3440 if (test_bit(FSDB_OLDLOG14, &fsdb->fsdb_flags)) {
3441 LCONSOLE_ERROR_MSG(0x148, "Upgraded client logs for %s"
3442 " cannot be modified. Consider"
3443 " updating the configuration with"
3446 GOTO(end, rc = -EINVAL);
/* cname is the device the record is addressed to: "<fsname>-client" for
 * llite, "<svname>-mdc" or "<svname>-osc" for an MDT or OST target */
3448 if (memcmp(ptr, PARAM_LLITE, strlen(PARAM_LLITE)) == 0) {
3449 rc = name_create(&cname, mti->mti_fsname, "-client");
3450 /* Add the client type to match the obdname in
3451 class_config_llog_handler */
3452 } else if (mti->mti_flags & LDD_F_SV_TYPE_MDT) {
3453 rc = name_create(&cname, mti->mti_svname, "-mdc");
3454 } else if (mti->mti_flags & LDD_F_SV_TYPE_OST) {
3455 rc = name_create(&cname, mti->mti_svname, "-osc");
3457 GOTO(end, rc = -EINVAL);
/* prints the first three characters of the parameter, then the text from
 * offset 4 onwards */
3462 CDEBUG(D_MGS, "%.3s param %s\n", ptr, ptr + 4);
3465 rc = name_create(&logname, mti->mti_fsname, "-client");
3467 name_destroy(&cname);
3470 rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, logname, &mgi->mgi_bufs,
3473 /* osc params affect the MDT as well */
3474 if (!rc && (mti->mti_flags & LDD_F_SV_TYPE_OST)) {
3477 for (i = 0; i < INDEX_MAP_SIZE * 8; i++){
3478 if (!test_bit(i, fsdb->fsdb_mdt_index_map))
/* the names from the previous pass are dropped before new ones are built */
3480 name_destroy(&cname);
3481 rc = name_create_mdt_osc(&cname, mti->mti_svname,
3483 name_destroy(&logname);
3486 rc = name_create_mdt(&logname,
3487 mti->mti_fsname, i);
3490 if (!mgs_log_is_empty(env, mgs, logname)) {
3491 rc = mgs_wlp_lcfg(env, mgs, fsdb,
3500 name_destroy(&logname);
3501 name_destroy(&cname);
3505 /* All mdt. params in proc */
3506 if (class_match_param(ptr, PARAM_MDT, NULL) == 0) {
3510 CDEBUG(D_MGS, "%.3s param %s\n", ptr, ptr + 4);
/* rc is used as a set of LDD_F_SV_* type flags from here on */
3511 if (strncmp(mti->mti_svname, mti->mti_fsname,
3512 MTI_NAME_MAXLEN) == 0)
3513 /* device is unspecified completely? */
3514 rc = LDD_F_SV_TYPE_MDT | LDD_F_SV_ALL;
3516 rc = server_name2index(mti->mti_svname, &idx, NULL);
3519 if ((rc & LDD_F_SV_TYPE_MDT) == 0)
/* LDD_F_SV_ALL: write the setting to the log of every MDT in the index map */
3521 if (rc & LDD_F_SV_ALL) {
3522 for (i = 0; i < INDEX_MAP_SIZE * 8; i++) {
3524 fsdb->fsdb_mdt_index_map))
3526 rc = name_create_mdt(&logname,
3527 mti->mti_fsname, i);
3530 rc = mgs_wlp_lcfg(env, mgs, fsdb, mti,
3531 logname, &mgi->mgi_bufs,
3533 name_destroy(&logname);
3538 rc = mgs_wlp_lcfg(env, mgs, fsdb, mti,
3539 mti->mti_svname, &mgi->mgi_bufs,
3540 mti->mti_svname, ptr);
3547 /* All mdd., ost. params in proc */
3548 if ((class_match_param(ptr, PARAM_MDD, NULL) == 0) ||
3549 (class_match_param(ptr, PARAM_OST, NULL) == 0)) {
3550 CDEBUG(D_MGS, "%.3s param %s\n", ptr, ptr + 4);
3551 if (mgs_log_is_empty(env, mgs, mti->mti_svname))
3552 GOTO(end, rc = -ENODEV);
3554 rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, mti->mti_svname,
3555 &mgi->mgi_bufs, mti->mti_svname, ptr);
3559 LCONSOLE_WARN("Ignoring unrecognized param '%s'\n", ptr);
3564 CERROR("err %d on param '%s'\n", rc, ptr);
/*
 * Placeholder for adding failover nids automatically when a target
 * registers.
 *
 * NOTE(review): the visible body uses `obd`, `fsname` and `fsdb`, none of
 * which is a parameter of this function, and calls helpers with fewer
 * arguments than the other callers in this file -- presumably the body is
 * compiled out on lines missing from this listing; confirm before relying
 * on it.
 */
3569 /* Not implementing automatic failover nid addition at this time. */
3570 int mgs_check_failnid(const struct lu_env *env, struct mgs_device *mgs,
3571 struct mgs_target_info *mti)
3578 rc = mgs_find_or_make_fsdb(obd, fsname, &fsdb);
3582 if (mgs_log_is_empty(obd, mti->mti_svname))
3583 /* should never happen */
3586 CDEBUG(D_MGS, "Checking for new failnids for %s\n", mti->mti_svname);
3588 /* FIXME We can just check mti->params to see if we're already in
3589 the failover list. Modify mti->params for rewriting back at
3590 server_register_target(). */
3592 mutex_lock(&fsdb->fsdb_mutex);
3593 rc = mgs_write_log_add_failnid(obd, fsdb, mti);
3594 mutex_unlock(&fsdb->fsdb_mutex);
3601 int mgs_write_log_target(const struct lu_env *env,
3602 struct mgs_device *mgs,
3603 struct mgs_target_info *mti,
3610 /* set/check the new target index */
3611 rc = mgs_set_index(env, mgs, mti);
3613 CERROR("Can't get index (%d)\n", rc);
3617 if (rc == EALREADY) {
3618 LCONSOLE_WARN("Found index %d for %s, updating log\n",
3619 mti->mti_stripe_index, mti->mti_svname);
3620 /* We would like to mark old log sections as invalid
3621 and add new log sections in the client and mdt logs.
3622 But if we add new sections, then live clients will
3623 get repeat setup instructions for already running
3624 osc's. So don't update the client/mdt logs. */
3625 mti->mti_flags &= ~LDD_F_UPDATE;
3628 mutex_lock(&fsdb->fsdb_mutex);
3630 if (mti->mti_flags &
3631 (LDD_F_VIRGIN | LDD_F_UPGRADE14 | LDD_F_WRITECONF)) {
3632 /* Generate a log from scratch */
3633 if (mti->mti_flags & LDD_F_SV_TYPE_MDT) {
3634 rc = mgs_write_log_mdt(env, mgs, fsdb, mti);
3635 } else if (mti->mti_flags & LDD_F_SV_TYPE_OST) {
3636 rc = mgs_write_log_ost(env, mgs, fsdb, mti);
3638 CERROR("Unknown target type %#x, can't create log for "
3639 "%s\n", mti->mti_flags, mti->mti_svname);
3642 CERROR("Can't write logs for %s (%d)\n",
3643 mti->mti_svname, rc);
3647 /* Just update the params from tunefs in mgs_write_log_params */
3648 CDEBUG(D_MGS, "Update params for %s\n", mti->mti_svname);
3649 mti->mti_flags |= LDD_F_PARAM;
3652 /* allocate temporary buffer, where class_get_next_param will
3653 make copy of a current parameter */
3654 OBD_ALLOC(buf, strlen(mti->mti_params) + 1);
3656 GOTO(out_up, rc = -ENOMEM);
3657 params = mti->mti_params;
3658 while (params != NULL) {
3659 rc = class_get_next_param(¶ms, buf);
3662 /* there is no next parameter, that is
3667 CDEBUG(D_MGS, "remaining string: '%s', param: '%s'\n",
3669 rc = mgs_write_log_param(env, mgs, fsdb, mti, buf);
3674 OBD_FREE(buf, strlen(mti->mti_params) + 1);
3677 mutex_unlock(&fsdb->fsdb_mutex);
/*
 * Erase one configuration log by name.
 *
 * The log is removed with llog_erase() in the LLOG_CONFIG_ORIG_CTXT context
 * of the MGS obd; a missing context is reported with CERROR.  According to
 * the comment below, a log that does not exist is tolerated (the statement
 * implementing this is missing from this listing).  The context reference
 * is dropped afterwards and a failure is logged with the log name.
 */
3681 int mgs_erase_log(const struct lu_env *env, struct mgs_device *mgs, char *name)
3683 struct llog_ctxt *ctxt;
3686 ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
3688 CERROR("%s: MGS config context doesn't exist\n",
3689 mgs->mgs_obd->obd_name);
3692 rc = llog_erase(env, ctxt, NULL, name);
3693 /* llog may not exist */
3696 llog_ctxt_put(ctxt);
3700 CERROR("%s: failed to clear log %s: %d\n",
3701 mgs->mgs_obd->obd_name, name, rc);
/*
 * Remove the in-memory database and every configuration log of one
 * filesystem.
 *
 * The entries of the configs directory are collected with
 * class_dentry_readdir().  The fs database found for \a fsname is freed
 * under mgs_mutex.  Each collected entry is then unlinked from the list and
 * released; an entry is erased with mgs_erase_log() when the part of its
 * name in front of the last '-' has the same length as \a fsname and
 * compares equal to it.
 */
3706 /* erase all logs for the given fs */
3707 int mgs_erase_logs(const struct lu_env *env, struct mgs_device *mgs, char *fsname)
3711 struct mgs_direntry *dirent, *n;
3712 int rc, len = strlen(fsname);
3716 /* Find all the logs in the CONFIGS directory */
3717 rc = class_dentry_readdir(env, mgs, &list);
3721 mutex_lock(&mgs->mgs_mutex);
3723 /* Delete the fs db */
3724 fsdb = mgs_find_fsdb(mgs, fsname);
3726 mgs_free_fsdb(mgs, fsdb);
3728 mutex_unlock(&mgs->mgs_mutex);
/* the _safe iterator is needed because each entry is unlinked and freed */
3730 cfs_list_for_each_entry_safe(dirent, n, &list, list) {
3731 cfs_list_del(&dirent->list);
3732 suffix = strrchr(dirent->name, '-');
3733 if (suffix != NULL) {
/* the length test keeps a longer fsname sharing this prefix from matching */
3734 if ((len == suffix - dirent->name) &&
3735 (strncmp(fsname, dirent->name, len) == 0)) {
3736 CDEBUG(D_MGS, "Removing log %s\n",
3738 mgs_erase_log(env, mgs, dirent->name);
3741 mgs_direntry_free(dirent);
/*
 * Dump the fields of a lustre_cfg record to the debug log at D_MGS level:
 * version, command, num, flags, nid and buffer count, followed by the
 * length and string content of every buffer.  The buffers are only listed
 * when lcfg_bufcount is below LUSTRE_CFG_MAX_BUFCOUNT.
 */
3747 /* from llog_swab */
3748 static void print_lustre_cfg(struct lustre_cfg *lcfg)
3753 CDEBUG(D_MGS, "lustre_cfg: %p\n", lcfg);
3754 CDEBUG(D_MGS, "\tlcfg->lcfg_version: %#x\n", lcfg->lcfg_version);
3756 CDEBUG(D_MGS, "\tlcfg->lcfg_command: %#x\n", lcfg->lcfg_command);
3757 CDEBUG(D_MGS, "\tlcfg->lcfg_num: %#x\n", lcfg->lcfg_num);
3758 CDEBUG(D_MGS, "\tlcfg->lcfg_flags: %#x\n", lcfg->lcfg_flags);
3759 CDEBUG(D_MGS, "\tlcfg->lcfg_nid: %s\n", libcfs_nid2str(lcfg->lcfg_nid));
3761 CDEBUG(D_MGS, "\tlcfg->lcfg_bufcount: %d\n", lcfg->lcfg_bufcount);
/* guard against a bogus buffer count before walking the buffers */
3762 if (lcfg->lcfg_bufcount < LUSTRE_CFG_MAX_BUFCOUNT)
3763 for (i = 0; i < lcfg->lcfg_bufcount; i++) {
3764 CDEBUG(D_MGS, "\tlcfg->lcfg_buflens[%d]: %d %s\n",
3765 i, lcfg->lcfg_buflens[i],
3766 lustre_cfg_string(lcfg, i));
3771 /* Set a permanent (config log) param for a target or fs
3772 * \param lcfg buf0 may contain the device (testfs-MDT0000) name
3773 * buf1 contains the single parameter
3775 int mgs_setparam(const struct lu_env *env, struct mgs_device *mgs,
3776 struct lustre_cfg *lcfg, char *fsname)
3779 struct mgs_target_info *mti;
3780 char *devname, *param;
3786 print_lustre_cfg(lcfg);
3788 /* lustre, lustre-mdtlov, lustre-client, lustre-MDT0000 */
3789 devname = lustre_cfg_string(lcfg, 0);
3790 param = lustre_cfg_string(lcfg, 1);
3792 /* Assume device name embedded in param:
3793 lustre-OST0000.osc.max_dirty_mb=32 */
3794 ptr = strchr(param, '.');
3802 LCONSOLE_ERROR_MSG(0x14d, "No target specified: %s\n", param);
3806 rc = mgs_parse_devname(devname, fsname, NULL);
3807 if (rc == 0 && !mgs_parse_devname(devname, NULL, &index)) {
3808 /* param related to llite isn't allowed to set by OST or MDT */
3809 if (rc == 0 && strncmp(param, PARAM_LLITE,
3810 sizeof(PARAM_LLITE)) == 0)
3813 /* assume devname is the fsname */
3814 memset(fsname, 0, MTI_NAME_MAXLEN);
3815 strncpy(fsname, devname, MTI_NAME_MAXLEN);
3816 fsname[MTI_NAME_MAXLEN - 1] = 0;
3818 CDEBUG(D_MGS, "setparam fs='%s' device='%s'\n", fsname, devname);
3820 rc = mgs_find_or_make_fsdb(env, mgs, fsname, &fsdb);
3823 if (!test_bit(FSDB_MGS_SELF, &fsdb->fsdb_flags) &&
3824 test_bit(FSDB_LOG_EMPTY, &fsdb->fsdb_flags)) {
3825 CERROR("No filesystem targets for %s. cfg_device from lctl "
3826 "is '%s'\n", fsname, devname);
3827 mgs_free_fsdb(mgs, fsdb);
3831 /* Create a fake mti to hold everything */
3834 GOTO(out, rc = -ENOMEM);
3835 strncpy(mti->mti_fsname, fsname, MTI_NAME_MAXLEN);
3836 strncpy(mti->mti_svname, devname, MTI_NAME_MAXLEN);
3837 strncpy(mti->mti_params, param, sizeof(mti->mti_params));
3838 rc = server_name2index(mti->mti_svname, &mti->mti_stripe_index, &tmp);
3840 /* Not a valid server; may be only fsname */
3843 /* Strip -osc or -mdc suffix from svname */
3844 if (server_make_name(rc, mti->mti_stripe_index, mti->mti_fsname,
3846 GOTO(out, rc = -EINVAL);
3848 mti->mti_flags = rc | LDD_F_PARAM;
3850 mutex_lock(&fsdb->fsdb_mutex);
3851 rc = mgs_write_log_param(env, mgs, fsdb, mti, mti->mti_params);
3852 mutex_unlock(&fsdb->fsdb_mutex);
3855 * Revoke lock so everyone updates. Should be alright if
3856 * someone was already reading while we were updating the logs,
3857 * so we don't really need to hold the lock while we're
3860 mgs_revoke_lock(mgs, fsdb, CONFIG_T_CONFIG);
/*
 * Append one pool command to a configuration log.
 *
 * The log \a logname is opened for recording and three records are written
 * for the device \a lovname: a CM_START marker carrying \a comment, the
 * command \a cmd with \a poolname, \a fsname and \a ostname as its string
 * arguments (passed to record_base() in exactly that order), and a CM_END
 * marker with the same comment.  The log is closed afterwards.
 *
 * NOTE(review): the rc checks between the steps are missing from this
 * listing.
 */
3866 static int mgs_write_log_pool(const struct lu_env *env,
3867 struct mgs_device *mgs, char *logname,
3868 struct fs_db *fsdb, char *lovname,
3869 enum lcfg_command_type cmd,
3870 char *poolname, char *fsname,
3871 char *ostname, char *comment)
3873 struct llog_handle *llh = NULL;
3876 rc = record_start_log(env, mgs, &llh, logname);
3879 rc = record_marker(env, llh, fsdb, CM_START, lovname, comment);
3882 rc = record_base(env, llh, lovname, 0, cmd, poolname, fsname, ostname, 0);
3885 rc = record_marker(env, llh, fsdb, CM_END, lovname, comment);
3887 record_end_log(env, &llh);
3891 int mgs_pool_cmd(const struct lu_env *env, struct mgs_device *mgs,
3892 enum lcfg_command_type cmd, char *fsname,
3893 char *poolname, char *ostname)
3898 char *label = NULL, *canceled_label = NULL;
3900 struct mgs_target_info *mti = NULL;
3904 rc = mgs_find_or_make_fsdb(env, mgs, fsname, &fsdb);
3906 CERROR("Can't get db for %s\n", fsname);
3909 if (test_bit(FSDB_LOG_EMPTY, &fsdb->fsdb_flags)) {
3910 CERROR("%s is not defined\n", fsname);
3911 mgs_free_fsdb(mgs, fsdb);
3915 label_sz = 10 + strlen(fsname) + strlen(poolname);
3917 /* check if ostname match fsname */
3918 if (ostname != NULL) {
3921 ptr = strrchr(ostname, '-');
3922 if ((ptr == NULL) ||
3923 (strncmp(fsname, ostname, ptr-ostname) != 0))
3925 label_sz += strlen(ostname);
3928 OBD_ALLOC(label, label_sz);
3935 "new %s.%s", fsname, poolname);
3939 "add %s.%s.%s", fsname, poolname, ostname);
3942 OBD_ALLOC(canceled_label, label_sz);
3943 if (canceled_label == NULL)
3944 GOTO(out_label, rc = -ENOMEM);
3946 "rem %s.%s.%s", fsname, poolname, ostname);
3947 sprintf(canceled_label,
3948 "add %s.%s.%s", fsname, poolname, ostname);
3951 OBD_ALLOC(canceled_label, label_sz);
3952 if (canceled_label == NULL)
3953 GOTO(out_label, rc = -ENOMEM);
3955 "del %s.%s", fsname, poolname);
3956 sprintf(canceled_label,
3957 "new %s.%s", fsname, poolname);
3963 mutex_lock(&fsdb->fsdb_mutex);
3965 if (canceled_label != NULL) {
3968 GOTO(out_cancel, rc = -ENOMEM);
3971 /* write pool def to all MDT logs */
3972 for (i = 0; i < INDEX_MAP_SIZE * 8; i++) {
3973 if (test_bit(i, fsdb->fsdb_mdt_index_map)) {
3974 rc = name_create_mdt_and_lov(&logname, &lovname,
3977 mutex_unlock(&fsdb->fsdb_mutex);
3980 if (canceled_label != NULL) {
3981 strcpy(mti->mti_svname, "lov pool");
3982 rc = mgs_modify(env, mgs, fsdb, mti, logname,
3983 lovname, canceled_label,
3988 rc = mgs_write_log_pool(env, mgs, logname,
3992 name_destroy(&logname);
3993 name_destroy(&lovname);
3995 mutex_unlock(&fsdb->fsdb_mutex);
4001 rc = name_create(&logname, fsname, "-client");
4003 mutex_unlock(&fsdb->fsdb_mutex);
4006 if (canceled_label != NULL) {
4007 rc = mgs_modify(env, mgs, fsdb, mti, logname,
4008 fsdb->fsdb_clilov, canceled_label, CM_SKIP);
4010 mutex_unlock(&fsdb->fsdb_mutex);
4011 name_destroy(&logname);
4016 rc = mgs_write_log_pool(env, mgs, logname, fsdb, fsdb->fsdb_clilov,
4017 cmd, fsname, poolname, ostname, label);
4018 mutex_unlock(&fsdb->fsdb_mutex);
4019 name_destroy(&logname);
4020 /* request for update */
4021 mgs_revoke_lock(mgs, fsdb, CONFIG_T_CONFIG);
4028 if (canceled_label != NULL)
4029 OBD_FREE(canceled_label, label_sz);
4031 OBD_FREE(label, label_sz);