4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2012, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lustre/mgs/mgs_llog.c
38 * Lustre Management Server (mgs) config llog creation
40 * Author: Nathan Rutman <nathan@clusterfs.com>
41 * Author: Alex Zhuravlev <bzzz@whamcloud.com>
42 * Author: Mikhail Pershin <tappro@whamcloud.com>
45 #define DEBUG_SUBSYSTEM S_MGS
46 #define D_MGS D_CONFIG
50 #include <lustre_param.h>
51 #include <lustre_sec.h>
52 #include <lustre_quota.h>
54 #include "mgs_internal.h"
56 /********************** Class functions ********************/
/*
 * List the entries of the MGS configs directory.
 *
 * Initialises \a list, then walks mgs->mgs_configs_dir through the
 * directory's index iterator operations (dio_it). For each key that is
 * kept, an mgs_direntry is allocated, the key is copied into de->name with
 * a terminating NUL, and the entry is added to \a list. Per the in-line
 * comment the "." and ".." entries are filtered out.
 *
 * NOTE(review): several short lines of this function (declarations, braces,
 * labels, return) are not visible in this listing, so the cleanup and
 * return paths are intentionally not described.
 */
58 int class_dentry_readdir(const struct lu_env *env,
59 struct mgs_device *mgs, cfs_list_t *list)
61 struct dt_object *dir = mgs->mgs_configs_dir;
62 const struct dt_it_ops *iops;
64 struct mgs_direntry *de;
68 CFS_INIT_LIST_HEAD(list);
70 if (!dt_try_as_dir(env, dir))
71 GOTO(out, rc = -ENOTDIR);
74 LASSERT(dir->do_index_ops);
76 iops = &dir->do_index_ops->dio_it;
77 it = iops->init(env, dir, LUDA_64BITHASH, BYPASS_CAPA);
81 rc = iops->load(env, it, 0);
87 key = (void *)iops->key(env, it);
89 CERROR("%s: key failed when listing %s: rc = %d\n",
90 mgs->mgs_obd->obd_name, MOUNT_CONFIGS_DIR,
94 key_sz = iops->key_size(env, it);
97 /* filter out "." and ".." entries */
101 if (key_sz == 2 && key[1] == '.')
/* the entry is sized for the key plus its terminating NUL */
105 de = mgs_direntry_alloc(key_sz + 1);
111 memcpy(de->name, key, key_sz);
112 de->name[key_sz] = 0;
114 cfs_list_add(&de->list, list);
117 rc = iops->next(env, it);
127 CERROR("%s: key failed when listing %s: rc = %d\n",
128 mgs->mgs_obd->obd_name, MOUNT_CONFIGS_DIR, rc);
132 /******************** DB functions *********************/
/*
 * Build the string "<prefix><suffix>" in a buffer obtained with OBD_ALLOC
 * and store it in *newname. The buffer is sized
 * strlen(prefix) + strlen(suffix) + 1, i.e. exactly the formatted string
 * plus its terminating NUL.
 */
134 static inline int name_create(char **newname, char *prefix, char *suffix)
137 OBD_ALLOC(*newname, strlen(prefix) + strlen(suffix) + 1);
140 sprintf(*newname, "%s%s", prefix, suffix);
/*
 * Release a string through OBD_FREE. The size handed to OBD_FREE is
 * recomputed from the string's current length (strlen + 1), so the string
 * must not have been shortened or lengthened since it was allocated.
 */
144 static inline void name_destroy(char **name)
147 OBD_FREE(*name, strlen(*name) + 1);
/*
 * Context handed to mgs_fsdb_handler() as its \a data argument.
 * The handler uses its ->fsdb member (the database being filled) and its
 * ->ver member (version taken from the most recent marker record).
 */
151 struct mgs_fsdb_handler_data
157 /* from the (client) config log, figure out:
158 1. which ost's/mdt's are configured (by index)
159 2. what the last config step is
160 3. COMPAT_18 osc name
162 /* It might be better to have a separate db file, instead of parsing the info
163 out of the client log. This is slow and potentially error-prone. */
/*
 * llog_process() callback that rebuilds parts of an fs_db from one config
 * record (see mgs_get_fsdb_from_llog()).
 *
 * Only OBD_CFG_REC records that pass lustre_cfg_sanity_check() are handled.
 * Depending on the command the handler:
 *  - LCFG_LOV_ADD_OBD / LCFG_LOV_DEL_OBD: parses the index from config
 *    string 2 and sets that bit in fsdb_ost_index_map;
 *  - LCFG_ATTACH of type LUSTRE_MDC_NAME: derives the MDT index from the
 *    device name, sets that bit in fsdb_mdt_index_map and increments
 *    fsdb_mdt_count;
 *  - LCFG_ATTACH of type LUSTRE_OSC_NAME: sets FSDB_OSCNAME18 when the last
 *    marker version seen is 1.x with x <= 8;
 *  - LCFG_MARKER: records the marker version in d->ver and raises fsdb_gen
 *    to the largest cm_step seen so far.
 *
 * \param[in] rec	current llog record; the lustre_cfg follows the header
 * \param[in] data	struct mgs_fsdb_handler_data
 */
164 static int mgs_fsdb_handler(const struct lu_env *env, struct llog_handle *llh,
165 struct llog_rec_hdr *rec, void *data)
167 struct mgs_fsdb_handler_data *d = data;
168 struct fs_db *fsdb = d->fsdb;
169 int cfg_len = rec->lrh_len;
170 char *cfg_buf = (char*) (rec + 1);
171 struct lustre_cfg *lcfg;
176 if (rec->lrh_type != OBD_CFG_REC) {
177 CERROR("unhandled lrh_type: %#x\n", rec->lrh_type);
181 rc = lustre_cfg_sanity_check(cfg_buf, cfg_len);
183 CERROR("Insane cfg\n");
187 lcfg = (struct lustre_cfg *)cfg_buf;
189 CDEBUG(D_INFO, "cmd %x %s %s\n", lcfg->lcfg_command,
190 lustre_cfg_string(lcfg, 0), lustre_cfg_string(lcfg, 1));
192 /* Figure out ost indices */
193 /* lov_modify_tgts add 0:lov1 1:ost1_UUID 2(index):0 3(gen):1 */
194 if (lcfg->lcfg_command == LCFG_LOV_ADD_OBD ||
195 lcfg->lcfg_command == LCFG_LOV_DEL_OBD) {
196 index = simple_strtoul(lustre_cfg_string(lcfg, 2),
198 CDEBUG(D_MGS, "OST index for %s is %u (%s)\n",
199 lustre_cfg_string(lcfg, 1), index,
200 lustre_cfg_string(lcfg, 2));
201 set_bit(index, fsdb->fsdb_ost_index_map);
204 /* Figure out mdt indices */
205 /* attach 0:MDC_uml1_mdsA_MNT_client 1:mdc 2:1d834_MNT_client_03f */
206 if ((lcfg->lcfg_command == LCFG_ATTACH) &&
207 (strcmp(lustre_cfg_string(lcfg, 1), LUSTRE_MDC_NAME) == 0)) {
208 rc = server_name2index(lustre_cfg_string(lcfg, 0),
210 if (rc != LDD_F_SV_TYPE_MDT) {
211 CWARN("Unparsable MDC name %s, assuming index 0\n",
212 lustre_cfg_string(lcfg, 0));
216 CDEBUG(D_MGS, "MDT index is %u\n", index);
217 set_bit(index, fsdb->fsdb_mdt_index_map);
218 fsdb->fsdb_mdt_count ++;
222 * figure out the old config. fsdb_gen = 0 means old log
223 * It is obsoleted and not supported anymore
225 if (fsdb->fsdb_gen == 0) {
226 CERROR("Old config format is not supported\n");
231 * compat to 1.8, check osc name used by MDT0 to OSTs, bz18548.
233 if (!test_bit(FSDB_OSCNAME18, &fsdb->fsdb_flags) &&
234 lcfg->lcfg_command == LCFG_ATTACH &&
235 strcmp(lustre_cfg_string(lcfg, 1), LUSTRE_OSC_NAME) == 0) {
236 if (OBD_OCD_VERSION_MAJOR(d->ver) == 1 &&
237 OBD_OCD_VERSION_MINOR(d->ver) <= 8) {
238 CWARN("MDT using 1.8 OSC name scheme\n");
239 set_bit(FSDB_OSCNAME18, &fsdb->fsdb_flags);
243 if (lcfg->lcfg_command == LCFG_MARKER) {
244 struct cfg_marker *marker;
245 marker = lustre_cfg_buf(lcfg, 1);
247 d->ver = marker->cm_vers;
249 /* Keep track of the latest marker step */
250 fsdb->fsdb_gen = max(fsdb->fsdb_gen, marker->cm_step);
256 /* fsdb->fsdb_mutex is already held in mgs_find_or_make_fsdb*/
/*
 * Populate \a fsdb from the "<fsdb_name>-client" config llog.
 *
 * The log is opened (created if needed) via llog_open_create(), initialised
 * as a plain llog and run through mgs_fsdb_handler(). FSDB_LOG_EMPTY is set
 * in fsdb_flags when llog_get_size() reports no more than one record. The
 * log handle is closed and the temporary log name released afterwards.
 */
257 static int mgs_get_fsdb_from_llog(const struct lu_env *env,
258 struct mgs_device *mgs,
262 struct llog_handle *loghandle;
263 struct llog_ctxt *ctxt;
264 struct mgs_fsdb_handler_data d = { fsdb, 0 };
269 ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
270 LASSERT(ctxt != NULL);
271 rc = name_create(&logname, fsdb->fsdb_name, "-client");
274 rc = llog_open_create(env, ctxt, &loghandle, NULL, logname);
278 rc = llog_init_handle(env, loghandle, LLOG_F_IS_PLAIN, NULL);
282 if (llog_get_size(loghandle) <= 1)
283 set_bit(FSDB_LOG_EMPTY, &fsdb->fsdb_flags);
285 rc = llog_process(env, loghandle, mgs_fsdb_handler, (void *)&d, NULL);
286 CDEBUG(D_INFO, "get_db = %d\n", rc);
288 llog_close(env, loghandle);
290 name_destroy(&logname);
/*
 * Release all sptlrpc rule state attached to \a fsdb: every entry of the
 * fsdb_srpc_tgt chain (its rule set, its target name string and the entry
 * itself), followed by the general rule set fsdb_srpc_gen.
 */
297 static void mgs_free_fsdb_srpc(struct fs_db *fsdb)
299 struct mgs_tgt_srpc_conf *tgtconf;
301 /* free target-specific rules */
302 while (fsdb->fsdb_srpc_tgt) {
/* unlink the head entry before freeing it */
303 tgtconf = fsdb->fsdb_srpc_tgt;
304 fsdb->fsdb_srpc_tgt = tgtconf->mtsc_next;
306 LASSERT(tgtconf->mtsc_tgt);
308 sptlrpc_rule_set_free(&tgtconf->mtsc_rset);
309 OBD_FREE(tgtconf->mtsc_tgt, strlen(tgtconf->mtsc_tgt) + 1);
310 OBD_FREE_PTR(tgtconf);
313 /* free general rules */
314 sptlrpc_rule_set_free(&fsdb->fsdb_srpc_gen);
/*
 * Look up a filesystem database by name: walks mgs->mgs_fs_db_list and
 * compares each entry's fsdb_name with \a fsname using strcmp().
 * The function takes no lock itself; mgs_find_or_make_fsdb() calls it with
 * mgs->mgs_mutex held.
 */
317 struct fs_db *mgs_find_fsdb(struct mgs_device *mgs, char *fsname)
322 cfs_list_for_each(tmp, &mgs->mgs_fs_db_list) {
323 fsdb = cfs_list_entry(tmp, struct fs_db, fsdb_list);
324 if (strcmp(fsdb->fsdb_name, fsname) == 0)
330 /* caller must hold the mgs->mgs_fs_db_lock */
/*
 * Create a new fs_db for \a fsname and link it into mgs->mgs_fs_db_list.
 *
 * A name that does not fit in fsdb->fsdb_name is rejected with an error
 * message. Otherwise the name is copied, fsdb_mutex is initialised and
 * FSDB_UDESC is set. The special name MGSSELF_NAME gets FSDB_MGS_SELF;
 * the OST/MDT index maps and the "-clilov"/"-clilmv" names are set up for
 * regular filesystems (presumably in the else branch -- the brace lines are
 * not visible in this listing). The NID table data and the lproc "live"
 * entry are initialised before the fsdb is added to the list.
 *
 * The trailing lines are the error path: they free the index maps and the
 * two generated names.
 */
331 static struct fs_db *mgs_new_fsdb(const struct lu_env *env,
332 struct mgs_device *mgs, char *fsname)
338 if (strlen(fsname) >= sizeof(fsdb->fsdb_name)) {
339 CERROR("fsname %s is too long\n", fsname);
347 strcpy(fsdb->fsdb_name, fsname);
348 mutex_init(&fsdb->fsdb_mutex);
349 set_bit(FSDB_UDESC, &fsdb->fsdb_flags);
352 if (strcmp(fsname, MGSSELF_NAME) == 0) {
353 set_bit(FSDB_MGS_SELF, &fsdb->fsdb_flags);
355 OBD_ALLOC(fsdb->fsdb_ost_index_map, INDEX_MAP_SIZE);
356 OBD_ALLOC(fsdb->fsdb_mdt_index_map, INDEX_MAP_SIZE);
357 if (!fsdb->fsdb_ost_index_map || !fsdb->fsdb_mdt_index_map) {
358 CERROR("No memory for index maps\n");
359 GOTO(err, rc = -ENOMEM);
362 rc = name_create(&fsdb->fsdb_clilov, fsname, "-clilov");
365 rc = name_create(&fsdb->fsdb_clilmv, fsname, "-clilmv");
369 /* initialise data for NID table */
370 mgs_ir_init_fs(env, mgs, fsdb);
372 lproc_mgs_add_live(mgs, fsdb);
375 cfs_list_add(&fsdb->fsdb_list, &mgs->mgs_fs_db_list);
379 if (fsdb->fsdb_ost_index_map)
380 OBD_FREE(fsdb->fsdb_ost_index_map, INDEX_MAP_SIZE);
381 if (fsdb->fsdb_mdt_index_map)
382 OBD_FREE(fsdb->fsdb_mdt_index_map, INDEX_MAP_SIZE);
383 name_destroy(&fsdb->fsdb_clilov);
384 name_destroy(&fsdb->fsdb_clilmv);
/*
 * Tear down one fs_db: with fsdb_mutex held, remove its lproc "live" entry,
 * unlink it from the fsdb list, finalise its imperative-recovery (ir)
 * state, and free the index maps, the generated clilov/clilmv names and the
 * sptlrpc rules. The mutex is released at the end.
 */
389 static void mgs_free_fsdb(struct mgs_device *mgs, struct fs_db *fsdb)
391 /* wait for anyone with the sem */
392 mutex_lock(&fsdb->fsdb_mutex);
393 lproc_mgs_del_live(mgs, fsdb);
394 cfs_list_del(&fsdb->fsdb_list);
396 /* deinitialize fsr */
397 mgs_ir_fini_fs(mgs, fsdb);
399 if (fsdb->fsdb_ost_index_map)
400 OBD_FREE(fsdb->fsdb_ost_index_map, INDEX_MAP_SIZE);
401 if (fsdb->fsdb_mdt_index_map)
402 OBD_FREE(fsdb->fsdb_mdt_index_map, INDEX_MAP_SIZE);
403 name_destroy(&fsdb->fsdb_clilov);
404 name_destroy(&fsdb->fsdb_clilmv);
405 mgs_free_fsdb_srpc(fsdb);
406 mutex_unlock(&fsdb->fsdb_mutex);
/* Initialise mgs->mgs_fs_db_list as an empty list of filesystem databases. */
410 int mgs_init_fsdb_list(struct mgs_device *mgs)
412 CFS_INIT_LIST_HEAD(&mgs->mgs_fs_db_list);
/*
 * Free every fs_db on mgs->mgs_fs_db_list while holding mgs->mgs_mutex.
 * The "safe" list iterator is used because mgs_free_fsdb() unlinks the
 * entry currently being visited.
 */
416 int mgs_cleanup_fsdb_list(struct mgs_device *mgs)
419 cfs_list_t *tmp, *tmp2;
420 mutex_lock(&mgs->mgs_mutex);
421 cfs_list_for_each_safe(tmp, tmp2, &mgs->mgs_fs_db_list) {
422 fsdb = cfs_list_entry(tmp, struct fs_db, fsdb_list);
423 mgs_free_fsdb(mgs, fsdb);
425 mutex_unlock(&mgs->mgs_mutex);
/*
 * Find the fs_db called \a name, creating and loading it when it does not
 * exist yet.
 *
 * The lookup and the creation are done under mgs->mgs_mutex. A newly
 * created fsdb has its fsdb_mutex taken before mgs_mutex is dropped, so
 * that it stays locked while it is populated: from the client llog (unless
 * FSDB_MGS_SELF is set) and from the params llog for sptlrpc rules.
 * The last two lines are the failure path, which unlocks and frees the new
 * fsdb.
 */
429 int mgs_find_or_make_fsdb(const struct lu_env *env,
430 struct mgs_device *mgs, char *name,
437 mutex_lock(&mgs->mgs_mutex);
438 fsdb = mgs_find_fsdb(mgs, name);
440 mutex_unlock(&mgs->mgs_mutex);
445 CDEBUG(D_MGS, "Creating new db\n");
446 fsdb = mgs_new_fsdb(env, mgs, name);
447 /* lock fsdb_mutex until the db is loaded from llogs */
449 mutex_lock(&fsdb->fsdb_mutex);
450 mutex_unlock(&mgs->mgs_mutex);
454 if (!test_bit(FSDB_MGS_SELF, &fsdb->fsdb_flags)) {
455 /* populate the db from the client llog */
456 rc = mgs_get_fsdb_from_llog(env, mgs, fsdb);
458 CERROR("Can't get db from client log %d\n", rc);
463 /* populate srpc rules from params llog */
464 rc = mgs_get_fsdb_srpc_from_llog(env, mgs, fsdb);
466 CERROR("Can't get db from params log %d\n", rc);
470 mutex_unlock(&fsdb->fsdb_mutex);
476 mutex_unlock(&fsdb->fsdb_mutex);
477 mgs_free_fsdb(mgs, fsdb);
483 -1= empty client log */
/*
 * Check whether the index requested in \a mti is already marked as used.
 *
 * The target must already carry an index (LDD_F_NEED_INDEX is asserted to
 * be clear). The fs_db for mti->mti_fsname is looked up or created, the
 * FSDB_LOG_EMPTY flag is tested, and mti->mti_stripe_index is then tested
 * in the OST or MDT index map according to the target type flag.
 */
484 int mgs_check_index(const struct lu_env *env,
485 struct mgs_device *mgs,
486 struct mgs_target_info *mti)
493 LASSERT(!(mti->mti_flags & LDD_F_NEED_INDEX));
495 rc = mgs_find_or_make_fsdb(env, mgs, mti->mti_fsname, &fsdb);
497 CERROR("Can't get db for %s\n", mti->mti_fsname);
501 if (test_bit(FSDB_LOG_EMPTY, &fsdb->fsdb_flags))
504 if (mti->mti_flags & LDD_F_SV_TYPE_OST)
505 imap = fsdb->fsdb_ost_index_map;
506 else if (mti->mti_flags & LDD_F_SV_TYPE_MDT)
507 imap = fsdb->fsdb_mdt_index_map;
511 if (test_bit(mti->mti_stripe_index, imap))
/*
 * Scan \a index_map for the first clear bit.
 *
 * \param[in] index_map	bitmap to scan
 * \param[in] map_len	size of the bitmap in bytes (map_len * 8 bits are
 *			examined)
 *
 * An error is logged when every bit is set.
 */
516 static __inline__ int next_index(void *index_map, int map_len)
519 for (i = 0; i < map_len * 8; i++)
520 if (!test_bit(i, index_map)) {
523 CERROR("max index %d exceeded.\n", i);
528 0 newly marked as in use
530 +EALREADY for update of an old index */
531 static int mgs_set_index(const struct lu_env *env,
532 struct mgs_device *mgs,
533 struct mgs_target_info *mti)
540 rc = mgs_find_or_make_fsdb(env, mgs, mti->mti_fsname, &fsdb);
542 CERROR("Can't get db for %s\n", mti->mti_fsname);
546 mutex_lock(&fsdb->fsdb_mutex);
547 if (mti->mti_flags & LDD_F_SV_TYPE_OST) {
548 imap = fsdb->fsdb_ost_index_map;
549 } else if (mti->mti_flags & LDD_F_SV_TYPE_MDT) {
550 imap = fsdb->fsdb_mdt_index_map;
551 if (fsdb->fsdb_mdt_count >= MAX_MDT_COUNT) {
552 LCONSOLE_ERROR_MSG(0x13f, "The max mdt count"
553 "is %d\n", (int)MAX_MDT_COUNT);
554 GOTO(out_up, rc = -ERANGE);
557 GOTO(out_up, rc = -EINVAL);
560 if (mti->mti_flags & LDD_F_NEED_INDEX) {
561 rc = next_index(imap, INDEX_MAP_SIZE);
563 GOTO(out_up, rc = -ERANGE);
564 mti->mti_stripe_index = rc;
565 if (mti->mti_flags & LDD_F_SV_TYPE_MDT)
566 fsdb->fsdb_mdt_count ++;
569 if (mti->mti_stripe_index >= INDEX_MAP_SIZE * 8) {
570 LCONSOLE_ERROR_MSG(0x13f, "Server %s requested index %d, "
571 "but the max index is %d.\n",
572 mti->mti_svname, mti->mti_stripe_index,
574 GOTO(out_up, rc = -ERANGE);
577 if (test_bit(mti->mti_stripe_index, imap)) {
578 if ((mti->mti_flags & LDD_F_VIRGIN) &&
579 !(mti->mti_flags & LDD_F_WRITECONF)) {
580 LCONSOLE_ERROR_MSG(0x140, "Server %s requested index "
581 "%d, but that index is already in "
582 "use. Use --writeconf to force\n",
584 mti->mti_stripe_index);
585 GOTO(out_up, rc = -EADDRINUSE);
587 CDEBUG(D_MGS, "Server %s updating index %d\n",
588 mti->mti_svname, mti->mti_stripe_index);
589 GOTO(out_up, rc = EALREADY);
593 set_bit(mti->mti_stripe_index, imap);
594 clear_bit(FSDB_LOG_EMPTY, &fsdb->fsdb_flags);
595 mutex_unlock(&fsdb->fsdb_mutex);
596 server_make_name(mti->mti_flags & ~(LDD_F_VIRGIN | LDD_F_WRITECONF),
597 mti->mti_stripe_index, mti->mti_fsname, mti->mti_svname);
599 CDEBUG(D_MGS, "Set index for %s to %d\n", mti->mti_svname,
600 mti->mti_stripe_index);
604 mutex_unlock(&fsdb->fsdb_mutex);
/*
 * Search key and result for mgs_modify_handler(): mml_marker carries the
 * comment/target name to match plus the flags and cancel time to apply;
 * the handler's caller also reads an mml_modified member of this struct.
 */
608 struct mgs_modify_lookup {
609 struct cfg_marker mml_marker;
/*
 * llog_process() callback for mgs_modify(): rewrites marker records that
 * match the lookup key.
 *
 * Records that are not OBD_CFG_REC or fail the sanity check are reported;
 * records other than LCFG_MARKER are ignored. A marker matches when its
 * comment and target name equal those in the lookup and it is not already
 * flagged CM_SKIP. A matching marker gets CM_EXCLUDE cleared, the lookup's
 * flags OR-ed in and the lookup's cancel time, and is written back to the
 * log.
 *
 * \param[in] data	struct mgs_modify_lookup
 */
613 static int mgs_modify_handler(const struct lu_env *env,
614 struct llog_handle *llh,
615 struct llog_rec_hdr *rec, void *data)
617 struct mgs_modify_lookup *mml = data;
618 struct cfg_marker *marker;
619 struct lustre_cfg *lcfg = REC_DATA(rec);
620 int cfg_len = REC_DATA_LEN(rec);
624 if (rec->lrh_type != OBD_CFG_REC) {
625 CERROR("unhandled lrh_type: %#x\n", rec->lrh_type);
629 rc = lustre_cfg_sanity_check(lcfg, cfg_len);
631 CERROR("Insane cfg\n");
635 /* We only care about markers */
636 if (lcfg->lcfg_command != LCFG_MARKER)
639 marker = lustre_cfg_buf(lcfg, 1);
640 if ((strcmp(mml->mml_marker.cm_comment, marker->cm_comment) == 0) &&
641 (strcmp(mml->mml_marker.cm_tgtname, marker->cm_tgtname) == 0) &&
642 !(marker->cm_flags & CM_SKIP)) {
643 /* Found a non-skipped marker match */
644 CDEBUG(D_MGS, "Changing rec %u marker %d %x->%x: %s %s\n",
645 rec->lrh_index, marker->cm_step,
646 marker->cm_flags, mml->mml_marker.cm_flags,
647 marker->cm_tgtname, marker->cm_comment);
648 /* Overwrite the old marker llog entry */
649 marker->cm_flags &= ~CM_EXCLUDE; /* in case we're unexcluding */
650 marker->cm_flags |= mml->mml_marker.cm_flags;
651 marker->cm_canceltime = mml->mml_marker.cm_canceltime;
652 /* Header and tail are added back to lrh_len in
653 llog_lvfs_write_rec */
654 rec->lrh_len = cfg_len;
655 rc = llog_write(env, llh, rec, NULL, 0, (void *)lcfg,
665 * Modify an existing config log record (for CM_SKIP or CM_EXCLUDE)
667 * 0 - modified successfully,
668 * 1 - no modification was done
671 static int mgs_modify(const struct lu_env *env, struct mgs_device *mgs,
672 struct fs_db *fsdb, struct mgs_target_info *mti,
673 char *logname, char *devname, char *comment, int flags)
675 struct llog_handle *loghandle;
676 struct llog_ctxt *ctxt;
677 struct mgs_modify_lookup *mml;
682 LASSERT(mutex_is_locked(&fsdb->fsdb_mutex));
683 CDEBUG(D_MGS, "modify %s/%s/%s fl=%x\n", logname, devname, comment,
686 ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
687 LASSERT(ctxt != NULL);
688 rc = llog_open(env, ctxt, &loghandle, NULL, logname, LLOG_OPEN_EXISTS);
695 rc = llog_init_handle(env, loghandle, LLOG_F_IS_PLAIN, NULL);
699 if (llog_get_size(loghandle) <= 1)
700 GOTO(out_close, rc = 0);
704 GOTO(out_close, rc = -ENOMEM);
705 strcpy(mml->mml_marker.cm_comment, comment);
706 strcpy(mml->mml_marker.cm_tgtname, devname);
707 /* Modify mostly means cancel */
708 mml->mml_marker.cm_flags = flags;
709 mml->mml_marker.cm_canceltime = flags ? cfs_time_current_sec() : 0;
710 mml->mml_modified = 0;
711 rc = llog_process(env, loghandle, mgs_modify_handler, (void *)mml,
713 if (!rc && !mml->mml_modified)
718 llog_close(env, loghandle);
721 CERROR("%s: modify %s/%s failed: rc = %d\n",
722 mgs->mgs_obd->obd_name, mti->mti_svname, comment, rc);
727 /** This structure is passed to mgs_replace_handler */
/*
 * State carried across records while a config llog is copied with the NIDs
 * of one target replaced (see mgs_replace_nids_log()).
 */
728 struct mgs_replace_uuid_lookup {
729 /* Nids are replaced for this target device */
730 struct mgs_target_info target;
731 /* Temporary modified llog */
732 struct llog_handle *temp_llh;
733 /* Flag is set if in target block*/
734 int in_target_device;
735 /* Nids already added. Just skip (multiple nids) */
736 int device_nids_added;
737 /* Flag is set if this block should not be copied */
742 * Check: a) if block should be skipped
743 * b) is it target block
748 * \retval 0 should not be skipped
749 * \retval 1 should be skipped
/*
 * Track marker records while a config llog is being copied.
 *
 * Only LCFG_MARKER records are examined. Markers flagged
 * CM_EXCLUDE|CM_START and CM_EXCLUDE|CM_END are detected so that excluded
 * blocks can be left out of the copy. For markers naming the target device
 * (mrul->target.mti_svname), a CM_START marker sets in_target_device and
 * resets device_nids_added, and a CM_END marker clears in_target_device.
 */
751 static int check_markers(struct lustre_cfg *lcfg,
752 struct mgs_replace_uuid_lookup *mrul)
754 struct cfg_marker *marker;
756 /* Track markers. Find given device */
757 if (lcfg->lcfg_command == LCFG_MARKER) {
758 marker = lustre_cfg_buf(lcfg, 1);
759 /* Clean llog from records marked as CM_EXCLUDE.
760 CM_SKIP records are used for "active" command
761 and can be restored if needed */
762 if ((marker->cm_flags & (CM_EXCLUDE | CM_START)) ==
763 (CM_EXCLUDE | CM_START)) {
768 if ((marker->cm_flags & (CM_EXCLUDE | CM_END)) ==
769 (CM_EXCLUDE | CM_END)) {
774 if (strcmp(mrul->target.mti_svname, marker->cm_tgtname) == 0) {
/* a marker must not be both a start and an end marker */
775 LASSERT(!(marker->cm_flags & CM_START) ||
776 !(marker->cm_flags & CM_END));
777 if (marker->cm_flags & CM_START) {
778 mrul->in_target_device = 1;
779 mrul->device_nids_added = 0;
780 } else if (marker->cm_flags & CM_END)
781 mrul->in_target_device = 0;
/*
 * Append one lustre_cfg to the config llog \a llh as an OBD_CFG_REC record.
 * The record length is derived from the config's buffer count and lengths
 * via lustre_cfg_len() and llog_data_len(). A failed write is logged.
 */
788 static int record_lcfg(const struct lu_env *env, struct llog_handle *llh,
789 struct lustre_cfg *lcfg)
791 struct llog_rec_hdr rec;
797 LASSERT(llh->lgh_ctxt);
799 buflen = lustre_cfg_len(lcfg->lcfg_bufcount,
801 rec.lrh_len = llog_data_len(buflen);
802 rec.lrh_type = OBD_CFG_REC;
804 /* idx = -1 means append */
805 rc = llog_write(env, llh, &rec, NULL, 0, (void *)lcfg, -1);
807 CERROR("failed %d\n", rc);
/*
 * Build a lustre_cfg for command \a cmd and record it in \a llh.
 *
 * Buffer 0 is \a cfgname; \a s1 .. \a s4 go into buffers 1 .. 4. The
 * per-thread mgi_bufs of the calling environment is used as scratch space,
 * \a nid is stored in lcfg_nid, and the temporary lustre_cfg is freed after
 * it has been recorded. Failures are logged together with all arguments.
 */
811 static int record_base(const struct lu_env *env, struct llog_handle *llh,
812 char *cfgname, lnet_nid_t nid, int cmd,
813 char *s1, char *s2, char *s3, char *s4)
815 struct mgs_thread_info *mgi = mgs_env_info(env);
816 struct lustre_cfg *lcfg;
819 CDEBUG(D_MGS, "lcfg %s %#x %s %s %s %s\n", cfgname,
820 cmd, s1, s2, s3, s4);
822 lustre_cfg_bufs_reset(&mgi->mgi_bufs, cfgname);
824 lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 1, s1);
826 lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 2, s2);
828 lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 3, s3);
830 lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 4, s4);
832 lcfg = lustre_cfg_new(cmd, &mgi->mgi_bufs);
835 lcfg->lcfg_nid = nid;
837 rc = record_lcfg(env, llh, lcfg);
839 lustre_cfg_free(lcfg);
842 CERROR("error %d: lcfg %s %#x %s %s %s %s\n", rc, cfgname,
843 cmd, s1, s2, s3, s4);
/* Record an LCFG_ADD_UUID command binding \a nid to \a uuid (no device name). */
848 static inline int record_add_uuid(const struct lu_env *env,
849 struct llog_handle *llh,
850 uint64_t nid, char *uuid)
852 return record_base(env, llh, NULL, nid, LCFG_ADD_UUID, uuid, 0, 0, 0);
/* Record an LCFG_ADD_CONN command for device \a devname with \a uuid as buffer 1. */
855 static inline int record_add_conn(const struct lu_env *env,
856 struct llog_handle *llh,
857 char *devname, char *uuid)
859 return record_base(env, llh, devname, 0, LCFG_ADD_CONN, uuid, 0, 0, 0);
/* Record an LCFG_ATTACH command for \a devname: buffer 1 is \a type, buffer 2 is \a uuid. */
862 static inline int record_attach(const struct lu_env *env,
863 struct llog_handle *llh, char *devname,
864 char *type, char *uuid)
866 return record_base(env, llh,devname, 0, LCFG_ATTACH, type, uuid, 0, 0);
/* Record an LCFG_SETUP command for \a devname with \a s1 .. \a s4 as buffers 1 .. 4. */
869 static inline int record_setup(const struct lu_env *env,
870 struct llog_handle *llh, char *devname,
871 char *s1, char *s2, char *s3, char *s4)
873 return record_base(env, llh, devname, 0, LCFG_SETUP, s1, s2, s3, s4);
877 * \retval <0 record processing error
878 * \retval n record is processed. No need to copy the original one.
879 * \retval 0 record is not processed.
/*
 * Rewrite one command found inside the target device block.
 *
 * For LCFG_ADD_UUID the original record is dropped and one add-uuid record
 * is written for every NID parsed from mrul->target.mti_params; note that
 * the same mti_params string is passed as the uuid argument. An error is
 * logged when no NID could be added, and device_nids_added is set once NIDs
 * have been written. For LCFG_SETUP seen after the NIDs were added, the
 * setup record is re-created with mti_params in place of config string 2
 * and all other strings unchanged.
 */
881 static int process_command(const struct lu_env *env, struct lustre_cfg *lcfg,
882 struct mgs_replace_uuid_lookup *mrul)
889 if (lcfg->lcfg_command == LCFG_ADD_UUID) {
890 /* LCFG_ADD_UUID command found. Let's skip original command
891 and add passed nids */
892 ptr = mrul->target.mti_params;
893 while (class_parse_nid(ptr, &nid, &ptr) == 0) {
894 CDEBUG(D_MGS, "add nid %s with uuid %s, "
895 "device %s\n", libcfs_nid2str(nid),
896 mrul->target.mti_params,
897 mrul->target.mti_svname);
898 rc = record_add_uuid(env,
900 mrul->target.mti_params);
905 if (nids_added == 0) {
906 CERROR("No new nids were added, nid %s with uuid %s, "
907 "device %s\n", libcfs_nid2str(nid),
908 mrul->target.mti_params,
909 mrul->target.mti_svname);
912 mrul->device_nids_added = 1;
918 if (mrul->device_nids_added && lcfg->lcfg_command == LCFG_SETUP) {
919 /* LCFG_SETUP command found. UUID should be changed */
920 rc = record_setup(env,
922 /* devname the same */
923 lustre_cfg_string(lcfg, 0),
924 /* s1 is not changed */
925 lustre_cfg_string(lcfg, 1),
926 /* new uuid should be
928 mrul->target.mti_params,
929 /* s3 is not changed */
930 lustre_cfg_string(lcfg, 3),
931 /* s4 is not changed */
932 lustre_cfg_string(lcfg, 4));
936 /* Another commands in target device block */
941 * Handler that is called for every record in the llog.
942 * Records are processed in the order they are placed in the llog.
944 * \param[in] llh log to be processed
945 * \param[in] rec current record
946 * \param[in] data mgs_replace_uuid_lookup structure
/*
 * llog_process() callback used by mgs_replace_nids_log(): decides for each
 * record of the backup log whether it is copied unchanged into
 * mrul->temp_llh, rewritten by process_command(), or skipped.
 *
 * Records failing the sanity check, records inside blocks that
 * check_markers() flags for skipping, and -- once the new NIDs have been
 * added -- further LCFG_ADD_UUID / LCFG_ADD_CONN records of the target are
 * skipped. Records outside the target device block are copied as they are.
 * A copy of the record header (local_rec) is used for the write so that the
 * original header is left untouched.
 */
950 static int mgs_replace_handler(const struct lu_env *env,
951 struct llog_handle *llh,
952 struct llog_rec_hdr *rec,
955 struct llog_rec_hdr local_rec = *rec;
956 struct mgs_replace_uuid_lookup *mrul;
957 struct lustre_cfg *lcfg = REC_DATA(rec);
958 int cfg_len = REC_DATA_LEN(rec);
962 mrul = (struct mgs_replace_uuid_lookup *)data;
964 if (rec->lrh_type != OBD_CFG_REC) {
965 CERROR("unhandled lrh_type: %#x, cmd %x %s %s\n",
966 rec->lrh_type, lcfg->lcfg_command,
967 lustre_cfg_string(lcfg, 0),
968 lustre_cfg_string(lcfg, 1));
972 rc = lustre_cfg_sanity_check(lcfg, cfg_len);
974 /* Do not copy any invalidated records */
975 GOTO(skip_out, rc = 0);
978 rc = check_markers(lcfg, mrul);
979 if (rc || mrul->skip_it)
980 GOTO(skip_out, rc = 0);
982 /* Write to new log all commands outside target device block */
983 if (!mrul->in_target_device)
984 GOTO(copy_out, rc = 0);
986 /* Skip all other LCFG_ADD_UUID and LCFG_ADD_CONN records
987 (failover nids) for this target, assuming that if then
988 primary is changing then so is the failover */
989 if (mrul->device_nids_added &&
990 (lcfg->lcfg_command == LCFG_ADD_UUID ||
991 lcfg->lcfg_command == LCFG_ADD_CONN))
992 GOTO(skip_out, rc = 0);
994 rc = process_command(env, lcfg, mrul);
1001 /* Record is placed in temporary llog as is */
1002 local_rec.lrh_len -= sizeof(*rec) + sizeof(struct llog_rec_tail);
1003 rc = llog_write(env, mrul->temp_llh, &local_rec, NULL, 0,
1006 CDEBUG(D_MGS, "Copied idx=%d, rc=%d, len=%d, cmd %x %s %s\n",
1007 rec->lrh_index, rc, rec->lrh_len, lcfg->lcfg_command,
1008 lustre_cfg_string(lcfg, 0), lustre_cfg_string(lcfg, 1));
1012 CDEBUG(D_MGS, "Skipped idx=%d, rc=%d, len=%d, cmd %x %s %s\n",
1013 rec->lrh_index, rc, rec->lrh_len, lcfg->lcfg_command,
1014 lustre_cfg_string(lcfg, 0), lustre_cfg_string(lcfg, 1));
/*
 * Copy the config llog \a fsname into a fresh llog called \a backup.
 *
 * Any previous log named \a backup is erased first (a missing one is not an
 * error). The backup log is then created and initialised with a header
 * uuid derived from \a backup, the original log is opened and every record
 * is copied with llog_copy_handler(). Both handles are closed and the llog
 * context reference is dropped on the way out; a failure is logged.
 *
 * NOTE(review): despite its name, \a fsname is used as the name of the
 * source log -- mgs_replace_nids_log() passes a log name, and also swaps
 * the two arguments to restore from a backup.
 */
1018 static int mgs_backup_llog(const struct lu_env *env,
1019 struct obd_device *mgs,
1020 char *fsname, char *backup)
1022 struct obd_uuid *uuid;
1023 struct llog_handle *orig_llh, *bak_llh;
1024 struct llog_ctxt *lctxt;
1028 lctxt = llog_get_context(mgs, LLOG_CONFIG_ORIG_CTXT);
1030 CERROR("%s: missing llog context\n", mgs->obd_name);
1031 GOTO(out, rc = -EINVAL);
1034 /* Make sure there's no old backup log */
1035 rc = llog_erase(env, lctxt, NULL, backup);
1036 if (rc < 0 && rc != -ENOENT)
1039 /* open backup log */
1040 rc = llog_open_create(env, lctxt, &bak_llh, NULL, backup);
1042 CERROR("%s: backup logfile open %s: rc = %d\n",
1043 mgs->obd_name, backup, rc);
1047 /* set the log header uuid */
1048 OBD_ALLOC_PTR(uuid);
1050 GOTO(out_put, rc = -ENOMEM);
1051 obd_str2uuid(uuid, backup);
1052 rc = llog_init_handle(env, bak_llh, LLOG_F_IS_PLAIN, uuid);
1055 GOTO(out_close1, rc);
1057 /* open original log */
1058 rc = llog_open(env, lctxt, &orig_llh, NULL, fsname,
1063 GOTO(out_close1, rc);
1066 rc = llog_init_handle(env, orig_llh, LLOG_F_IS_PLAIN, NULL);
1068 GOTO(out_close2, rc);
1070 /* Copy remote log */
1071 rc = llog_process(env, orig_llh, llog_copy_handler,
1072 (void *)bak_llh, NULL);
1075 rc2 = llog_close(env, orig_llh);
1079 rc2 = llog_close(env, bak_llh);
1084 llog_ctxt_put(lctxt);
1087 CERROR("%s: Failed to backup log %s: rc = %d\n",
1088 mgs->obd_name, fsname, rc);
/* Forward declaration: mgs_replace_nids_log() calls this before its definition further down. */
1092 static int mgs_log_is_empty(const struct lu_env *env, struct mgs_device *mgs,
1095 static int mgs_replace_nids_log(const struct lu_env *env,
1096 struct obd_device *mgs, struct fs_db *fsdb,
1097 char *logname, char *devname, char *nids)
1099 struct llog_handle *orig_llh, *backup_llh;
1100 struct llog_ctxt *ctxt;
1101 struct mgs_replace_uuid_lookup *mrul;
1102 struct mgs_device *mgs_dev = lu2mgs_dev(mgs->obd_lu_dev);
1107 CDEBUG(D_MGS, "Replace nids for %s in %s\n", devname, logname);
1109 ctxt = llog_get_context(mgs, LLOG_CONFIG_ORIG_CTXT);
1110 LASSERT(ctxt != NULL);
1112 if (mgs_log_is_empty(env, mgs_dev, logname)) {
1113 /* Log is empty. Nothing to replace */
1114 GOTO(out_put, rc = 0);
1117 OBD_ALLOC(backup, strlen(logname) + 5);
1119 GOTO(out_put, rc = -ENOMEM);
1121 sprintf(backup, "%s.bak", logname);
1123 rc = mgs_backup_llog(env, mgs, logname, backup);
1125 CERROR("%s: can't make backup for %s: rc = %d\n",
1126 mgs->obd_name, logname, rc);
1130 /* Now erase original log file. Connections are not allowed.
1131 Backup is already saved */
1132 rc = llog_erase(env, ctxt, NULL, logname);
1133 if (rc < 0 && rc != -ENOENT)
1136 /* open local log */
1137 rc = llog_open_create(env, ctxt, &orig_llh, NULL, logname);
1139 GOTO(out_restore, rc);
1141 rc = llog_init_handle(env, orig_llh, LLOG_F_IS_PLAIN, NULL);
1143 GOTO(out_closel, rc);
1145 /* open backup llog */
1146 rc = llog_open(env, ctxt, &backup_llh, NULL, backup,
1149 GOTO(out_closel, rc);
1151 rc = llog_init_handle(env, backup_llh, LLOG_F_IS_PLAIN, NULL);
1153 GOTO(out_close, rc);
1155 if (llog_get_size(backup_llh) <= 1)
1156 GOTO(out_close, rc = 0);
1158 OBD_ALLOC_PTR(mrul);
1160 GOTO(out_close, rc = -ENOMEM);
1161 /* devname is only needed information to replace UUID records */
1162 strncpy(mrul->target.mti_svname, devname, MTI_NAME_MAXLEN);
1163 /* parse nids later */
1164 strncpy(mrul->target.mti_params, nids, MTI_PARAM_MAXLEN);
1165 /* Copy records to this temporary llog */
1166 mrul->temp_llh = orig_llh;
1168 rc = llog_process(env, backup_llh, mgs_replace_handler,
1169 (void *)mrul, NULL);
1172 rc2 = llog_close(NULL, backup_llh);
1176 rc2 = llog_close(NULL, orig_llh);
1182 CERROR("%s: llog should be restored: rc = %d\n",
1184 rc2 = mgs_backup_llog(env, mgs, backup, logname);
1186 CERROR("%s: can't restore backup %s: rc = %d\n",
1187 mgs->obd_name, logname, rc2);
1191 OBD_FREE(backup, strlen(backup) + 5);
1194 llog_ctxt_put(ctxt);
1197 CERROR("%s: failed to replace nids in log %s: rc = %d\n",
1198 mgs->obd_name, logname, rc);
1204 * Parse device name and get file system name and/or device index
1206 * \param[in] devname device name (ex. lustre-MDT0000)
1207 * \param[out] fsname file system name(optional)
1208 * \param[out] index device index(optional)
1212 static int mgs_parse_devname(char *devname, char *fsname, __u32 *index)
1217 /* Extract fsname */
1218 ptr = strrchr(devname, '-');
1222 CDEBUG(D_MGS, "Device name %s without fsname\n",
1226 memset(fsname, 0, MTI_NAME_MAXLEN);
1227 strncpy(fsname, devname, ptr - devname);
1228 fsname[MTI_NAME_MAXLEN - 1] = 0;
1232 if (server_name2index(ptr, index, NULL) < 0) {
1233 CDEBUG(D_MGS, "Device name with wrong index\n");
/*
 * Heuristic check that nothing but the MGS is active: true when at most
 * three devices exist and \a mgs_obd has at most two exports.
 */
1241 static int only_mgs_is_running(struct obd_device *mgs_obd)
1243 /* TBD: is there a global variable holding the device count? */
1244 int num_devices = get_devices_count();
1245 /* osd, MGS and MGC + self_export
1246 (wc -l /proc/fs/lustre/devices <= 2) && (num_exports <= 2) */
1247 return (num_devices <= 3) && (mgs_obd->obd_num_exports <= 2);
/*
 * Build the MDT log name "<fsname>-MDTxxxx" in *logname, where xxxx is
 * \a i formatted as at least four hexadecimal digits. The string comes
 * from name_create().
 */
1250 static int name_create_mdt(char **logname, char *fsname, int i)
1254 sprintf(mdt_index, "-MDT%04x", i);
1255 return name_create(logname, fsname, mdt_index);
1259 * Replace nids for \a device to \a nids values
1261 * \param obd MGS obd device
1262 * \param devname nids need to be replaced for this device
1263 * (ex. lustre-OST0000)
1264 * \param nids nids list (ex. nid1,nid2,nid3)
1268 int mgs_replace_nids(const struct lu_env *env,
1269 struct mgs_device *mgs,
1270 char *devname, char *nids)
1272 /* Assume fsname is part of device name */
1273 char fsname[MTI_NAME_MAXLEN];
1280 struct obd_device *mgs_obd = mgs->mgs_obd;
1283 /* We can only change NIDs if no other nodes are connected */
1284 spin_lock(&mgs_obd->obd_dev_lock);
1285 conn_state = mgs_obd->obd_no_conn;
1286 mgs_obd->obd_no_conn = 1;
1287 spin_unlock(&mgs_obd->obd_dev_lock);
1289 /* We can not change nids if not only MGS is started */
1290 if (!only_mgs_is_running(mgs_obd)) {
1291 CERROR("Only MGS is allowed to be started\n");
1292 GOTO(out, rc = -EINPROGRESS);
1295 /* Get fsname and index*/
1296 rc = mgs_parse_devname(devname, fsname, &index);
1300 rc = mgs_find_or_make_fsdb(env, mgs, fsname, &fsdb);
1302 CERROR("%s: can't find fsdb: rc = %d\n", fsname, rc);
1306 /* Process client llogs */
1307 name_create(&logname, fsname, "-client");
1308 rc = mgs_replace_nids_log(env, mgs_obd, fsdb, logname, devname, nids);
1309 name_destroy(&logname);
1311 CERROR("%s: error while replacing NIDs for %s: rc = %d\n",
1312 fsname, devname, rc);
1316 /* Process MDT llogs */
1317 for (i = 0; i < INDEX_MAP_SIZE * 8; i++) {
1318 if (!test_bit(i, fsdb->fsdb_mdt_index_map))
1320 name_create_mdt(&logname, fsname, i);
1321 rc = mgs_replace_nids_log(env, mgs_obd, fsdb, logname, devname, nids);
1322 name_destroy(&logname);
1328 spin_lock(&mgs_obd->obd_dev_lock);
1329 mgs_obd->obd_no_conn = conn_state;
1330 spin_unlock(&mgs_obd->obd_dev_lock);
/*
 * Record an LCFG_SETUP command for the LOV device \a devname whose buffer 1
 * is the raw lov_desc \a desc (sizeof(*desc) bytes). The temporary
 * lustre_cfg is freed after it has been recorded.
 */
1335 static int record_lov_setup(const struct lu_env *env, struct llog_handle *llh,
1336 char *devname, struct lov_desc *desc)
1338 struct mgs_thread_info *mgi = mgs_env_info(env);
1339 struct lustre_cfg *lcfg;
1342 lustre_cfg_bufs_reset(&mgi->mgi_bufs, devname);
1343 lustre_cfg_bufs_set(&mgi->mgi_bufs, 1, desc, sizeof(*desc));
1344 lcfg = lustre_cfg_new(LCFG_SETUP, &mgi->mgi_bufs);
1347 rc = record_lcfg(env, llh, lcfg);
1349 lustre_cfg_free(lcfg);
/*
 * Record an LCFG_SETUP command for the LMV device \a devname whose buffer 1
 * is the raw lmv_desc \a desc (sizeof(*desc) bytes). The temporary
 * lustre_cfg is freed after it has been recorded.
 */
1353 static int record_lmv_setup(const struct lu_env *env, struct llog_handle *llh,
1354 char *devname, struct lmv_desc *desc)
1356 struct mgs_thread_info *mgi = mgs_env_info(env);
1357 struct lustre_cfg *lcfg;
1360 lustre_cfg_bufs_reset(&mgi->mgi_bufs, devname);
1361 lustre_cfg_bufs_set(&mgi->mgi_bufs, 1, desc, sizeof(*desc));
1362 lcfg = lustre_cfg_new(LCFG_SETUP, &mgi->mgi_bufs);
1364 rc = record_lcfg(env, llh, lcfg);
1366 lustre_cfg_free(lcfg);
/*
 * Record an LCFG_ADD_MDC command for \a logname. Buffer order is
 * mdtuuid, index, gen, mdcuuid -- note that mdcuuid goes last although it
 * is the first of these in the parameter list.
 */
1370 static inline int record_mdc_add(const struct lu_env *env,
1371 struct llog_handle *llh,
1372 char *logname, char *mdcuuid,
1373 char *mdtuuid, char *index,
1376 return record_base(env,llh,logname,0,LCFG_ADD_MDC,
1377 mdtuuid,index,gen,mdcuuid);
/* Record an LCFG_LOV_ADD_OBD command for \a lov_name: buffers 1..3 are ost_uuid, index and gen. */
1380 static inline int record_lov_add(const struct lu_env *env,
1381 struct llog_handle *llh,
1382 char *lov_name, char *ost_uuid,
1383 char *index, char *gen)
1385 return record_base(env,llh,lov_name,0,LCFG_LOV_ADD_OBD,
1386 ost_uuid,index,gen,0);
/* Record an LCFG_MOUNTOPT command (no device name): buffers 1..3 are profile, lov_name and mdc_name. */
1389 static inline int record_mount_opt(const struct lu_env *env,
1390 struct llog_handle *llh,
1391 char *profile, char *lov_name,
1394 return record_base(env,llh,NULL,0,LCFG_MOUNTOPT,
1395 profile,lov_name,mdc_name,0);
/*
 * Record an LCFG_MARKER into the open log @llh.
 * The marker is built in mgi->mgi_marker (mgi comes from mgs_env_info(env)):
 * cm_step is taken from fsdb->fsdb_gen, cm_flags from @flags, cm_vers from
 * LUSTRE_VERSION_CODE, @tgtname and @comment are copied with strncpy()
 * bounded by the size of their fields, cm_createtime is the current time
 * and cm_canceltime is set to 0.  The marker is attached as buffer 1 of an
 * LCFG_MARKER lustre_cfg, which is freed after record_lcfg() is called.
 * NOTE(review): the statement controlled by "if (flags & CM_START)" and the
 * return statement are not visible in this listing.  strncpy() with the
 * full field size leaves the field unterminated when the source is at
 * least that long; whether the fields are terminated elsewhere cannot be
 * seen here -- confirm.
 */
1398 static int record_marker(const struct lu_env *env,
1399 struct llog_handle *llh,
1400 struct fs_db *fsdb, __u32 flags,
1401 char *tgtname, char *comment)
1403 struct mgs_thread_info *mgi = mgs_env_info(env);
1404 struct lustre_cfg *lcfg;
1407 if (flags & CM_START)
1409 mgi->mgi_marker.cm_step = fsdb->fsdb_gen;
1410 mgi->mgi_marker.cm_flags = flags;
1411 mgi->mgi_marker.cm_vers = LUSTRE_VERSION_CODE;
1412 strncpy(mgi->mgi_marker.cm_tgtname, tgtname,
1413 sizeof(mgi->mgi_marker.cm_tgtname));
1414 strncpy(mgi->mgi_marker.cm_comment, comment,
1415 sizeof(mgi->mgi_marker.cm_comment));
1416 mgi->mgi_marker.cm_createtime = cfs_time_current_sec();
1417 mgi->mgi_marker.cm_canceltime = 0;
1418 lustre_cfg_bufs_reset(&mgi->mgi_bufs, NULL);
1419 lustre_cfg_bufs_set(&mgi->mgi_bufs, 1, &mgi->mgi_marker,
1420 sizeof(mgi->mgi_marker));
1421 lcfg = lustre_cfg_new(LCFG_MARKER, &mgi->mgi_bufs);
1424 rc = record_lcfg(env, llh, lcfg);
1426 lustre_cfg_free(lcfg);
/*
 * Open the config log @name for appending records and store the handle
 * through @llh.
 * The log is opened (and created if needed) with llog_open_create() in the
 * LLOG_CONFIG_ORIG_CTXT context of the MGS obd, then initialised as a plain
 * llog (LLOG_F_IS_PLAIN) tagged with the fixed "config_uuid" UUID.  The
 * context reference is dropped with llog_ctxt_put() and a failure is
 * reported with CERROR.
 * NOTE(review): the conditions guarding the -EBUSY and -ENODEV exits, the
 * condition under which llog_close() is called, and the return statement
 * are not visible in this listing.
 */
1430 static int record_start_log(const struct lu_env *env, struct mgs_device *mgs,
1431 struct llog_handle **llh, char *name)
1433 static struct obd_uuid cfg_uuid = { .uuid = "config_uuid" };
1434 struct llog_ctxt *ctxt;
1438 GOTO(out, rc = -EBUSY);
1440 ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
1442 GOTO(out, rc = -ENODEV);
1443 LASSERT(ctxt->loc_obd == mgs->mgs_obd);
1445 rc = llog_open_create(env, ctxt, llh, NULL, name);
1448 rc = llog_init_handle(env, *llh, LLOG_F_IS_PLAIN, &cfg_uuid);
1450 llog_close(env, *llh);
1452 llog_ctxt_put(ctxt);
1455 CERROR("%s: can't start log %s: rc = %d\n",
1456 mgs->mgs_obd->obd_name, name, rc);
/*
 * Finish writing to a log opened with record_start_log(): the handle that
 * @llh points to is closed with llog_close().
 * NOTE(review): only the llog_close() call is visible in this listing;
 * whether *llh is reset afterwards and what is returned cannot be seen
 * here -- presumably the llog_close() result.
 */
1462 static int record_end_log(const struct lu_env *env, struct llog_handle **llh)
1466 rc = llog_close(env, *llh);
/*
 * Inspect the config log @name to decide whether it holds any records.
 * The log is opened with LLOG_OPEN_EXISTS in the LLOG_CONFIG_ORIG_CTXT
 * context, initialised as a plain llog, and its size is read with
 * llog_get_size(); the handle is then closed and the context released.
 * Callers in this file use the result as a boolean ("is empty").
 * NOTE(review): the error handling after llog_open() and the final return
 * expression are not visible here; the trailing "header is record 1"
 * comment suggests that a size of at most 1 counts as empty -- confirm.
 */
1472 static int mgs_log_is_empty(const struct lu_env *env,
1473 struct mgs_device *mgs, char *name)
1475 struct llog_handle *llh;
1476 struct llog_ctxt *ctxt;
1479 ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
1480 LASSERT(ctxt != NULL);
1481 rc = llog_open(env, ctxt, &llh, NULL, name, LLOG_OPEN_EXISTS);
1488 llog_init_handle(env, llh, LLOG_F_IS_PLAIN, NULL);
1490 GOTO(out_close, rc);
1491 rc = llog_get_size(llh);
1494 llog_close(env, llh);
1496 llog_ctxt_put(ctxt);
1497 /* header is record 1 */
1501 /******************** config "macros" *********************/
1503 /* write an lcfg directly into a log (with markers) */
/*
 * Append @lcfg to the log @logname, bracketed by a CM_START and a CM_END
 * marker that both carry @devname and @comment.  The log is opened with
 * record_start_log() and closed with record_end_log().
 * NOTE(review): the checks on rc between the steps and the return
 * statement are not visible in this listing.
 */
1504 static int mgs_write_log_direct(const struct lu_env *env,
1505 struct mgs_device *mgs, struct fs_db *fsdb,
1506 char *logname, struct lustre_cfg *lcfg,
1507 char *devname, char *comment)
1509 struct llog_handle *llh = NULL;
1516 rc = record_start_log(env, mgs, &llh, logname);
1520 /* FIXME These should be a single journal transaction */
1521 rc = record_marker(env, llh, fsdb, CM_START, devname, comment);
1524 rc = record_lcfg(env, llh, lcfg);
1527 rc = record_marker(env, llh, fsdb, CM_END, devname, comment);
1531 record_end_log(env, &llh);
1535 /* write the lcfg in all logs for the given fs */
/*
 * Apply a parameter change (@lcfg, identified by @devname and @comment) to
 * the config logs that belong to the filesystem named in mti->mti_fsname.
 *
 * Visible steps:
 *  - build the "<fsname>-params" log name and, if that log is empty, open
 *    and close it once with record_start_log()/record_end_log();
 *  - list the CONFIGS directory with class_dentry_readdir();
 *  - for every entry: unlink it from the list, test for the "-sptlrpc" rule
 *    log and (when server_only is set) for "-client" logs, and for names
 *    that start with the filesystem name call mgs_modify() with CM_SKIP to
 *    erase old settings of the parameter before writing the new record
 *    with mgs_write_log_direct(); the entry is released with
 *    mgs_direntry_free().
 * NOTE(review): several parameters (fsdb, server_only), the statements
 * executed by the "-sptlrpc"/"-client" tests, the conditions around the
 * CERROR calls and the return statement sit on lines that are not visible
 * in this listing.
 */
1536 int mgs_write_log_direct_all(const struct lu_env *env,
1537 struct mgs_device *mgs,
1539 struct mgs_target_info *mti,
1540 struct lustre_cfg *lcfg,
1541 char *devname, char *comment,
1545 struct mgs_direntry *dirent, *n;
1546 char *fsname = mti->mti_fsname;
1548 int rc = 0, len = strlen(fsname);
1551 /* We need to set params for any future logs
1552 as well. FIXME Append this file to every new log.
1553 Actually, we should store as params (text), not llogs. Or
1555 rc = name_create(&logname, fsname, "-params");
1558 if (mgs_log_is_empty(env, mgs, logname)) {
1559 struct llog_handle *llh = NULL;
1560 rc = record_start_log(env, mgs, &llh, logname);
1561 record_end_log(env, &llh);
1563 name_destroy(&logname);
1567 /* Find all the logs in the CONFIGS directory */
1568 rc = class_dentry_readdir(env, mgs, &list);
1572 /* Could use fsdb index maps instead of directory listing */
1573 cfs_list_for_each_entry_safe(dirent, n, &list, list) {
1574 cfs_list_del(&dirent->list);
1575 /* don't write to sptlrpc rule log */
1576 if (strstr(dirent->name, "-sptlrpc") != NULL)
1579 /* caller wants write server logs only */
1580 if (server_only && strstr(dirent->name, "-client") != NULL)
1583 if (strncmp(fsname, dirent->name, len) == 0) {
1584 CDEBUG(D_MGS, "Changing log %s\n", dirent->name);
1585 /* Erase any old settings of this same parameter */
1586 rc = mgs_modify(env, mgs, fsdb, mti, dirent->name,
1587 devname, comment, CM_SKIP);
1589 CERROR("%s: Can't modify llog %s: rc = %d\n",
1590 mgs->mgs_obd->obd_name, dirent->name,rc);
1591 /* Write the new one */
1593 rc = mgs_write_log_direct(env, mgs, fsdb,
1598 CERROR("%s: writing log %s: rc = %d\n",
1599 mgs->mgs_obd->obd_name,
1604 mgs_direntry_free(dirent);
/*
 * Forward declarations of helpers that are defined further down in this
 * file but are already called by mgs_steal_llog_handler() below.
 * NOTE(review): some parameter lines of the first two prototypes are not
 * visible in this listing.
 */
1610 static int mgs_write_log_mdc_to_mdt(const struct lu_env *env,
1611 struct mgs_device *mgs,
1613 struct mgs_target_info *mti,
1615 static int mgs_write_log_osc_to_lov(const struct lu_env *env,
1616 struct mgs_device *mgs,
1618 struct mgs_target_info *mti,
1619 char *logname, char *suffix, char *lovname,
1620 enum lustre_sec_part sec_part, int flags);
1621 static int name_create_mdt_and_lov(char **logname, char **lovname,
1622 struct fs_db *fsdb, int i);
/*
 * Append " <key><val>" to the NUL-terminated string @params.
 * The available space is computed from the size of the mti_params field of
 * struct mgs_target_info, so @params is expected to be such a buffer.  When
 * the new text would not fit, a CERROR is logged instead.  A NULL @key is
 * treated as an empty string by the visible format arguments.
 * NOTE(review): the declaration of keylen, any NULL guard in front of
 * strlen(key), and the return statements are not visible in this listing
 * -- confirm that @key is checked before strlen() is applied to it.
 */
1624 static int add_param(char *params, char *key, char *val)
1626 char *start = params + strlen(params);
1627 char *end = params + sizeof(((struct mgs_target_info *)0)->mti_params);
1631 keylen = strlen(key);
1632 if (start + 1 + keylen + strlen(val) >= end) {
1633 CERROR("params are too long: %s %s%s\n",
1634 params, key != NULL ? key : "", val);
1638 sprintf(start, " %s%s", key != NULL ? key : "", val);
/*
 * Per-record callback (passed to llog_process_or_fork() by
 * mgs_steal_llog_for_mdt_from_client()) that copies OSC/MDC setup found in
 * a client log into the log of the MDT being added.
 *
 * @data is a struct temp_comp: comp_mti is the target being registered,
 * comp_tmti a scratch mgs_target_info filled from the records, comp_fsdb
 * the filesystem database and comp_obd the MGS obd.
 *
 * Visible handling, per record type:
 *  - anything other than OBD_CFG_REC, or a record failing
 *    lustre_cfg_sanity_check(), is reported with CERROR;
 *  - LCFG_MARKER: "add osc" / "add mdc" start and end markers switch the
 *    function-static state got_an_osc_or_mdc (0 none, 1 osc, 2 mdc) and
 *    remember the step in last_step; for "add osc" a matching
 *    "add osc(copied)" marker is written to the MDT log;
 *  - LCFG_ADD_UUID: while tmti->mti_uuid is still empty the nid is stored
 *    in tmti->mti_nids, otherwise it is appended to tmti->mti_params as a
 *    PARAM_FAILNODE entry;
 *  - LCFG_SETUP: string buffer 1 is copied into tmti->mti_uuid;
 *  - LCFG_SPTLRPC_CONF: ignored;
 *  - LCFG_ADD_MDC: the index is parsed from buffer 2, then
 *    mgs_write_log_mdc_to_mdt() is called and tmti is cleared;
 *  - LCFG_LOV_ADD_OBD: the MDT log and lov names are created, the index is
 *    parsed from buffer 2 and mgs_write_log_osc_to_lov() is called.
 *
 * The state lives in static variables, so it is shared by every invocation
 * of this function.
 * NOTE(review): many lines (returns, rc checks, some call arguments) are
 * not visible in this listing.  The memcpy() calls copy strlen() bytes,
 * i.e. without the terminating NUL, and therefore depend on the destination
 * already being zero-filled -- confirm that tmti is zeroed before first
 * use.
 */
1642 static int mgs_steal_llog_handler(const struct lu_env *env,
1643 struct llog_handle *llh,
1644 struct llog_rec_hdr *rec, void *data)
1646 struct mgs_device *mgs;
1647 struct obd_device *obd;
1648 struct mgs_target_info *mti, *tmti;
1650 int cfg_len = rec->lrh_len;
1651 char *cfg_buf = (char*) (rec + 1);
1652 struct lustre_cfg *lcfg;
1654 struct llog_handle *mdt_llh = NULL;
1655 static int got_an_osc_or_mdc = 0;
1656 /* 0: not found any osc/mdc;
1660 static int last_step = -1;
1664 mti = ((struct temp_comp*)data)->comp_mti;
1665 tmti = ((struct temp_comp*)data)->comp_tmti;
1666 fsdb = ((struct temp_comp*)data)->comp_fsdb;
1667 obd = ((struct temp_comp *)data)->comp_obd;
1668 mgs = lu2mgs_dev(obd->obd_lu_dev);
1671 if (rec->lrh_type != OBD_CFG_REC) {
1672 CERROR("unhandled lrh_type: %#x\n", rec->lrh_type);
1676 rc = lustre_cfg_sanity_check(cfg_buf, cfg_len);
1678 CERROR("Insane cfg\n");
1682 lcfg = (struct lustre_cfg *)cfg_buf;
1684 if (lcfg->lcfg_command == LCFG_MARKER) {
1685 struct cfg_marker *marker;
1686 marker = lustre_cfg_buf(lcfg, 1);
1687 if (!strncmp(marker->cm_comment,"add osc",7) &&
1688 (marker->cm_flags & CM_START)){
1689 got_an_osc_or_mdc = 1;
1690 strncpy(tmti->mti_svname, marker->cm_tgtname,
1691 sizeof(tmti->mti_svname));
1692 rc = record_start_log(env, mgs, &mdt_llh,
1696 rc = record_marker(env, mdt_llh, fsdb, CM_START,
1697 mti->mti_svname,"add osc(copied)");
1698 record_end_log(env, &mdt_llh);
1699 last_step = marker->cm_step;
1702 if (!strncmp(marker->cm_comment,"add osc",7) &&
1703 (marker->cm_flags & CM_END)){
1704 LASSERT(last_step == marker->cm_step);
1706 got_an_osc_or_mdc = 0;
1707 rc = record_start_log(env, mgs, &mdt_llh,
1711 rc = record_marker(env, mdt_llh, fsdb, CM_END,
1712 mti->mti_svname,"add osc(copied)");
1713 record_end_log(env, &mdt_llh);
1716 if (!strncmp(marker->cm_comment,"add mdc",7) &&
1717 (marker->cm_flags & CM_START)){
1718 got_an_osc_or_mdc = 2;
1719 last_step = marker->cm_step;
1720 memcpy(tmti->mti_svname, marker->cm_tgtname,
1721 strlen(marker->cm_tgtname));
1725 if (!strncmp(marker->cm_comment,"add mdc",7) &&
1726 (marker->cm_flags & CM_END)){
1727 LASSERT(last_step == marker->cm_step);
1729 got_an_osc_or_mdc = 0;
1734 if (got_an_osc_or_mdc == 0 || last_step < 0)
1737 if (lcfg->lcfg_command == LCFG_ADD_UUID) {
1738 uint64_t nodenid = lcfg->lcfg_nid;
1740 if (strlen(tmti->mti_uuid) == 0) {
1741 /* target uuid not set, this config record is before
1742 * LCFG_SETUP, this nid is one of target node nid.
1744 tmti->mti_nids[tmti->mti_nid_count] = nodenid;
1745 tmti->mti_nid_count++;
1747 /* failover node nid */
1748 rc = add_param(tmti->mti_params, PARAM_FAILNODE,
1749 libcfs_nid2str(nodenid));
1755 if (lcfg->lcfg_command == LCFG_SETUP) {
1758 target = lustre_cfg_string(lcfg, 1);
1759 memcpy(tmti->mti_uuid, target, strlen(target));
1763 /* ignore client side sptlrpc_conf_log */
1764 if (lcfg->lcfg_command == LCFG_SPTLRPC_CONF)
1767 if (lcfg->lcfg_command == LCFG_ADD_MDC) {
1770 if (sscanf(lustre_cfg_buf(lcfg, 2), "%d", &index) != 1)
1773 memcpy(tmti->mti_fsname, mti->mti_fsname,
1774 strlen(mti->mti_fsname));
1775 tmti->mti_stripe_index = index;
1777 rc = mgs_write_log_mdc_to_mdt(env, mgs, fsdb, tmti,
1779 memset(tmti, 0, sizeof(*tmti));
1783 if (lcfg->lcfg_command == LCFG_LOV_ADD_OBD) {
1786 char *logname, *lovname;
1788 rc = name_create_mdt_and_lov(&logname, &lovname, fsdb,
1789 mti->mti_stripe_index);
1792 sprintf(mdt_index, "-MDT%04x", mti->mti_stripe_index);
1794 if (sscanf(lustre_cfg_buf(lcfg, 2), "%d", &index) != 1) {
1795 name_destroy(&logname);
1796 name_destroy(&lovname);
1800 tmti->mti_stripe_index = index;
1801 rc = mgs_write_log_osc_to_lov(env, mgs, fsdb, tmti, logname,
1804 name_destroy(&logname);
1805 name_destroy(&lovname);
1811 /* fsdb->fsdb_mutex is already held in mgs_write_log_target */
1812 /* adapted from mgs_get_fsdb_from_llog */
/*
 * Run mgs_steal_llog_handler() over every record of an existing client log
 * (client_name) so that its OSC/MDC setup is copied for a new MDT.
 * A scratch mgs_target_info is allocated and published through
 * comp->comp_tmti, comp->comp_obd is set to the MGS obd, the log is opened,
 * initialised as a plain llog and processed with llog_process_or_fork()
 * (last argument false); finally the handle is closed and the llog context
 * released.
 * NOTE(review): the fsdb and client_name parameters, the flags passed to
 * llog_open(), the rc checks, the release of the scratch structure and the
 * return statement are on lines that are not visible in this listing.
 */
1813 static int mgs_steal_llog_for_mdt_from_client(const struct lu_env *env,
1814 struct mgs_device *mgs,
1816 struct temp_comp* comp)
1818 struct llog_handle *loghandle;
1819 struct mgs_target_info *tmti;
1820 struct llog_ctxt *ctxt;
1825 ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
1826 LASSERT(ctxt != NULL);
1828 OBD_ALLOC_PTR(tmti);
1830 GOTO(out_ctxt, rc = -ENOMEM);
1832 comp->comp_tmti = tmti;
1833 comp->comp_obd = mgs->mgs_obd;
1835 rc = llog_open(env, ctxt, &loghandle, NULL, client_name,
1843 rc = llog_init_handle(env, loghandle, LLOG_F_IS_PLAIN, NULL);
1845 GOTO(out_close, rc);
1847 rc = llog_process_or_fork(env, loghandle, mgs_steal_llog_handler,
1848 (void *)comp, NULL, false);
1849 CDEBUG(D_MGS, "steal llog re = %d\n", rc);
1851 llog_close(env, loghandle);
1855 llog_ctxt_put(ctxt);
1859 /* lmv is the second thing for client logs */
1860 /* copied from mgs_write_log_lov. Please refer to that. */
/*
 * Write the "lmv setup" stanza for @lmvname into the log @logname.
 * An lmv_desc is allocated, its target counts are set to 0 and its uuid to
 * "<lmvname>_UUID".  Between a CM_START and a CM_END marker the function
 * records an attach of type "lmv" with that uuid followed by the LMV setup
 * record; the log is closed and the descriptor freed at the end.
 * NOTE(review): the rc checks between the steps, the statement executed
 * when the allocation fails and the return statement are not visible in
 * this listing.
 */
1861 static int mgs_write_log_lmv(const struct lu_env *env,
1862 struct mgs_device *mgs,
1864 struct mgs_target_info *mti,
1865 char *logname, char *lmvname)
1867 struct llog_handle *llh = NULL;
1868 struct lmv_desc *lmvdesc;
1873 CDEBUG(D_MGS, "Writing lmv(%s) log for %s\n", lmvname,logname);
1875 OBD_ALLOC_PTR(lmvdesc);
1876 if (lmvdesc == NULL)
1878 lmvdesc->ld_active_tgt_count = 0;
1879 lmvdesc->ld_tgt_count = 0;
1880 sprintf((char*)lmvdesc->ld_uuid.uuid, "%s_UUID", lmvname);
1881 uuid = (char *)lmvdesc->ld_uuid.uuid;
1883 rc = record_start_log(env, mgs, &llh, logname);
1886 rc = record_marker(env, llh, fsdb, CM_START, lmvname, "lmv setup");
1889 rc = record_attach(env, llh, lmvname, "lmv", uuid);
1892 rc = record_lmv_setup(env, llh, lmvname, lmvdesc);
1895 rc = record_marker(env, llh, fsdb, CM_END, lmvname, "lmv setup");
1899 record_end_log(env, &llh);
1901 OBD_FREE_PTR(lmvdesc);
1905 /* lov is the first thing in the mdt and client logs */
/*
 * Write the "lov setup" stanza for @lovname into the log @logname.
 * A lov_desc is allocated and filled with defaults: LOV_DESC_MAGIC, no
 * targets, stripe count 1, pattern LOV_PATTERN_RAID0, stripe size 1 MiB
 * (1024 * 1024), stripe offset -1, qos max age QOS_DEFAULT_MAXAGE and uuid
 * "<lovname>_UUID".  Between a CM_START and a CM_END marker the function
 * records an attach of type "lov" with that uuid followed by the LOV setup
 * record; the log is closed and the descriptor freed at the end.
 * NOTE(review): the rc checks between the steps, the statement executed
 * when the allocation fails and the return statement are not visible in
 * this listing.
 */
1906 static int mgs_write_log_lov(const struct lu_env *env, struct mgs_device *mgs,
1907 struct fs_db *fsdb, struct mgs_target_info *mti,
1908 char *logname, char *lovname)
1910 struct llog_handle *llh = NULL;
1911 struct lov_desc *lovdesc;
1916 CDEBUG(D_MGS, "Writing lov(%s) log for %s\n", lovname, logname);
1919 #01 L attach 0:lov_mdsA 1:lov 2:71ccb_lov_mdsA_19f961a9e1
1920 #02 L lov_setup 0:lov_mdsA 1:(struct lov_desc)
1921 uuid=lov1_UUID, stripe count=1, size=1048576, offset=0, pattern=0
1924 /* FIXME just make lov_setup accept empty desc (put uuid in buf 2) */
1925 OBD_ALLOC_PTR(lovdesc);
1926 if (lovdesc == NULL)
1928 lovdesc->ld_magic = LOV_DESC_MAGIC;
1929 lovdesc->ld_tgt_count = 0;
1930 /* Defaults. Can be changed later by lcfg config_param */
1931 lovdesc->ld_default_stripe_count = 1;
1932 lovdesc->ld_pattern = LOV_PATTERN_RAID0;
1933 lovdesc->ld_default_stripe_size = 1024 * 1024;
1934 lovdesc->ld_default_stripe_offset = -1;
1935 lovdesc->ld_qos_maxage = QOS_DEFAULT_MAXAGE;
1936 sprintf((char*)lovdesc->ld_uuid.uuid, "%s_UUID", lovname);
1937 /* can these be the same? */
1938 uuid = (char *)lovdesc->ld_uuid.uuid;
1940 /* This should always be the first entry in a log.
1941 rc = mgs_clear_log(obd, logname); */
1942 rc = record_start_log(env, mgs, &llh, logname);
1945 /* FIXME these should be a single journal transaction */
1946 rc = record_marker(env, llh, fsdb, CM_START, lovname, "lov setup");
1949 rc = record_attach(env, llh, lovname, "lov", uuid);
1952 rc = record_lov_setup(env, llh, lovname, lovdesc);
1955 rc = record_marker(env, llh, fsdb, CM_END, lovname, "lov setup");
1960 record_end_log(env, &llh);
1962 OBD_FREE_PTR(lovdesc);
1966 /* add failnids to open log */
/*
 * Record the failover nids listed in mti->mti_params into the already open
 * log @llh for the client device cliname.
 * Each PARAM_FAILNODE entry found by class_find_param() is parsed nid by
 * nid with class_parse_nid().  The failover node has no name of its own,
 * so the string form of the first nid is used as its uuid; every nid is
 * recorded with record_add_uuid() against that uuid and a
 * record_add_conn() ties the uuid to cliname.  The temporary uuid string
 * is released with name_destroy().
 * NOTE(review): the cliname parameter, the loop nesting (braces), the rc
 * checks and the return statement are on lines that are not visible in
 * this listing, so the exact placement of record_add_conn() relative to
 * the loops should be confirmed.
 */
1967 static int mgs_write_log_failnids(const struct lu_env *env,
1968 struct mgs_target_info *mti,
1969 struct llog_handle *llh,
1972 char *failnodeuuid = NULL;
1973 char *ptr = mti->mti_params;
1978 #03 L add_uuid nid=uml1@tcp(0x20000c0a80201) nal=90 0: 1:uml1_UUID
1979 #04 L add_uuid nid=1@elan(0x1000000000001) nal=90 0: 1:uml1_UUID
1980 #05 L setup 0:OSC_uml1_ost1_mdsA 1:ost1_UUID 2:uml1_UUID
1981 #06 L add_uuid nid=uml2@tcp(0x20000c0a80202) nal=90 0: 1:uml2_UUID
1982 #0x L add_uuid nid=2@elan(0x1000000000002) nal=90 0: 1:uml2_UUID
1983 #07 L add_conn 0:OSC_uml1_ost1_mdsA 1:uml2_UUID
1986 /* Pull failnid info out of params string */
1987 while (class_find_param(ptr, PARAM_FAILNODE, &ptr) == 0) {
1988 while (class_parse_nid(ptr, &nid, &ptr) == 0) {
1989 if (failnodeuuid == NULL) {
1990 /* We don't know the failover node name,
1991 so just use the first nid as the uuid */
1992 rc = name_create(&failnodeuuid,
1993 libcfs_nid2str(nid), "");
1997 CDEBUG(D_MGS, "add nid %s for failover uuid %s, "
1998 "client %s\n", libcfs_nid2str(nid),
1999 failnodeuuid, cliname);
2000 rc = record_add_uuid(env, llh, nid, failnodeuuid);
2003 rc = record_add_conn(env, llh, cliname, failnodeuuid);
2006 name_destroy(&failnodeuuid);
/*
 * Add an MDC for the MDT described by @mti to the lmv @lmvname in the log
 * @logname.
 * The log must not be empty (an error is logged otherwise).  Names are
 * derived as follows: node uuid = string form of mti->mti_nids[0], mdc
 * name = "<svname>-mdc", mdc uuid = "<mdcname>_UUID", lmv uuid =
 * "<lmvname>_UUID".  Between a CM_START and a CM_END marker the function
 * records one add_uuid per nid of the target, the MDC attach (against the
 * lmv uuid) and setup, the failover nids, and the LCFG_ADD_MDC record that
 * carries the stripe index as a decimal string.  The log is then closed
 * and all temporary names destroyed.
 * NOTE(review): the fsdb parameter, the marker comment strings, the rc
 * checks and the return statement are on lines that are not visible in
 * this listing.
 */
2010 static int mgs_write_log_mdc_to_lmv(const struct lu_env *env,
2011 struct mgs_device *mgs,
2013 struct mgs_target_info *mti,
2014 char *logname, char *lmvname)
2016 struct llog_handle *llh = NULL;
2017 char *mdcname = NULL;
2018 char *nodeuuid = NULL;
2019 char *mdcuuid = NULL;
2020 char *lmvuuid = NULL;
2025 if (mgs_log_is_empty(env, mgs, logname)) {
2026 CERROR("log is empty! Logical error\n");
2030 CDEBUG(D_MGS, "adding mdc for %s to log %s:lmv(%s)\n",
2031 mti->mti_svname, logname, lmvname);
2033 rc = name_create(&nodeuuid, libcfs_nid2str(mti->mti_nids[0]), "");
2036 rc = name_create(&mdcname, mti->mti_svname, "-mdc");
2039 rc = name_create(&mdcuuid, mdcname, "_UUID");
2042 rc = name_create(&lmvuuid, lmvname, "_UUID");
2046 rc = record_start_log(env, mgs, &llh, logname);
2049 rc = record_marker(env, llh, fsdb, CM_START, mti->mti_svname,
2053 for (i = 0; i < mti->mti_nid_count; i++) {
2054 CDEBUG(D_MGS, "add nid %s for mdt\n",
2055 libcfs_nid2str(mti->mti_nids[i]));
2057 rc = record_add_uuid(env, llh, mti->mti_nids[i], nodeuuid);
2062 rc = record_attach(env, llh, mdcname, LUSTRE_MDC_NAME, lmvuuid);
2065 rc = record_setup(env, llh, mdcname, mti->mti_uuid, nodeuuid, 0, 0);
2068 rc = mgs_write_log_failnids(env, mti, llh, mdcname);
2071 snprintf(index, sizeof(index), "%d", mti->mti_stripe_index);
2072 rc = record_mdc_add(env, llh, lmvname, mdcuuid, mti->mti_uuid,
2076 rc = record_marker(env, llh, fsdb, CM_END, mti->mti_svname,
2081 record_end_log(env, &llh);
2083 name_destroy(&lmvuuid);
2084 name_destroy(&mdcuuid);
2085 name_destroy(&mdcname);
2086 name_destroy(&nodeuuid);
2090 /* add new mdc to already existent MDS */
/*
 * Add an MDC that points at the MDT described by @mti to the log of another
 * MDT (logname).
 * The log must not be empty (an error is logged otherwise).  Names are
 * derived as follows: node uuid = string form of mti->mti_nids[0], mdc
 * name = "<logname>-mdc%04x" using mti->mti_stripe_index, mdc uuid =
 * "<mdcname>_UUID", mdt uuid = "<logname>_UUID".  Between "add mdc"
 * CM_START/CM_END markers the function records one add_uuid per nid, the
 * MDC attach and setup, the failover nids, and the LCFG_ADD_MDC record that
 * carries the index as a decimal string.  The log is then closed and all
 * temporary names destroyed.
 * NOTE(review): the fsdb and logname parameters, the rc checks and the
 * return statement are on lines that are not visible in this listing.
 */
2091 static int mgs_write_log_mdc_to_mdt(const struct lu_env *env,
2092 struct mgs_device *mgs,
2094 struct mgs_target_info *mti,
2097 struct llog_handle *llh = NULL;
2098 char *nodeuuid = NULL;
2099 char *mdcname = NULL;
2100 char *mdcuuid = NULL;
2101 char *mdtuuid = NULL;
2102 int idx = mti->mti_stripe_index;
2107 if (mgs_log_is_empty(env, mgs, logname)) {
2108 CERROR("log is empty! Logical error\n");
2112 CDEBUG(D_MGS, "adding mdc index %d to %s\n", idx, logname);
2114 rc = name_create(&nodeuuid, libcfs_nid2str(mti->mti_nids[0]), "");
2117 snprintf(index, sizeof(index), "-mdc%04x", idx);
2118 rc = name_create(&mdcname, logname, index);
2121 rc = name_create(&mdcuuid, mdcname, "_UUID");
2124 rc = name_create(&mdtuuid, logname, "_UUID");
2128 rc = record_start_log(env, mgs, &llh, logname);
2131 rc = record_marker(env, llh, fsdb, CM_START, mti->mti_svname, "add mdc");
2134 for (i = 0; i < mti->mti_nid_count; i++) {
2135 CDEBUG(D_MGS, "add nid %s for mdt\n",
2136 libcfs_nid2str(mti->mti_nids[i]));
2137 rc = record_add_uuid(env, llh, mti->mti_nids[i], nodeuuid);
2141 rc = record_attach(env, llh, mdcname, LUSTRE_MDC_NAME, mdcuuid);
2144 rc = record_setup(env, llh, mdcname, mti->mti_uuid, nodeuuid, 0, 0);
2147 rc = mgs_write_log_failnids(env, mti, llh, mdcname);
2150 snprintf(index, sizeof(index), "%d", idx);
2152 rc = record_mdc_add(env, llh, logname, mdcuuid, mti->mti_uuid,
2156 rc = record_marker(env, llh, fsdb, CM_END, mti->mti_svname, "add mdc");
2160 record_end_log(env, &llh);
2162 name_destroy(&mdtuuid);
2163 name_destroy(&mdcuuid);
2164 name_destroy(&mdcname);
2165 name_destroy(&nodeuuid);
/*
 * Write the MDT's own startup log, named after mti->mti_svname.
 * A buffer of sizeof(struct obd_uuid) bytes is allocated for the uuid
 * string "<svname>_UUID".  The failover mode is read from mti->mti_params:
 * failout is set when the PARAM_FAILMODE value starts with "failout".  The
 * lov name is "<svname>-mdtlov"; if the log is still empty the lov setup is
 * written first with mgs_write_log_lov().  Between "add mdt"
 * CM_START/CM_END markers the function then records the MDT attach, the
 * mount options (profile = log name, lov name, no mdc) and the setup record
 * whose arguments are the uuid, the stripe index as a decimal string, the
 * lov name and "n" or "f" depending on failout.
 * NOTE(review): the fsdb parameter, the mdt_index buffer declaration, the
 * rc checks and the return statement are on lines that are not visible in
 * this listing.
 */
2169 static int mgs_write_log_mdt0(const struct lu_env *env,
2170 struct mgs_device *mgs,
2172 struct mgs_target_info *mti)
2174 char *log = mti->mti_svname;
2175 struct llog_handle *llh = NULL;
2176 char *uuid, *lovname;
2178 char *ptr = mti->mti_params;
2179 int rc = 0, failout = 0;
2182 OBD_ALLOC(uuid, sizeof(struct obd_uuid));
2186 if (class_find_param(ptr, PARAM_FAILMODE, &ptr) == 0)
2187 failout = (strncmp(ptr, "failout", 7) == 0);
2189 rc = name_create(&lovname, log, "-mdtlov");
2192 if (mgs_log_is_empty(env, mgs, log)) {
2193 rc = mgs_write_log_lov(env, mgs, fsdb, mti, log, lovname);
2198 sprintf(mdt_index, "%d", mti->mti_stripe_index);
2200 rc = record_start_log(env, mgs, &llh, log);
2204 /* add MDT itself */
2206 /* FIXME this whole fn should be a single journal transaction */
2207 sprintf(uuid, "%s_UUID", log);
2208 rc = record_marker(env, llh, fsdb, CM_START, log, "add mdt");
2211 rc = record_attach(env, llh, log, LUSTRE_MDT_NAME, uuid);
2214 rc = record_mount_opt(env, llh, log, lovname, NULL);
2217 rc = record_setup(env, llh, log, uuid, mdt_index, lovname,
2218 failout ? "n" : "f");
2221 rc = record_marker(env, llh, fsdb, CM_END, log, "add mdt");
2225 record_end_log(env, &llh);
2227 name_destroy(&lovname);
2229 OBD_FREE(uuid, sizeof(struct obd_uuid));
/*
 * Build the log name of MDT number @i and the name of its lov.
 * The log name comes from name_create_mdt() using fsdb->fsdb_name.  For
 * index 0 on a filesystem flagged FSDB_OSCNAME18 the lov is named
 * "<fsname>-mdtlov"; in every other case it is "<logname>-mdtlov".
 * NOTE(review): the rc checks and the return statement are not visible in
 * this listing; name_destroy(logname) presumably runs only when creating
 * the lov name failed -- confirm.
 */
2233 static int name_create_mdt_and_lov(char **logname, char **lovname,
2234 struct fs_db *fsdb, int i)
2238 rc = name_create_mdt(logname, fsdb->fsdb_name, i);
2242 if (i == 0 && test_bit(FSDB_OSCNAME18, &fsdb->fsdb_flags))
2243 rc = name_create(lovname, fsdb->fsdb_name, "-mdtlov");
2245 rc = name_create(lovname, *logname, "-mdtlov");
2247 name_destroy(logname);
/*
 * Build the name of the OSC that MDT number @i uses for the OST @ostname.
 * For index 0 on a filesystem flagged FSDB_OSCNAME18 the suffix is "-osc";
 * otherwise it is "-osc-MDT%04x" formatted with @i.  The result of
 * name_create() is returned.
 * NOTE(review): the declaration of the suffix buffer is on a line that is
 * not visible in this listing.
 */
2253 static inline int name_create_mdt_osc(char **oscname, char *ostname,
2254 struct fs_db *fsdb, int i)
2258 if (i == 0 && test_bit(FSDB_OSCNAME18, &fsdb->fsdb_flags))
2259 sprintf(suffix, "-osc");
2261 sprintf(suffix, "-osc-MDT%04x", i);
2262 return name_create(oscname, ostname, suffix);
2265 /* envelope method for all layers log */
/*
 * Write every log affected by the registration of a new MDT (@mti).
 *
 * Visible steps:
 *  - make up "<svname>_UUID" when the target did not supply a uuid;
 *  - write the MDT's own log with mgs_write_log_mdt0();
 *  - build the "<fsname>-client" log name and, if that log is empty, start
 *    it with the lov and lmv setup stanzas;
 *  - fill mgi->mgi_comp and copy the lov/lmv related client records for
 *    this MDT with mgs_steal_llog_for_mdt_from_client();
 *  - add the MDC for this MDT to the client lmv;
 *  - record the client mount options between markers;
 *  - for every other MDT index set in fsdb->fsdb_mdt_index_map, add an MDC
 *    for the new MDT to that MDT's log.
 * NOTE(review): the fsdb parameter, several call arguments, the rc checks,
 * the condition around the mount-option stanza and the return statement
 * are on lines that are not visible in this listing.
 */
2266 static int mgs_write_log_mdt(const struct lu_env *env,
2267 struct mgs_device *mgs,
2269 struct mgs_target_info *mti)
2271 struct mgs_thread_info *mgi = mgs_env_info(env);
2272 struct llog_handle *llh = NULL;
2277 CDEBUG(D_MGS, "writing new mdt %s\n", mti->mti_svname);
2279 if (mti->mti_uuid[0] == '\0') {
2280 /* Make up our own uuid */
2281 snprintf(mti->mti_uuid, sizeof(mti->mti_uuid),
2282 "%s_UUID", mti->mti_svname);
2286 rc = mgs_write_log_mdt0(env, mgs, fsdb, mti);
2289 /* Append the mdt info to the client log */
2290 rc = name_create(&cliname, mti->mti_fsname, "-client");
2294 if (mgs_log_is_empty(env, mgs, cliname)) {
2295 /* Start client log */
2296 rc = mgs_write_log_lov(env, mgs, fsdb, mti, cliname,
2300 rc = mgs_write_log_lmv(env, mgs, fsdb, mti, cliname,
2307 #09 L add_uuid nid=uml1@tcp(0x20000c0a80201) 0: 1:uml1_UUID
2308 #10 L attach 0:MDC_uml1_mdsA_MNT_client 1:mdc 2:1d834_MNT_client_03f
2309 #11 L setup 0:MDC_uml1_mdsA_MNT_client 1:mdsA_UUID 2:uml1_UUID
2310 #12 L add_uuid nid=uml2@tcp(0x20000c0a80202) 0: 1:uml2_UUID
2311 #13 L add_conn 0:MDC_uml1_mdsA_MNT_client 1:uml2_UUID
2312 #14 L mount_option 0: 1:client 2:lov1 3:MDC_uml1_mdsA_MNT_client
2315 /* copy client info about lov/lmv */
2316 mgi->mgi_comp.comp_mti = mti;
2317 mgi->mgi_comp.comp_fsdb = fsdb;
2319 rc = mgs_steal_llog_for_mdt_from_client(env, mgs, cliname,
2323 rc = mgs_write_log_mdc_to_lmv(env, mgs, fsdb, mti, cliname,
2329 rc = record_start_log(env, mgs, &llh, cliname);
2333 rc = record_marker(env, llh, fsdb, CM_START, cliname,
2337 rc = record_mount_opt(env, llh, cliname, fsdb->fsdb_clilov,
2341 rc = record_marker(env, llh, fsdb, CM_END, cliname,
2346 /* for_all_existing_mdt except current one */
2347 for (i = 0; i < INDEX_MAP_SIZE * 8; i++){
2349 if (i != mti->mti_stripe_index &&
2350 test_bit(i, fsdb->fsdb_mdt_index_map)) {
2351 rc = name_create_mdt(&mdtname, mti->mti_fsname, i);
2354 rc = mgs_write_log_mdc_to_mdt(env, mgs, fsdb, mti, mdtname);
2355 name_destroy(&mdtname);
2361 record_end_log(env, &llh);
2363 name_destroy(&cliname);
2367 /* Add the ost info to the client/mdt lov */
/*
 * Add an OSC for the OST described by @mti to the lov @lovname in the
 * client or MDT log @logname.
 * The log must not be empty (an error is logged otherwise).  Names are
 * derived as follows: node uuid = string form of mti->mti_nids[0], svname =
 * "<mti_svname>-osc"; the OSC name is svname alone on filesystems flagged
 * FSDB_OSCNAME18 and svname followed by @suffix otherwise; osc uuid =
 * "<oscname>_UUID", lov uuid = "<lovname>_UUID".  Between markers whose
 * flags are CM_START | @flags and CM_END | @flags the function records one
 * add_uuid per nid, the OSC attach (against the lov uuid) and setup, the
 * failover nids, and the LCFG_LOV_ADD_OBD record with the stripe index as
 * a decimal string and generation "1".  The record order matters: see the
 * NB comment in the body.
 * NOTE(review): the marker comment strings, the rc checks and the return
 * statement are on lines that are not visible in this listing; @sec_part
 * is not referenced by any visible line.
 */
2368 static int mgs_write_log_osc_to_lov(const struct lu_env *env,
2369 struct mgs_device *mgs, struct fs_db *fsdb,
2370 struct mgs_target_info *mti,
2371 char *logname, char *suffix, char *lovname,
2372 enum lustre_sec_part sec_part, int flags)
2374 struct llog_handle *llh = NULL;
2375 char *nodeuuid = NULL;
2376 char *oscname = NULL;
2377 char *oscuuid = NULL;
2378 char *lovuuid = NULL;
2379 char *svname = NULL;
2384 CDEBUG(D_INFO, "adding osc for %s to log %s\n",
2385 mti->mti_svname, logname);
2387 if (mgs_log_is_empty(env, mgs, logname)) {
2388 CERROR("log is empty! Logical error\n");
2392 rc = name_create(&nodeuuid, libcfs_nid2str(mti->mti_nids[0]), "");
2395 rc = name_create(&svname, mti->mti_svname, "-osc");
2398 /* for the system upgraded from old 1.8, keep using the old osc naming
2399 * style for mdt, see name_create_mdt_osc(). LU-1257 */
2400 if (test_bit(FSDB_OSCNAME18, &fsdb->fsdb_flags))
2401 rc = name_create(&oscname, svname, "");
2403 rc = name_create(&oscname, svname, suffix);
2406 rc = name_create(&oscuuid, oscname, "_UUID");
2409 rc = name_create(&lovuuid, lovname, "_UUID");
2414 #03 L add_uuid nid=uml1@tcp(0x20000c0a80201) 0: 1:uml1_UUID
2416 #04 L add_uuid nid=1@elan(0x1000000000001) nal=90 0: 1:uml1_UUID
2417 #04 L attach 0:OSC_uml1_ost1_MNT_client 1:osc 2:89070_lov1_a41dff51a
2418 #05 L setup 0:OSC_uml1_ost1_MNT_client 1:ost1_UUID 2:uml1_UUID
2420 #06 L add_uuid nid=uml2@tcp(0x20000c0a80202) 0: 1:uml2_UUID
2421 #07 L add_conn 0:OSC_uml1_ost1_MNT_client 1:uml2_UUID
2422 #08 L lov_modify_tgts add 0:lov1 1:ost1_UUID 2(index):0 3(gen):1
2425 rc = record_start_log(env, mgs, &llh, logname);
2429 /* FIXME these should be a single journal transaction */
2430 rc = record_marker(env, llh, fsdb, CM_START | flags, mti->mti_svname,
2435 /* NB: don't change record order, because upon MDT steal OSC config
2436 * from client, it treats all nids before LCFG_SETUP as target nids
2437 * (multiple interfaces), while nids after as failover node nids.
2438 * See mgs_steal_llog_handler() LCFG_ADD_UUID.
2440 for (i = 0; i < mti->mti_nid_count; i++) {
2441 CDEBUG(D_MGS, "add nid %s\n", libcfs_nid2str(mti->mti_nids[i]));
2442 rc = record_add_uuid(env, llh, mti->mti_nids[i], nodeuuid);
2446 rc = record_attach(env, llh, oscname, LUSTRE_OSC_NAME, lovuuid);
2449 rc = record_setup(env, llh, oscname, mti->mti_uuid, nodeuuid, 0, 0);
2452 rc = mgs_write_log_failnids(env, mti, llh, oscname);
2455 snprintf(index, sizeof(index), "%d", mti->mti_stripe_index);
2456 rc = record_lov_add(env, llh, lovname, mti->mti_uuid, index, "1");
2459 rc = record_marker(env, llh, fsdb, CM_END | flags, mti->mti_svname,
2464 record_end_log(env, &llh);
2466 name_destroy(&lovuuid);
2467 name_destroy(&oscuuid);
2468 name_destroy(&oscname);
2469 name_destroy(&svname);
2470 name_destroy(&nodeuuid);
/*
 * Write every log affected by the registration of a new OST (@mti).
 *
 * Visible steps:
 *  - complain on the console if a log named after the OST already exists
 *    (the server claims it never registered);
 *  - read the failover mode from mti->mti_params (failout is set when the
 *    PARAM_FAILMODE value starts with "failout");
 *  - write the OST's own log: between "add ost" markers an attach of type
 *    "obdfilter" (the uuid defaults to "<svname>_UUID" when empty) and a
 *    setup record whose device and type strings are placeholders, followed
 *    by "n" or "f" depending on failout;
 *  - decide how the other logs are updated: with FSDB_OLDLOG14 set the
 *    flags become CM_SKIP | CM_UPGRADE146; otherwise, if LDD_F_UPDATE is
 *    not fully set in mti->mti_flags, a console warning says the client
 *    log was not updated;
 *  - add the OSC to the lov of every MDT whose index is set in
 *    fsdb->fsdb_mdt_index_map, using the suffix "-MDT%04x";
 *  - start the "<fsname>-client" log with lov/lmv setup if it is empty and
 *    add the OSC to the client lov with an empty suffix.
 * NOTE(review): rc checks, several call arguments, the statements that end
 * the early-exit branches and the return statement are on lines that are
 * not visible in this listing.
 */
2474 static int mgs_write_log_ost(const struct lu_env *env,
2475 struct mgs_device *mgs, struct fs_db *fsdb,
2476 struct mgs_target_info *mti)
2478 struct llog_handle *llh = NULL;
2479 char *logname, *lovname;
2480 char *ptr = mti->mti_params;
2481 int rc, flags = 0, failout = 0, i;
2484 CDEBUG(D_MGS, "writing new ost %s\n", mti->mti_svname);
2486 /* The ost startup log */
2488 /* If the ost log already exists, that means that someone reformatted
2489 the ost and it called target_add again. */
2490 if (!mgs_log_is_empty(env, mgs, mti->mti_svname)) {
2491 LCONSOLE_ERROR_MSG(0x141, "The config log for %s already "
2492 "exists, yet the server claims it never "
2493 "registered. It may have been reformatted, "
2494 "or the index changed. writeconf the MDT to "
2495 "regenerate all logs.\n", mti->mti_svname);
2500 attach obdfilter ost1 ost1_UUID
2501 setup /dev/loop2 ldiskfs f|n errors=remount-ro,user_xattr
2503 if (class_find_param(ptr, PARAM_FAILMODE, &ptr) == 0)
2504 failout = (strncmp(ptr, "failout", 7) == 0);
2505 rc = record_start_log(env, mgs, &llh, mti->mti_svname);
2508 /* FIXME these should be a single journal transaction */
2509 rc = record_marker(env, llh, fsdb, CM_START, mti->mti_svname,"add ost");
2512 if (*mti->mti_uuid == '\0')
2513 snprintf(mti->mti_uuid, sizeof(mti->mti_uuid),
2514 "%s_UUID", mti->mti_svname);
2515 rc = record_attach(env, llh, mti->mti_svname,
2516 "obdfilter"/*LUSTRE_OST_NAME*/, mti->mti_uuid);
2519 rc = record_setup(env, llh, mti->mti_svname,
2520 "dev"/*ignored*/, "type"/*ignored*/,
2521 failout ? "n" : "f", 0/*options*/);
2524 rc = record_marker(env, llh, fsdb, CM_END, mti->mti_svname, "add ost");
2528 record_end_log(env, &llh);
2531 /* We also have to update the other logs where this osc is part of
2534 if (test_bit(FSDB_OLDLOG14, &fsdb->fsdb_flags)) {
2535 /* If we're upgrading, the old mdt log already has our
2536 entry. Let's do a fake one for fun. */
2537 /* Note that we can't add any new failnids, since we don't
2538 know the old osc names. */
2539 flags = CM_SKIP | CM_UPGRADE146;
2541 } else if ((mti->mti_flags & LDD_F_UPDATE) != LDD_F_UPDATE) {
2542 /* If the update flag isn't set, don't update client/mdt
2545 LCONSOLE_WARN("Client log for %s was not updated; writeconf "
2546 "the MDT first to regenerate it.\n",
2550 /* Add ost to all MDT lov defs */
2551 for (i = 0; i < INDEX_MAP_SIZE * 8; i++){
2552 if (test_bit(i, fsdb->fsdb_mdt_index_map)) {
2555 rc = name_create_mdt_and_lov(&logname, &lovname, fsdb,
2559 sprintf(mdt_index, "-MDT%04x", i);
2560 rc = mgs_write_log_osc_to_lov(env, mgs, fsdb, mti,
2562 lovname, LUSTRE_SP_MDT,
2564 name_destroy(&logname);
2565 name_destroy(&lovname);
2571 /* Append ost info to the client log */
2572 rc = name_create(&logname, mti->mti_fsname, "-client");
2575 if (mgs_log_is_empty(env, mgs, logname)) {
2576 /* Start client log */
2577 rc = mgs_write_log_lov(env, mgs, fsdb, mti, logname,
2581 rc = mgs_write_log_lmv(env, mgs, fsdb, mti, logname,
2586 rc = mgs_write_log_osc_to_lov(env, mgs, fsdb, mti, logname, "",
2587 fsdb->fsdb_clilov, LUSTRE_SP_CLI, 0);
2589 name_destroy(&logname);
/*
 * Test whether a "key=value" parameter string carries an empty value.
 * The visible condition holds when @ptr contains an '=' and the character
 * right after the first '=' is the terminating NUL.
 * NOTE(review): the declaration of tmp and the return statements are on
 * lines that are not visible in this listing; callers in this file use the
 * result as a boolean.
 */
2593 static __inline__ int mgs_param_empty(char *ptr)
2597 if ((tmp = strchr(ptr, '=')) && (*(++tmp) == '\0'))
/*
 * Update the failover nids of the client device @cliname in the log
 * @logname according to mti->mti_params.
 * When the parameter value is empty, all existing "add failnid" stanzas of
 * the target are marked CM_SKIP through mgs_modify(); a negative result is
 * returned as is and any other result is mapped to 0.  Otherwise the nids
 * are additive: they are appended with mgs_write_log_failnids() between
 * "add failnid" CM_START/CM_END markers.
 * NOTE(review): the fsdb parameter, the rc checks in the additive path and
 * the final return statement are on lines that are not visible in this
 * listing.
 */
2602 static int mgs_write_log_failnid_internal(const struct lu_env *env,
2603 struct mgs_device *mgs,
2605 struct mgs_target_info *mti,
2606 char *logname, char *cliname)
2609 struct llog_handle *llh = NULL;
2611 if (mgs_param_empty(mti->mti_params)) {
2612 /* Remove _all_ failnids */
2613 rc = mgs_modify(env, mgs, fsdb, mti, logname,
2614 mti->mti_svname, "add failnid", CM_SKIP);
2615 return rc < 0 ? rc : 0;
2618 /* Otherwise failover nids are additive */
2619 rc = record_start_log(env, mgs, &llh, logname);
2622 /* FIXME this should be a single journal transaction */
2623 rc = record_marker(env, llh, fsdb, CM_START, mti->mti_svname,
2627 rc = mgs_write_log_failnids(env, mti, llh, cliname);
2630 rc = record_marker(env, llh, fsdb, CM_END,
2631 mti->mti_svname, "add failnid");
2633 record_end_log(env, &llh);
2638 /* Add additional failnids to an existing log.
2639 The mdc/osc must have been added to logs first */
2640 /* tcp nids must be in dotted-quad ascii -
2641 we can't resolve hostnames from the kernel. */
/*
 * Add failover nids for an already registered target (@mti) to the logs
 * that reference it.
 * The target's own log must exist; otherwise a console error explains that
 * the target has to be started first.  The client-side device name is
 * "<svname>-mdc" for an MDT and "<svname>-osc" for an OST.  The failnids
 * are first written to the "<fsname>-client" log; for an OST they are also
 * written to the log of every MDT whose index is set in
 * fsdb->fsdb_mdt_index_map, using the per-MDT OSC name produced by
 * name_create_mdt_osc().
 * NOTE(review): the fsdb parameter, the branch taken for other target
 * types, the rc checks and the return statement are on lines that are not
 * visible in this listing.
 */
2642 static int mgs_write_log_add_failnid(const struct lu_env *env,
2643 struct mgs_device *mgs,
2645 struct mgs_target_info *mti)
2647 char *logname, *cliname;
2651 /* FIXME we currently can't erase the failnids
2652 * given when a target first registers, since they aren't part of
2653 * an "add uuid" stanza */
2655 /* Verify that we know about this target */
2656 if (mgs_log_is_empty(env, mgs, mti->mti_svname)) {
2657 LCONSOLE_ERROR_MSG(0x142, "The target %s has not registered "
2658 "yet. It must be started before failnids "
2659 "can be added.\n", mti->mti_svname);
2663 /* Create mdc/osc client name (e.g. lustre-OST0001-osc) */
2664 if (mti->mti_flags & LDD_F_SV_TYPE_MDT) {
2665 rc = name_create(&cliname, mti->mti_svname, "-mdc");
2666 } else if (mti->mti_flags & LDD_F_SV_TYPE_OST) {
2667 rc = name_create(&cliname, mti->mti_svname, "-osc");
2673 /* Add failover nids to the client log */
2674 rc = name_create(&logname, mti->mti_fsname, "-client");
2676 name_destroy(&cliname);
2679 rc = mgs_write_log_failnid_internal(env, mgs, fsdb,mti,logname,cliname);
2680 name_destroy(&logname);
2681 name_destroy(&cliname);
2685 if (mti->mti_flags & LDD_F_SV_TYPE_OST) {
2686 /* Add OST failover nids to the MDT logs as well */
2689 for (i = 0; i < INDEX_MAP_SIZE * 8; i++) {
2690 if (!test_bit(i, fsdb->fsdb_mdt_index_map))
2692 rc = name_create_mdt(&logname, mti->mti_fsname, i);
2695 rc = name_create_mdt_osc(&cliname, mti->mti_svname,
2698 name_destroy(&logname);
2701 rc = mgs_write_log_failnid_internal(env, mgs, fsdb,
2704 name_destroy(&cliname);
2705 name_destroy(&logname);
/*
 * Write one "key=value" parameter (@ptr) for the device @tgtname into the
 * log @logname as an LCFG_PARAM record.
 * The marker comment is a copy of @ptr limited to MTI_NAME_MAXLEN bytes
 * and, per the in-body comment, cut so that the value is not matched.
 * Earlier settings of the same parameter are first marked CM_SKIP with
 * mgs_modify().  A console message reports whether the parameter is being
 * disabled (empty value), set or modified, and the new record is written
 * with mgs_write_log_direct() between markers; the lustre_cfg is freed
 * afterwards.
 * NOTE(review): the statement that truncates the comment at '=', the rc
 * checks, the handling of the "del" case and the return statement are on
 * lines that are not visible in this listing.  memcpy() always reads
 * MTI_NAME_MAXLEN bytes from @ptr, so @ptr presumably points into a buffer
 * at least that large -- confirm.
 */
2714 static int mgs_wlp_lcfg(const struct lu_env *env,
2715 struct mgs_device *mgs, struct fs_db *fsdb,
2716 struct mgs_target_info *mti,
2717 char *logname, struct lustre_cfg_bufs *bufs,
2718 char *tgtname, char *ptr)
2720 char comment[MTI_NAME_MAXLEN];
2722 struct lustre_cfg *lcfg;
2725 /* Erase any old settings of this same parameter */
2726 memcpy(comment, ptr, MTI_NAME_MAXLEN);
2727 comment[MTI_NAME_MAXLEN - 1] = 0;
2728 /* But don't try to match the value. */
2729 if ((tmp = strchr(comment, '=')))
2731 /* FIXME we should skip settings that are the same as old values */
2732 rc = mgs_modify(env, mgs, fsdb, mti, logname, tgtname, comment,CM_SKIP);
2735 del = mgs_param_empty(ptr);
2737 LCONSOLE_INFO("%sing parameter %s.%s in log %s\n", del ? "Disabl" : rc ?
2738 "Sett" : "Modify", tgtname, comment, logname);
2742 lustre_cfg_bufs_reset(bufs, tgtname);
2743 lustre_cfg_bufs_set_string(bufs, 1, ptr);
2744 lcfg = lustre_cfg_new(LCFG_PARAM, bufs);
2747 rc = mgs_write_log_direct(env, mgs, fsdb, logname,lcfg,tgtname,comment);
2748 lustre_cfg_free(lcfg);
2752 /* write global variable settings into log */
/*
 * Write a global ("sys") setting given as @ptr into the logs of the
 * filesystem named in @mti and, for the two timeouts, apply it locally.
 *
 * The parameter is matched against the known names: PARAM_TIMEOUT selects
 * LCFG_SET_TIMEOUT, PARAM_LDLM_TIMEOUT selects LCFG_SET_LDLM_TIMEOUT, the
 * adaptive-timeout names (PARAM_AT_*) and PARAM_JOBID_VAR are accepted as
 * well; for PARAM_JOBID_VAR the value is kept as a string (convert = 0)
 * and stored in buffer 2, otherwise it is parsed with simple_strtoul()
 * into lcfg_num.  Buffer 1 carries @sys.  The record is written to the
 * filesystem's logs via mgs_write_log_direct_all(), passing NULL instead
 * of the lcfg when the value is empty, which only removes the old setting.
 * On success with a non-empty value, the timeout commands are additionally
 * passed to class_process_config() when the corresponding timeout was not
 * set explicitly or the new value is larger than the current one.
 * NOTE(review): the command chosen for the PARAM_AT_* and PARAM_JOBID_VAR
 * branches, the branch for unknown names, the statement that truncates the
 * comment, the switch header and the return statement are on lines that
 * are not visible in this listing.
 */
2753 static int mgs_write_log_sys(const struct lu_env *env,
2754 struct mgs_device *mgs, struct fs_db *fsdb,
2755 struct mgs_target_info *mti, char *sys, char *ptr)
2757 struct mgs_thread_info *mgi = mgs_env_info(env);
2758 struct lustre_cfg *lcfg;
2760 int rc, cmd, convert = 1;
2762 if (class_match_param(ptr, PARAM_TIMEOUT, &tmp) == 0) {
2763 cmd = LCFG_SET_TIMEOUT;
2764 } else if (class_match_param(ptr, PARAM_LDLM_TIMEOUT, &tmp) == 0) {
2765 cmd = LCFG_SET_LDLM_TIMEOUT;
2766 /* Check for known params here so we can return error to lctl */
2767 } else if ((class_match_param(ptr, PARAM_AT_MIN, &tmp) == 0) ||
2768 (class_match_param(ptr, PARAM_AT_MAX, &tmp) == 0) ||
2769 (class_match_param(ptr, PARAM_AT_EXTRA, &tmp) == 0) ||
2770 (class_match_param(ptr, PARAM_AT_EARLY_MARGIN, &tmp) == 0) ||
2771 (class_match_param(ptr, PARAM_AT_HISTORY, &tmp) == 0)) {
2773 } else if (class_match_param(ptr, PARAM_JOBID_VAR, &tmp) == 0) {
2774 convert = 0; /* Don't convert string value to integer */
2780 if (mgs_param_empty(ptr))
2781 CDEBUG(D_MGS, "global '%s' removed\n", sys);
2783 CDEBUG(D_MGS, "global '%s' val=%s\n", sys, tmp);
2785 lustre_cfg_bufs_reset(&mgi->mgi_bufs, NULL);
2786 lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 1, sys);
2787 if (!convert && *tmp != '\0')
2788 lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 2, tmp);
2789 lcfg = lustre_cfg_new(cmd, &mgi->mgi_bufs);
2790 lcfg->lcfg_num = convert ? simple_strtoul(tmp, NULL, 0) : 0;
2791 /* truncate the comment to the parameter name */
2795 /* modify all servers and clients */
2796 rc = mgs_write_log_direct_all(env, mgs, fsdb, mti,
2797 *tmp == '\0' ? NULL : lcfg,
2798 mti->mti_fsname, sys, 0);
2799 if (rc == 0 && *tmp != '\0') {
2801 case LCFG_SET_TIMEOUT:
2802 if (!obd_timeout_set || lcfg->lcfg_num > obd_timeout)
2803 class_process_config(lcfg);
2805 case LCFG_SET_LDLM_TIMEOUT:
2806 if (!ldlm_timeout_set || lcfg->lcfg_num > ldlm_timeout)
2807 class_process_config(lcfg);
2814 lustre_cfg_free(lcfg);
/*
 * Record a quota enable setting in the configuration logs.
 *
 * \param quota  string stored in cfg buffer 1 and passed as the comment to
 *               mgs_write_log_direct_all()
 * \param ptr    text that must match QUOTA_METAPOOL_NAME or
 *               QUOTA_DATAPOOL_NAME; class_match_param() leaves the value
 *               in tmp
 *
 * The record is an LCFG_PARAM addressed to mti->mti_fsname.  When the value
 * is empty, NULL is handed to mgs_write_log_direct_all() instead of the
 * newly built record.
 */
2818 /* write quota settings into log */
2819 static int mgs_write_log_quota(const struct lu_env *env, struct mgs_device *mgs,
2820 struct fs_db *fsdb, struct mgs_target_info *mti,
2821 char *quota, char *ptr)
2823 struct mgs_thread_info *mgi = mgs_env_info(env);
2824 struct lustre_cfg *lcfg;
2827 int rc, cmd = LCFG_PARAM;
2829 /* support only 'meta' and 'data' pools so far */
2830 if (class_match_param(ptr, QUOTA_METAPOOL_NAME, &tmp) != 0 &&
2831 class_match_param(ptr, QUOTA_DATAPOOL_NAME, &tmp) != 0) {
2832 CERROR("parameter quota.%s isn't supported (only quota.mdt "
2833 "& quota.ost are)\n", ptr);
2838 CDEBUG(D_MGS, "global '%s' removed\n", quota);
2840 CDEBUG(D_MGS, "global '%s'\n", quota);
/*
 * A value is rejected only when it contains neither 'u' nor 'g' and is not
 * the literal "none".
 */
2842 if (strchr(tmp, 'u') == NULL && strchr(tmp, 'g') == NULL &&
2843 strcmp(tmp, "none") != 0) {
2844 CERROR("enable option(%s) isn't supported\n", tmp);
2849 lustre_cfg_bufs_reset(&mgi->mgi_bufs, mti->mti_fsname);
2850 lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 1, quota);
2851 lcfg = lustre_cfg_new(cmd, &mgi->mgi_bufs);
2852 /* truncate the comment to the parameter name */
2857 /* XXX we duplicated quota enable information in all server
2858 * config logs, it should be moved to a separate config
2859 * log once we cleanup the config log for global param. */
2860 /* modify all servers */
2861 rc = mgs_write_log_direct_all(env, mgs, fsdb, mti,
2862 *tmp == '\0' ? NULL : lcfg,
2863 mti->mti_fsname, quota, 1);
2865 lustre_cfg_free(lcfg);
/*
 * Persist one sptlrpc parameter in the "<fsname>-sptlrpc" log.
 *
 * An LCFG_SPTLRPC_CONF record addressed to mti->mti_svname is built with
 * param in buffer 1.  The log is started and ended once if it is still
 * empty, the previous record carrying the same comment is obsoleted through
 * mgs_modify(), and the new record is appended with mgs_write_log_direct().
 *
 * The comment is a private copy of the first len bytes of param; len is
 * computed on lines not visible here (presumably the distance to '=').
 */
2869 static int mgs_srpc_set_param_disk(const struct lu_env *env,
2870 struct mgs_device *mgs,
2872 struct mgs_target_info *mti,
2875 struct mgs_thread_info *mgi = mgs_env_info(env);
2876 struct llog_handle *llh = NULL;
2878 char *comment, *ptr;
2879 struct lustre_cfg *lcfg;
2884 ptr = strchr(param, '=');
2888 OBD_ALLOC(comment, len + 1);
2889 if (comment == NULL)
2891 strncpy(comment, param, len);
2892 comment[len] = '\0';
2895 lustre_cfg_bufs_reset(&mgi->mgi_bufs, mti->mti_svname);
2896 lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 1, param);
2897 lcfg = lustre_cfg_new(LCFG_SPTLRPC_CONF, &mgi->mgi_bufs);
2899 GOTO(out_comment, rc = -ENOMEM);
2901 /* construct log name */
2902 rc = name_create(&logname, mti->mti_fsname, "-sptlrpc");
/* An empty log is opened and closed once before being modified. */
2906 if (mgs_log_is_empty(env, mgs, logname)) {
2907 rc = record_start_log(env, mgs, &llh, logname);
2910 record_end_log(env, &llh);
2913 /* obsolete old one */
2914 rc = mgs_modify(env, mgs, fsdb, mti, logname, mti->mti_svname,
2918 /* write the new one */
2919 rc = mgs_write_log_direct(env, mgs, fsdb, logname, lcfg,
2920 mti->mti_svname, comment);
2922 CERROR("err %d writing log %s\n", rc, logname);
/* Release everything acquired above: log name, record, comment copy. */
2924 name_destroy(&logname);
2926 lustre_cfg_free(lcfg);
2928 OBD_FREE(comment, len + 1);
/*
 * Apply the user-descriptor (udesc) sptlrpc parameter to the in-memory
 * state: the value "yes" sets FSDB_UDESC in fsdb->fsdb_flags and "no"
 * clears it.  Anything else ends at the "Invalid param" error.
 *
 * Per the comment below, the adjustable parameter is currently disabled by
 * a line that is not visible here.
 */
2932 static int mgs_srpc_set_param_udesc_mem(struct fs_db *fsdb,
2937 /* disable the adjustable udesc parameter for now, i.e. use default
2938 * setting that client always ship udesc to MDT if possible. to enable
2939 * it simply remove the following line */
2942 ptr = strchr(param, '=');
/* The name part has to be exactly PARAM_SRPC_UDESC. */
2947 if (strcmp(param, PARAM_SRPC_UDESC))
2950 if (strcmp(ptr, "yes") == 0) {
2951 set_bit(FSDB_UDESC, &fsdb->fsdb_flags);
2952 CWARN("Enable user descriptor shipping from client to MDT\n");
2953 } else if (strcmp(ptr, "no") == 0) {
2954 clear_bit(FSDB_UDESC, &fsdb->fsdb_flags);
2955 CWARN("Disable user descriptor shipping from client to MDT\n");
2963 CERROR("Invalid param: %s\n", param);
/*
 * Parse one sptlrpc parameter and merge it into the in-memory rule sets of
 * fsdb.
 *
 * - param must start with PARAM_SRPC.
 * - A PARAM_SRPC_UDESC parameter is forwarded to
 *   mgs_srpc_set_param_udesc_mem().
 * - Otherwise param must start with PARAM_SRPC_FLVR; the remainder is
 *   parsed by sptlrpc_parse_rule().
 * - With FSDB_MGS_SELF set, the rule's source must be LUSTRE_SP_MGC or
 *   LUSTRE_SP_ANY and its destination LUSTRE_SP_MGS or LUSTRE_SP_ANY.
 * - If svname contains '-', the rule goes into the rule set of the matching
 *   entry on the fsdb->fsdb_srpc_tgt list, which is allocated and pushed to
 *   the list head when missing; otherwise it goes into fsdb->fsdb_srpc_gen.
 *   The merge itself is done by sptlrpc_rule_set_merge().
 *
 * NOTE(review): the target name is copied with memcpy() of name_len bytes
 * into a name_len + 1 byte allocation, so termination relies on OBD_ALLOC
 * returning zeroed memory - confirm.
 */
2967 static int mgs_srpc_set_param_mem(struct fs_db *fsdb,
2971 struct sptlrpc_rule rule;
2972 struct sptlrpc_rule_set *rset;
2976 if (strncmp(param, PARAM_SRPC, sizeof(PARAM_SRPC) - 1) != 0) {
2977 CERROR("Invalid sptlrpc parameter: %s\n", param);
2981 if (strncmp(param, PARAM_SRPC_UDESC,
2982 sizeof(PARAM_SRPC_UDESC) - 1) == 0) {
2983 RETURN(mgs_srpc_set_param_udesc_mem(fsdb, param));
2986 if (strncmp(param, PARAM_SRPC_FLVR, sizeof(PARAM_SRPC_FLVR) - 1) != 0) {
2987 CERROR("Invalid sptlrpc flavor parameter: %s\n", param);
/* Step over the flavor prefix so only the rule text is parsed. */
2991 param += sizeof(PARAM_SRPC_FLVR) - 1;
2993 rc = sptlrpc_parse_rule(param, &rule);
2997 /* mgs rules implies must be mgc->mgs */
2998 if (test_bit(FSDB_MGS_SELF, &fsdb->fsdb_flags)) {
2999 if ((rule.sr_from != LUSTRE_SP_MGC &&
3000 rule.sr_from != LUSTRE_SP_ANY) ||
3001 (rule.sr_to != LUSTRE_SP_MGS &&
3002 rule.sr_to != LUSTRE_SP_ANY))
3006 /* preapre room for this coming rule. svcname format should be:
3007 * - fsname: general rule
3008 * - fsname-tgtname: target-specific rule
3010 if (strchr(svname, '-')) {
3011 struct mgs_tgt_srpc_conf *tgtconf;
3014 for (tgtconf = fsdb->fsdb_srpc_tgt; tgtconf != NULL;
3015 tgtconf = tgtconf->mtsc_next) {
3016 if (!strcmp(tgtconf->mtsc_tgt, svname)) {
3025 OBD_ALLOC_PTR(tgtconf);
3026 if (tgtconf == NULL)
3029 name_len = strlen(svname);
3031 OBD_ALLOC(tgtconf->mtsc_tgt, name_len + 1);
3032 if (tgtconf->mtsc_tgt == NULL) {
3033 OBD_FREE_PTR(tgtconf);
3036 memcpy(tgtconf->mtsc_tgt, svname, name_len);
3038 tgtconf->mtsc_next = fsdb->fsdb_srpc_tgt;
3039 fsdb->fsdb_srpc_tgt = tgtconf;
3042 rset = &tgtconf->mtsc_rset;
3044 rset = &fsdb->fsdb_srpc_gen;
3047 rc = sptlrpc_rule_set_merge(rset, &rule);
/*
 * Apply one sptlrpc parameter both in memory and on disk.
 *
 * A private copy of param is taken first.  The in-memory rules are updated
 * with mgs_srpc_set_param_mem() using the original string, then the copy is
 * written to the log with mgs_srpc_set_param_disk().  When the fsdb has
 * FSDB_MGS_SELF set, sptlrpc_target_update_exp_flavor() is called on the
 * MGS obd with the general rule set.  The copy is freed before returning.
 */
3052 static int mgs_srpc_set_param(const struct lu_env *env,
3053 struct mgs_device *mgs,
3055 struct mgs_target_info *mti,
3065 /* keep a copy of original param, which could be destroyed
3067 copy_size = strlen(param) + 1;
3068 OBD_ALLOC(copy, copy_size);
3071 memcpy(copy, param, copy_size);
3073 rc = mgs_srpc_set_param_mem(fsdb, mti->mti_svname, param);
3077 /* previous steps guaranteed the syntax is correct */
3078 rc = mgs_srpc_set_param_disk(env, mgs, fsdb, mti, copy);
/* The MGS's own fsdb is asserted to have no per-target rule sets. */
3082 if (test_bit(FSDB_MGS_SELF, &fsdb->fsdb_flags)) {
3084 * for mgs rules, make them effective immediately.
3086 LASSERT(fsdb->fsdb_srpc_tgt == NULL);
3087 sptlrpc_target_update_exp_flavor(mgs->mgs_obd,
3088 &fsdb->fsdb_srpc_gen);
3092 OBD_FREE(copy, copy_size);
/*
 * State handed to mgs_srpc_read_handler() through the data argument of
 * llog_process().  The handler also reads and writes a msrd_skip member
 * whose declaration is not visible here.
 */
3096 struct mgs_srpc_read_data {
/* fsdb whose in-memory sptlrpc rules are filled from the log */
3097 struct fs_db *msrd_fsdb;
/*
 * llog_process() callback used when reading the sptlrpc log.
 *
 * \param rec   log record; only OBD_CFG_REC records are handled
 * \param data  struct mgs_srpc_read_data carrying the target fsdb and the
 *              current skip state
 *
 * Marker records update the skip state.  While skipping is off, every other
 * record has to be an LCFG_SPTLRPC_CONF whose buffer 0 (service name) and
 * buffer 1 (parameter) are passed to mgs_srpc_set_param_mem().
 */
3101 static int mgs_srpc_read_handler(const struct lu_env *env,
3102 struct llog_handle *llh,
3103 struct llog_rec_hdr *rec, void *data)
3105 struct mgs_srpc_read_data *msrd = data;
3106 struct cfg_marker *marker;
3107 struct lustre_cfg *lcfg = REC_DATA(rec);
3108 char *svname, *param;
3112 if (rec->lrh_type != OBD_CFG_REC) {
3113 CERROR("unhandled lrh_type: %#x\n", rec->lrh_type);
/* Payload length is the record length minus its header and tail. */
3117 cfg_len = rec->lrh_len - sizeof(struct llog_rec_hdr) -
3118 sizeof(struct llog_rec_tail);
3120 rc = lustre_cfg_sanity_check(lcfg, cfg_len);
3122 CERROR("Insane cfg\n");
/*
 * A start marker flagged CM_SKIP turns skipping on; any end marker turns it
 * off again.
 */
3126 if (lcfg->lcfg_command == LCFG_MARKER) {
3127 marker = lustre_cfg_buf(lcfg, 1);
3129 if (marker->cm_flags & CM_START &&
3130 marker->cm_flags & CM_SKIP)
3131 msrd->msrd_skip = 1;
3132 if (marker->cm_flags & CM_END)
3133 msrd->msrd_skip = 0;
3138 if (msrd->msrd_skip)
3141 if (lcfg->lcfg_command != LCFG_SPTLRPC_CONF) {
3142 CERROR("invalid command (%x)\n", lcfg->lcfg_command);
3146 svname = lustre_cfg_string(lcfg, 0);
3147 if (svname == NULL) {
3148 CERROR("svname is empty\n");
3152 param = lustre_cfg_string(lcfg, 1);
3153 if (param == NULL) {
3154 CERROR("param is empty\n");
3158 rc = mgs_srpc_set_param_mem(msrd->msrd_fsdb, svname, param);
3160 CERROR("read sptlrpc record error (%d): %s\n", rc, param);
/*
 * Load the in-memory sptlrpc rules of fsdb from the
 * "<fsdb_name>-sptlrpc" log.
 *
 * The log is opened in the LLOG_CONFIG_ORIG_CTXT context of the MGS obd,
 * initialised as a plain llog and walked with mgs_srpc_read_handler().
 * The handle, the context reference and the log name are released before
 * returning.
 */
3165 int mgs_get_fsdb_srpc_from_llog(const struct lu_env *env,
3166 struct mgs_device *mgs,
3169 struct llog_handle *llh = NULL;
3170 struct llog_ctxt *ctxt;
3172 struct mgs_srpc_read_data msrd;
3176 /* construct log name */
3177 rc = name_create(&logname, fsdb->fsdb_name, "-sptlrpc");
3181 ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
3182 LASSERT(ctxt != NULL);
3184 if (mgs_log_is_empty(env, mgs, logname))
3187 rc = llog_open(env, ctxt, &llh, NULL, logname,
3195 rc = llog_init_handle(env, llh, LLOG_F_IS_PLAIN, NULL);
3197 GOTO(out_close, rc);
/*
 * A size of at most 1 is treated as "nothing to read" and is not an error
 * (presumably only the log header is present - confirm).
 */
3199 if (llog_get_size(llh) <= 1)
3200 GOTO(out_close, rc = 0);
3202 msrd.msrd_fsdb = fsdb;
3205 rc = llog_process(env, llh, mgs_srpc_read_handler, (void *)&msrd,
3209 llog_close(env, llh);
3211 llog_ctxt_put(ctxt);
3212 name_destroy(&logname);
3215 CERROR("failed to read sptlrpc config database: %d\n", rc);
/*
 * Route one parameter to the configuration log(s) that need it.
 *
 * \param mti  target the parameter belongs to; mti_flags, mti_svname,
 *             mti_fsname and mti_stripe_index are consulted
 * \param ptr  the parameter string
 *
 * The parameter prefix is tested in this order: PARAM_MGSNODE,
 * PARAM_NETWORK, PARAM_FAILMODE, PARAM_SRPC, PARAM_FAILNODE, PARAM_SYS,
 * PARAM_QUOTA, the osc "active" switch, PARAM_LOV, then
 * PARAM_OSC / PARAM_MDC / PARAM_LLITE, PARAM_MDT and finally
 * PARAM_MDD / PARAM_OST.  Anything else is reported as an unrecognized
 * parameter.  Most branches end in mgs_wlp_lcfg() or mgs_modify() on the
 * relevant log; the statements that leave each branch are not all visible
 * here.
 */
3219 /* Permanent settings of all parameters by writing into the appropriate
3220 * configuration logs.
3221 * A parameter with null value ("<param>='\0'") means to erase it out of
3224 static int mgs_write_log_param(const struct lu_env *env,
3225 struct mgs_device *mgs, struct fs_db *fsdb,
3226 struct mgs_target_info *mti, char *ptr)
3228 struct mgs_thread_info *mgi = mgs_env_info(env);
3231 int rc = 0, rc2 = 0;
3234 /* For various parameter settings, we have to figure out which logs
3235 care about them (e.g. both mdt and client for lov settings) */
3236 CDEBUG(D_MGS, "next param '%s'\n", ptr);
3238 /* The params are stored in MOUNT_DATA_FILE and modified via
3239 tunefs.lustre, or set using lctl conf_param */
3241 /* Processed in lustre_start_mgc */
3242 if (class_match_param(ptr, PARAM_MGSNODE, NULL) == 0)
3245 /* Processed in ost/mdt */
3246 if (class_match_param(ptr, PARAM_NETWORK, NULL) == 0)
3249 /* Processed in mgs_write_log_ost */
3250 if (class_match_param(ptr, PARAM_FAILMODE, NULL) == 0) {
3251 if (mti->mti_flags & LDD_F_PARAM) {
3252 LCONSOLE_ERROR_MSG(0x169, "%s can only be "
3253 "changed with tunefs.lustre"
3254 "and --writeconf\n", ptr);
/* sptlrpc parameters are handled by mgs_srpc_set_param(). */
3260 if (class_match_param(ptr, PARAM_SRPC, NULL) == 0) {
3261 rc = mgs_srpc_set_param(env, mgs, fsdb, mti, ptr);
3265 if (class_match_param(ptr, PARAM_FAILNODE, NULL) == 0) {
3266 /* Add a failover nidlist */
3268 /* We already processed failovers params for new
3269 targets in mgs_write_log_target */
3270 if (mti->mti_flags & LDD_F_PARAM) {
3271 CDEBUG(D_MGS, "Adding failnode\n");
3272 rc = mgs_write_log_add_failnid(env, mgs, fsdb, mti);
/*
 * For the next two handlers the whole parameter is passed together with
 * tmp, which class_match_param() filled in for the matched prefix.
 */
3277 if (class_match_param(ptr, PARAM_SYS, &tmp) == 0) {
3278 rc = mgs_write_log_sys(env, mgs, fsdb, mti, ptr, tmp);
3282 if (class_match_param(ptr, PARAM_QUOTA, &tmp) == 0) {
3283 rc = mgs_write_log_quota(env, mgs, fsdb, mti, ptr, tmp);
/*
 * osc active switch: the existing "add osc" section for this target is
 * re-flagged in the client log and in the log of every MDT index set in
 * fsdb_mdt_index_map.
 */
3287 if (class_match_param(ptr, PARAM_OSC""PARAM_ACTIVE, &tmp) == 0) {
3288 /* active=0 means off, anything else means on */
3289 int flag = (*tmp == '0') ? CM_EXCLUDE : 0;
3292 if (!(mti->mti_flags & LDD_F_SV_TYPE_OST)) {
3293 LCONSOLE_ERROR_MSG(0x144, "%s: Only OSCs can "
3294 "be (de)activated.\n",
3296 GOTO(end, rc = -EINVAL);
3298 LCONSOLE_WARN("Permanently %sactivating %s\n",
3299 flag ? "de": "re", mti->mti_svname);
3301 rc = name_create(&logname, mti->mti_fsname, "-client");
3304 rc = mgs_modify(env, mgs, fsdb, mti, logname,
3305 mti->mti_svname, "add osc", flag);
3306 name_destroy(&logname);
3310 /* Add to all MDT logs for CMD */
3311 for (i = 0; i < INDEX_MAP_SIZE * 8; i++) {
3312 if (!test_bit(i, fsdb->fsdb_mdt_index_map))
3314 rc = name_create_mdt(&logname, mti->mti_fsname, i);
3317 rc = mgs_modify(env, mgs, fsdb, mti, logname,
3318 mti->mti_svname, "add osc", flag);
3319 name_destroy(&logname);
3325 LCONSOLE_ERROR_MSG(0x145, "Couldn't find %s in"
3326 "log (%d). No permanent "
3327 "changes were made to the "
3329 mti->mti_svname, rc);
3330 if (test_bit(FSDB_OLDLOG14, &fsdb->fsdb_flags))
3331 LCONSOLE_ERROR_MSG(0x146, "This may be"
3336 "update the logs.\n");
3339 /* Fall through to osc proc for deactivating live OSC
3340 on running MDT / clients. */
3342 /* Below here, let obd's XXX_process_config methods handle it */
3344 /* All lov. in proc */
3345 if (class_match_param(ptr, PARAM_LOV, NULL) == 0) {
3348 CDEBUG(D_MGS, "lov param %s\n", ptr);
3349 if (!(mti->mti_flags & LDD_F_SV_TYPE_MDT)) {
3350 LCONSOLE_ERROR_MSG(0x147, "LOV params must be "
3351 "set on the MDT, not %s. "
3358 if (mgs_log_is_empty(env, mgs, mti->mti_svname))
3359 GOTO(end, rc = -ENODEV);
/* Written twice: to the MDT's own log, then to the client log. */
3361 rc = name_create_mdt_and_lov(&logname, &mdtlovname, fsdb,
3362 mti->mti_stripe_index);
3365 rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, mti->mti_svname,
3366 &mgi->mgi_bufs, mdtlovname, ptr);
3367 name_destroy(&logname);
3368 name_destroy(&mdtlovname);
3373 rc = name_create(&logname, mti->mti_fsname, "-client");
3376 rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, logname, &mgi->mgi_bufs,
3377 fsdb->fsdb_clilov, ptr);
3378 name_destroy(&logname);
3382 /* All osc., mdc., llite. params in proc */
3383 if ((class_match_param(ptr, PARAM_OSC, NULL) == 0) ||
3384 (class_match_param(ptr, PARAM_MDC, NULL) == 0) ||
3385 (class_match_param(ptr, PARAM_LLITE, NULL) == 0)) {
3388 if (test_bit(FSDB_OLDLOG14, &fsdb->fsdb_flags)) {
3389 LCONSOLE_ERROR_MSG(0x148, "Upgraded client logs for %s"
3390 " cannot be modified. Consider"
3391 " updating the configuration with"
3394 GOTO(end, rc = -EINVAL);
/*
 * cname is the device the record is addressed to: "<fsname>-client" for
 * llite parameters, otherwise the target name with "-mdc" or "-osc"
 * appended depending on the target type.
 */
3396 if (memcmp(ptr, PARAM_LLITE, strlen(PARAM_LLITE)) == 0) {
3397 rc = name_create(&cname, mti->mti_fsname, "-client");
3398 /* Add the client type to match the obdname in
3399 class_config_llog_handler */
3400 } else if (mti->mti_flags & LDD_F_SV_TYPE_MDT) {
3401 rc = name_create(&cname, mti->mti_svname, "-mdc");
3402 } else if (mti->mti_flags & LDD_F_SV_TYPE_OST) {
3403 rc = name_create(&cname, mti->mti_svname, "-osc");
3405 GOTO(end, rc = -EINVAL);
/*
 * NOTE(review): "%.3s" with ptr + 4 assumes a three-character prefix
 * followed by '.'; for a longer prefix the debug text is split in the
 * middle of the name.  Cosmetic only - confirm.
 */
3410 CDEBUG(D_MGS, "%.3s param %s\n", ptr, ptr + 4);
3413 rc = name_create(&logname, mti->mti_fsname, "-client");
3415 name_destroy(&cname);
3418 rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, logname, &mgi->mgi_bufs,
3421 /* osc params affect the MDT as well */
3422 if (!rc && (mti->mti_flags & LDD_F_SV_TYPE_OST)) {
3425 for (i = 0; i < INDEX_MAP_SIZE * 8; i++){
3426 if (!test_bit(i, fsdb->fsdb_mdt_index_map))
/* cname and logname are rebuilt for every MDT index. */
3428 name_destroy(&cname);
3429 rc = name_create_mdt_osc(&cname, mti->mti_svname,
3431 name_destroy(&logname);
3434 rc = name_create_mdt(&logname,
3435 mti->mti_fsname, i);
3438 if (!mgs_log_is_empty(env, mgs, logname)) {
3439 rc = mgs_wlp_lcfg(env, mgs, fsdb,
3448 name_destroy(&logname);
3449 name_destroy(&cname);
3453 /* All mdt. params in proc */
3454 if (class_match_param(ptr, PARAM_MDT, NULL) == 0) {
3458 CDEBUG(D_MGS, "%.3s param %s\n", ptr, ptr + 4);
/*
 * From here rc temporarily holds server type flags (LDD_F_SV_*), not an
 * error code.
 */
3459 if (strncmp(mti->mti_svname, mti->mti_fsname,
3460 MTI_NAME_MAXLEN) == 0)
3461 /* device is unspecified completely? */
3462 rc = LDD_F_SV_TYPE_MDT | LDD_F_SV_ALL;
3464 rc = server_name2index(mti->mti_svname, &idx, NULL);
3467 if ((rc & LDD_F_SV_TYPE_MDT) == 0)
3469 if (rc & LDD_F_SV_ALL) {
3470 for (i = 0; i < INDEX_MAP_SIZE * 8; i++) {
3472 fsdb->fsdb_mdt_index_map))
3474 rc = name_create_mdt(&logname,
3475 mti->mti_fsname, i);
3478 rc = mgs_wlp_lcfg(env, mgs, fsdb, mti,
3479 logname, &mgi->mgi_bufs,
3481 name_destroy(&logname);
3486 rc = mgs_wlp_lcfg(env, mgs, fsdb, mti,
3487 mti->mti_svname, &mgi->mgi_bufs,
3488 mti->mti_svname, ptr);
3495 /* All mdd., ost. params in proc */
3496 if ((class_match_param(ptr, PARAM_MDD, NULL) == 0) ||
3497 (class_match_param(ptr, PARAM_OST, NULL) == 0)) {
3498 CDEBUG(D_MGS, "%.3s param %s\n", ptr, ptr + 4);
3499 if (mgs_log_is_empty(env, mgs, mti->mti_svname))
3500 GOTO(end, rc = -ENODEV);
3502 rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, mti->mti_svname,
3503 &mgi->mgi_bufs, mti->mti_svname, ptr);
3507 LCONSOLE_WARN("Ignoring unrecognized param '%s'\n", ptr);
3512 CERROR("err %d on param '%s'\n", rc, ptr);
/*
 * Check whether a failover nid has to be added for a target.
 *
 * NOTE(review): the visible body uses obd and fsname, which are not
 * parameters of this function, and calls mgs_find_or_make_fsdb(),
 * mgs_log_is_empty() and mgs_write_log_add_failnid() with argument lists
 * that differ from every other call in this file (no env / mgs).  Together
 * with the comment below this suggests the body is compiled out on lines
 * not visible here - confirm before relying on it.
 */
3517 /* Not implementing automatic failover nid addition at this time. */
3518 int mgs_check_failnid(const struct lu_env *env, struct mgs_device *mgs,
3519 struct mgs_target_info *mti)
3526 rc = mgs_find_or_make_fsdb(obd, fsname, &fsdb);
3530 if (mgs_log_is_empty(obd, mti->mti_svname))
3531 /* should never happen */
3534 CDEBUG(D_MGS, "Checking for new failnids for %s\n", mti->mti_svname);
3536 /* FIXME We can just check mti->params to see if we're already in
3537 the failover list. Modify mti->params for rewriting back at
3538 server_register_target(). */
/* The log update is made under fsdb->fsdb_mutex. */
3540 mutex_lock(&fsdb->fsdb_mutex);
3541 rc = mgs_write_log_add_failnid(obd, fsdb, mti);
3542 mutex_unlock(&fsdb->fsdb_mutex);
/*
 * Write or update the configuration logs for one target.
 *
 * \param mti  target description; mti_flags selects the work to do and
 *             mti_params holds the parameter list
 *
 * The target index is set or checked first.  With fsdb->fsdb_mutex held,
 * a target flagged LDD_F_VIRGIN, LDD_F_UPGRADE14 or LDD_F_WRITECONF gets
 * its log generated from scratch (MDT or OST variant); after that
 * LDD_F_PARAM is set and every parameter in mti_params is fed to
 * mgs_write_log_param() through a temporary buffer.
 *
 * Fix: the argument of class_get_next_param() had been mangled into the
 * single character sequence "¶ms" (an "&para" entity mis-decoding); it is
 * restored to the address of the params cursor.
 */
3549 int mgs_write_log_target(const struct lu_env *env,
3550 struct mgs_device *mgs,
3551 struct mgs_target_info *mti,
3558 /* set/check the new target index */
3559 rc = mgs_set_index(env, mgs, mti);
3561 CERROR("Can't get index (%d)\n", rc);
/* Note that EALREADY is compared as a positive value here. */
3565 if (rc == EALREADY) {
3566 LCONSOLE_WARN("Found index %d for %s, updating log\n",
3567 mti->mti_stripe_index, mti->mti_svname);
3568 /* We would like to mark old log sections as invalid
3569 and add new log sections in the client and mdt logs.
3570 But if we add new sections, then live clients will
3571 get repeat setup instructions for already running
3572 osc's. So don't update the client/mdt logs. */
3573 mti->mti_flags &= ~LDD_F_UPDATE;
3576 mutex_lock(&fsdb->fsdb_mutex);
3578 if (mti->mti_flags &
3579 (LDD_F_VIRGIN | LDD_F_UPGRADE14 | LDD_F_WRITECONF)) {
3580 /* Generate a log from scratch */
3581 if (mti->mti_flags & LDD_F_SV_TYPE_MDT) {
3582 rc = mgs_write_log_mdt(env, mgs, fsdb, mti);
3583 } else if (mti->mti_flags & LDD_F_SV_TYPE_OST) {
3584 rc = mgs_write_log_ost(env, mgs, fsdb, mti);
3586 CERROR("Unknown target type %#x, can't create log for "
3587 "%s\n", mti->mti_flags, mti->mti_svname);
3590 CERROR("Can't write logs for %s (%d)\n",
3591 mti->mti_svname, rc);
3595 /* Just update the params from tunefs in mgs_write_log_params */
3596 CDEBUG(D_MGS, "Update params for %s\n", mti->mti_svname);
3597 mti->mti_flags |= LDD_F_PARAM;
3600 /* allocate temporary buffer, where class_get_next_param will
3601 make copy of a current parameter */
3602 OBD_ALLOC(buf, strlen(mti->mti_params) + 1);
3604 GOTO(out_up, rc = -ENOMEM);
3605 params = mti->mti_params;
/* params is a cursor into mti_params; each parameter is copied into buf. */
3606 while (params != NULL) {
3607 rc = class_get_next_param(&params, buf);
3610 /* there is no next parameter, that is
3615 CDEBUG(D_MGS, "remaining string: '%s', param: '%s'\n",
3617 rc = mgs_write_log_param(env, mgs, fsdb, mti, buf);
3622 OBD_FREE(buf, strlen(mti->mti_params) + 1);
3625 mutex_unlock(&fsdb->fsdb_mutex);
/*
 * Erase one named configuration log.
 *
 * \param name  name of the log to erase
 *
 * The log is erased with llog_erase() in the LLOG_CONFIG_ORIG_CTXT context
 * of the MGS obd; the context reference is dropped afterwards.  A missing
 * context and a failed erase are both reported with CERROR.
 */
3629 int mgs_erase_log(const struct lu_env *env, struct mgs_device *mgs, char *name)
3631 struct llog_ctxt *ctxt;
3634 ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
3636 CERROR("%s: MGS config context doesn't exist\n",
3637 mgs->mgs_obd->obd_name);
3640 rc = llog_erase(env, ctxt, NULL, name);
3641 /* llog may not exist */
3644 llog_ctxt_put(ctxt);
3648 CERROR("%s: failed to clear log %s: %d\n",
3649 mgs->mgs_obd->obd_name, name, rc);
/*
 * Remove the fsdb of a filesystem and erase its configuration logs.
 *
 * \param fsname  name of the filesystem
 *
 * The directory listing is obtained with class_dentry_readdir().  The fsdb
 * is looked up and freed under mgs->mgs_mutex.  Every directory entry is
 * then unlinked from the list and freed; those that belong to fsname are
 * erased with mgs_erase_log() first.
 */
3654 /* erase all logs for the given fs */
3655 int mgs_erase_logs(const struct lu_env *env, struct mgs_device *mgs, char *fsname)
3659 struct mgs_direntry *dirent, *n;
3660 int rc, len = strlen(fsname);
3664 /* Find all the logs in the CONFIGS directory */
3665 rc = class_dentry_readdir(env, mgs, &list);
3669 mutex_lock(&mgs->mgs_mutex);
3671 /* Delete the fs db */
3672 fsdb = mgs_find_fsdb(mgs, fsname);
3674 mgs_free_fsdb(mgs, fsdb);
3676 mutex_unlock(&mgs->mgs_mutex);
3678 cfs_list_for_each_entry_safe(dirent, n, &list, list) {
3679 cfs_list_del(&dirent->list);
/*
 * An entry belongs to fsname when the text before its last '-' has exactly
 * the length of fsname and compares equal to it.
 */
3680 suffix = strrchr(dirent->name, '-');
3681 if (suffix != NULL) {
3682 if ((len == suffix - dirent->name) &&
3683 (strncmp(fsname, dirent->name, len) == 0)) {
3684 CDEBUG(D_MGS, "Removing log %s\n",
3686 mgs_erase_log(env, mgs, dirent->name);
3689 mgs_direntry_free(dirent);
/*
 * Dump the header fields of a lustre_cfg record to the D_MGS debug log.
 * The per-buffer lengths and strings are printed as well, but only when
 * lcfg_bufcount is below LUSTRE_CFG_MAX_BUFCOUNT.
 */
3695 /* from llog_swab */
3696 static void print_lustre_cfg(struct lustre_cfg *lcfg)
3701 CDEBUG(D_MGS, "lustre_cfg: %p\n", lcfg);
3702 CDEBUG(D_MGS, "\tlcfg->lcfg_version: %#x\n", lcfg->lcfg_version);
3704 CDEBUG(D_MGS, "\tlcfg->lcfg_command: %#x\n", lcfg->lcfg_command);
3705 CDEBUG(D_MGS, "\tlcfg->lcfg_num: %#x\n", lcfg->lcfg_num);
3706 CDEBUG(D_MGS, "\tlcfg->lcfg_flags: %#x\n", lcfg->lcfg_flags);
3707 CDEBUG(D_MGS, "\tlcfg->lcfg_nid: %s\n", libcfs_nid2str(lcfg->lcfg_nid));
3709 CDEBUG(D_MGS, "\tlcfg->lcfg_bufcount: %d\n", lcfg->lcfg_bufcount);
/* The bound keeps the loop from walking an implausible buffer count. */
3710 if (lcfg->lcfg_bufcount < LUSTRE_CFG_MAX_BUFCOUNT)
3711 for (i = 0; i < lcfg->lcfg_bufcount; i++) {
3712 CDEBUG(D_MGS, "\tlcfg->lcfg_buflens[%d]: %d %s\n",
3713 i, lcfg->lcfg_buflens[i],
3714 lustre_cfg_string(lcfg, i));
/*
 * Set a permanent (config log) parameter for a target or a filesystem.
 *
 * \param lcfg    buffer 0 may hold the device name, buffer 1 holds the
 *                single parameter
 * \param fsname  output buffer for the filesystem name; when devname does
 *                not parse as a device name it is cleared over
 *                MTI_NAME_MAXLEN bytes and filled from devname
 *
 * The fsdb is looked up or created, a temporary mgs_target_info is filled
 * from fsname, devname and param, the parameter is written through
 * mgs_write_log_param() under fsdb->fsdb_mutex, and the config lock is
 * revoked with mgs_revoke_lock().
 *
 * Fix: the llite guard compared sizeof(PARAM_LLITE) bytes, which includes
 * the terminating NUL of the prefix, so it could only match a parameter
 * that was exactly the bare prefix.  It now compares
 * sizeof(PARAM_LLITE) - 1 bytes, the same prefix idiom used for PARAM_SRPC
 * elsewhere in this file.
 *
 * NOTE(review): the three strncpy() calls that fill mti copy up to the full
 * destination size and do not terminate the result themselves - confirm the
 * inputs are bounded or that the destination is pre-zeroed and larger.
 */
3719 /* Set a permanent (config log) param for a target or fs
3720 * \param lcfg buf0 may contain the device (testfs-MDT0000) name
3721 * buf1 contains the single parameter
3723 int mgs_setparam(const struct lu_env *env, struct mgs_device *mgs,
3724 struct lustre_cfg *lcfg, char *fsname)
3727 struct mgs_target_info *mti;
3728 char *devname, *param;
3734 print_lustre_cfg(lcfg);
3736 /* lustre, lustre-mdtlov, lustre-client, lustre-MDT0000 */
3737 devname = lustre_cfg_string(lcfg, 0);
3738 param = lustre_cfg_string(lcfg, 1);
3740 /* Assume device name embedded in param:
3741 lustre-OST0000.osc.max_dirty_mb=32 */
3742 ptr = strchr(param, '.');
3750 LCONSOLE_ERROR_MSG(0x14d, "No target specified: %s\n", param);
3754 rc = mgs_parse_devname(devname, fsname, NULL);
3755 if (rc == 0 && !mgs_parse_devname(devname, NULL, &index)) {
3756 /* param related to llite isn't allowed to set by OST or MDT */
/* Prefix test: the NUL terminator of PARAM_LLITE is excluded. */
3757 if (rc == 0 && strncmp(param, PARAM_LLITE,
3758 sizeof(PARAM_LLITE) - 1) == 0)
3761 /* assume devname is the fsname */
3762 memset(fsname, 0, MTI_NAME_MAXLEN);
3763 strncpy(fsname, devname, MTI_NAME_MAXLEN);
3764 fsname[MTI_NAME_MAXLEN - 1] = 0;
3766 CDEBUG(D_MGS, "setparam fs='%s' device='%s'\n", fsname, devname);
3768 rc = mgs_find_or_make_fsdb(env, mgs, fsname, &fsdb);
/*
 * An fsdb that is not the MGS's own and still has an empty log is freed
 * again and the request is refused.
 */
3771 if (!test_bit(FSDB_MGS_SELF, &fsdb->fsdb_flags) &&
3772 test_bit(FSDB_LOG_EMPTY, &fsdb->fsdb_flags)) {
3773 CERROR("No filesystem targets for %s. cfg_device from lctl "
3774 "is '%s'\n", fsname, devname);
3775 mgs_free_fsdb(mgs, fsdb);
3779 /* Create a fake mti to hold everything */
3782 GOTO(out, rc = -ENOMEM);
3783 strncpy(mti->mti_fsname, fsname, MTI_NAME_MAXLEN);
3784 strncpy(mti->mti_svname, devname, MTI_NAME_MAXLEN);
3785 strncpy(mti->mti_params, param, sizeof(mti->mti_params));
3786 rc = server_name2index(mti->mti_svname, &mti->mti_stripe_index, &tmp);
3788 /* Not a valid server; may be only fsname */
3791 /* Strip -osc or -mdc suffix from svname */
3792 if (server_make_name(rc, mti->mti_stripe_index, mti->mti_fsname,
3794 GOTO(out, rc = -EINVAL);
/* rc holds the server type flags at this point. */
3796 mti->mti_flags = rc | LDD_F_PARAM;
3798 mutex_lock(&fsdb->fsdb_mutex);
3799 rc = mgs_write_log_param(env, mgs, fsdb, mti, mti->mti_params);
3800 mutex_unlock(&fsdb->fsdb_mutex);
3803 * Revoke lock so everyone updates. Should be alright if
3804 * someone was already reading while we were updating the logs,
3805 * so we don't really need to hold the lock while we're
3808 mgs_revoke_lock(mgs, fsdb, CONFIG_T_CONFIG);
/*
 * Append one pool command to a log as a marker-delimited section.
 *
 * \param logname  log to write to
 * \param lovname  device name used for both markers and for the record
 * \param cmd      pool command stored in the record
 * \param comment  text stored in the CM_START and CM_END markers
 *
 * poolname, fsname and ostname are forwarded in that order to
 * record_base().
 *
 * NOTE(review): the visible caller passes its fsname in the poolname slot
 * and its poolname in the fsname slot.  As both are forwarded unchanged,
 * the record receives them in the caller's order; the parameter names here
 * look swapped - confirm.
 */
3814 static int mgs_write_log_pool(const struct lu_env *env,
3815 struct mgs_device *mgs, char *logname,
3816 struct fs_db *fsdb, char *lovname,
3817 enum lcfg_command_type cmd,
3818 char *poolname, char *fsname,
3819 char *ostname, char *comment)
3821 struct llog_handle *llh = NULL;
3824 rc = record_start_log(env, mgs, &llh, logname);
3827 rc = record_marker(env, llh, fsdb, CM_START, lovname, comment);
3830 rc = record_base(env, llh, lovname, 0, cmd, poolname, fsname, ostname, 0);
3833 rc = record_marker(env, llh, fsdb, CM_END, lovname, comment);
3835 record_end_log(env, &llh);
/*
 * Record a pool command in the MDT logs and in the client log of a
 * filesystem.
 *
 * \param cmd       pool command to record
 * \param fsname    filesystem the pool belongs to
 * \param poolname  name of the pool
 * \param ostname   OST name for the commands that take one, may be NULL
 *
 * A label of the form "new", "add", "rem" or "del" followed by the dotted
 * names is built for the new section.  For the "rem" and "del" forms a
 * second label naming the matching "add" or "new" section is built, and
 * that older section is flagged through mgs_modify() before the new section
 * is written with mgs_write_log_pool().  The work is done under
 * fsdb->fsdb_mutex for every MDT index set in fsdb_mdt_index_map and then
 * for "<fsname>-client"; finally the config lock is revoked.
 *
 * The end of this function is not visible here.
 *
 * NOTE(review): the ostname check compares only the characters before the
 * last '-' of ostname, so an fsname that merely starts with that text also
 * passes; mgs_erase_logs() additionally compares the lengths - confirm
 * whether the same is wanted here.
 */
3839 int mgs_pool_cmd(const struct lu_env *env, struct mgs_device *mgs,
3840 enum lcfg_command_type cmd, char *fsname,
3841 char *poolname, char *ostname)
3846 char *label = NULL, *canceled_label = NULL;
3848 struct mgs_target_info *mti = NULL;
3852 rc = mgs_find_or_make_fsdb(env, mgs, fsname, &fsdb);
3854 CERROR("Can't get db for %s\n", fsname);
3857 if (test_bit(FSDB_LOG_EMPTY, &fsdb->fsdb_flags)) {
3858 CERROR("%s is not defined\n", fsname);
3859 mgs_free_fsdb(mgs, fsdb);
/*
 * Label size: the two names plus 10 bytes for the command word and
 * separators; the OST name length is added below when one is given.
 */
3863 label_sz = 10 + strlen(fsname) + strlen(poolname);
3865 /* check if ostname match fsname */
3866 if (ostname != NULL) {
3869 ptr = strrchr(ostname, '-');
3870 if ((ptr == NULL) ||
3871 (strncmp(fsname, ostname, ptr-ostname) != 0))
3873 label_sz += strlen(ostname);
3876 OBD_ALLOC(label, label_sz);
3883 "new %s.%s", fsname, poolname);
3887 "add %s.%s.%s", fsname, poolname, ostname);
3890 OBD_ALLOC(canceled_label, label_sz);
3891 if (canceled_label == NULL)
3892 GOTO(out_label, rc = -ENOMEM);
3894 "rem %s.%s.%s", fsname, poolname, ostname);
3895 sprintf(canceled_label,
3896 "add %s.%s.%s", fsname, poolname, ostname);
3899 OBD_ALLOC(canceled_label, label_sz);
3900 if (canceled_label == NULL)
3901 GOTO(out_label, rc = -ENOMEM);
3903 "del %s.%s", fsname, poolname);
3904 sprintf(canceled_label,
3905 "new %s.%s", fsname, poolname);
3911 mutex_lock(&fsdb->fsdb_mutex);
3913 if (canceled_label != NULL) {
3916 GOTO(out_cancel, rc = -ENOMEM);
3919 /* write pool def to all MDT logs */
3920 for (i = 0; i < INDEX_MAP_SIZE * 8; i++) {
3921 if (test_bit(i, fsdb->fsdb_mdt_index_map)) {
3922 rc = name_create_mdt_and_lov(&logname, &lovname,
3925 mutex_unlock(&fsdb->fsdb_mutex);
3928 if (canceled_label != NULL) {
3929 strcpy(mti->mti_svname, "lov pool");
3930 rc = mgs_modify(env, mgs, fsdb, mti, logname,
3931 lovname, canceled_label,
3936 rc = mgs_write_log_pool(env, mgs, logname,
3940 name_destroy(&logname);
3941 name_destroy(&lovname);
3943 mutex_unlock(&fsdb->fsdb_mutex);
/* The same section is then written to the client log. */
3949 rc = name_create(&logname, fsname, "-client");
3951 mutex_unlock(&fsdb->fsdb_mutex);
3954 if (canceled_label != NULL) {
3955 rc = mgs_modify(env, mgs, fsdb, mti, logname,
3956 fsdb->fsdb_clilov, canceled_label, CM_SKIP);
3958 mutex_unlock(&fsdb->fsdb_mutex);
3959 name_destroy(&logname);
3964 rc = mgs_write_log_pool(env, mgs, logname, fsdb, fsdb->fsdb_clilov,
3965 cmd, fsname, poolname, ostname, label);
3966 mutex_unlock(&fsdb->fsdb_mutex);
3967 name_destroy(&logname);
3968 /* request for update */
3969 mgs_revoke_lock(mgs, fsdb, CONFIG_T_CONFIG);
3976 if (canceled_label != NULL)
3977 OBD_FREE(canceled_label, label_sz);
3979 OBD_FREE(label, label_sz);