1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * lustre/mgs/mgs_llog.c
5 * Lustre Management Server (mgs) llog controller
7 * Copyright (C) 2001-2005 Cluster File Systems, Inc.
9 * This file is part of Lustre, http://www.lustre.org.
11 * Lustre is free software; you can redistribute it and/or
12 * modify it under the terms of version 2 of the GNU General Public
13 * License as published by the Free Software Foundation.
15 * Lustre is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with Lustre; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
28 #define DEBUG_SUBSYSTEM S_MGS
29 #define D_MGS D_CONFIG|D_ERROR
32 #include <linux/module.h>
33 #include <linux/pagemap.h>
37 #include <linux/obd.h>
38 #include <linux/obd_class.h>
39 #include <linux/lustre_log.h>
40 #include <linux/obd_ost.h>
41 #include <libcfs/list.h>
42 #include <linux/lvfs.h>
43 #include <linux/lustre_fsfilt.h>
44 #include <linux/lustre_disk.h>
45 #include <linux/lustre_mgs.h>
46 #include "mgs_internal.h"
/*
 * Parse the target index out of a server name.
 *
 * The index is read as hexadecimal starting four characters past the first
 * '-' in the name, i.e. after the dash and the three characters that follow
 * it (as in "lustre-MDT0000" -> 0).
 *
 * svname: NUL-terminated server name.
 * idx:    receives the parsed index on success; untouched on failure.
 *
 * Returns 0 on success, -EINVAL when the name has no dash or is too short
 * to contain anything at dash + 4.
 */
static inline int sv_name2index(char *svname, unsigned long *idx)
{
        char *dash = strchr(svname, '-');

        /* dash[1..3] are skipped and dash[4] is the first index digit, so
           at least five characters (dash included) must be present before
           dash + 4 may be read. */
        if (!dash || strlen(dash) < 5) {
                CERROR("Can't understand server name %s\n", svname);
                return -EINVAL;
        }

        *idx = simple_strtoul(dash + 4, NULL, 16);
        return 0;
}
61 /******************** DB functions *********************/
63 /* from the (client) config log, figure out:
64 1. which ost's/mdt's are configured (by index)
65 2. what the last config step is
67 /* FIXME is it better to have a separate db file, instead of parsing the info
68 out of the client log? */
/* llog_process() callback (installed by mgs_get_db_from_llog) that updates a
   struct fs_db from one record of a config log.
   - LCFG_LOV_ADD_OBD / LCFG_LOV_DEL_OBD: config string 2 is parsed as the
     OST index and that bit is set in db->fd_ost_index_map (the bit is set
     for the del command as well as for add).
   - LCFG_ATTACH whose string 1 equals LUSTRE_MDC_NAME: the MDT index is
     parsed from string 0 with sv_name2index() and set in
     db->fd_mdt_index_map.
   - LCFG_MARKER: db->fd_gen is raised to the largest cm_step seen.
   Records whose lrh_type is not OBD_CFG_REC, or that fail
   lustre_cfg_sanity_check(), are reported with CERROR. */
69 static int mgsdb_handler(struct llog_handle *llh, struct llog_rec_hdr *rec,
72 struct fs_db *db = (struct fs_db *)data;
73 int cfg_len = rec->lrh_len;
/* the config payload starts immediately after the record header */
74 char *cfg_buf = (char*) (rec + 1);
75 struct lustre_cfg *lcfg;
80 if (rec->lrh_type != OBD_CFG_REC) {
81 CERROR("unhandled lrh_type: %#x\n", rec->lrh_type);
85 rc = lustre_cfg_sanity_check(cfg_buf, cfg_len);
87 CERROR("Insane cfg\n");
91 lcfg = (struct lustre_cfg *)cfg_buf;
93 CDEBUG(D_INFO, "cmd %x %s %s\n", lcfg->lcfg_command,
94 lustre_cfg_string(lcfg, 0), lustre_cfg_string(lcfg, 1));
96 /* Figure out ost indices */
97 /* lov_modify_tgts add 0:lov1 1:ost1_UUID 2(index):0 3(gen):1 */
98 if (lcfg->lcfg_command == LCFG_LOV_ADD_OBD ||
99 lcfg->lcfg_command == LCFG_LOV_DEL_OBD) {
100 index = simple_strtoul(lustre_cfg_string(lcfg, 2),
102 CDEBUG(D_MGS, "OST index for %s is %lu (%s)\n",
103 lustre_cfg_string(lcfg, 1), index,
104 lustre_cfg_string(lcfg, 2));
/* NOTE(review): index comes straight from the log record; no range check
   against INDEX_MAP_SIZE * 8 is visible before it is used as a bit
   number -- confirm. */
105 set_bit(index, db->fd_ost_index_map);
108 /* Figure out mdt indices */
109 /* attach 0:MDC_uml1_mdsA_MNT_client 1:mdc 2:1d834_MNT_client_03f */
110 if ((lcfg->lcfg_command == LCFG_ATTACH) &&
111 (strcmp(lustre_cfg_string(lcfg, 1), LUSTRE_MDC_NAME) == 0)) {
112 rc = sv_name2index(lustre_cfg_string(lcfg, 0), &index);
114 CWARN("Unparsable MDC name %s, assuming index 0\n",
115 lustre_cfg_string(lcfg, 0));
119 CDEBUG(D_MGS, "MDT index is %lu\n", index);
120 set_bit(index, db->fd_mdt_index_map);
123 /* Keep track of the latest marker step */
124 if (lcfg->lcfg_command == LCFG_MARKER) {
125 struct cfg_marker *marker;
126 marker = lustre_cfg_buf(lcfg, 1);
127 db->fd_gen = max(db->fd_gen, marker->cm_step);
128 CDEBUG(D_MGS, "marker %d %s\n", marker->cm_step,
/* Fill in db by running mgsdb_handler() over the config log named logname.
   Inside obd's lvfs context (push_ctxt/pop_ctxt) the log is obtained with
   llog_create() on the LLOG_CONFIG_ORIG_CTXT context, initialised as a
   plain log, processed with llog_process(), and closed again; the close
   result is kept separately in rc2. */
135 static int mgs_get_db_from_llog(struct obd_device *obd, char *logname,
138 struct llog_handle *loghandle;
139 struct lvfs_run_ctxt saved;
142 push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
144 rc = llog_create(llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT),
145 &loghandle, NULL, logname);
149 rc = llog_init_handle(loghandle, LLOG_F_IS_PLAIN, NULL);
/* db is handed to mgsdb_handler as its opaque data argument */
153 rc = llog_process(loghandle, mgsdb_handler, (void *)db, NULL);
154 CDEBUG(D_MGS, "get_db = %d\n", rc);
156 rc2 = llog_close(loghandle);
161 pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
/* Scan the bitmap index_map for the first clear bit. map_len is the map
   size in bytes, so map_len * 8 bits are examined. When every bit is set
   the "max index exceeded" error is logged.
   NOTE(review): presumably returns the free bit number, or an error value
   when the map is full -- confirm the exact return values. */
166 static int next_index(void *index_map, int map_len)
169 for (i = 0; i < map_len * 8; i++)
170 if (!test_bit(i, index_map)) {
173 CERROR("max index %d exceeded.\n", i);
/* Walk all map_len * 8 bits of index_map (map_len is in bytes), testing
   each one; num starts at 0 and is presumably incremented per set bit and
   returned -- confirm. */
178 static int count_osts(void *index_map, int map_len)
181 for (i = 0, num = 0; i < map_len * 8; i++)
182 if (test_bit(i, index_map))
/* Look up the fs_db for filesystem fsname by walking mgs->mgs_fs_db_list and
   comparing each entry's fd_name with strcmp().
   NOTE(review): no locking is visible here, while mgs_new_db() adds to the
   same list under mgs_fs_db_lock -- confirm callers serialise access. */
188 static struct fs_db *mgs_find_db(struct obd_device *obd, char *fsname)
190 struct mgs_obd *mgs = &obd->u.mgs;
192 struct list_head *tmp;
194 list_for_each(tmp, &mgs->mgs_fs_db_list) {
195 db = list_entry(tmp, struct fs_db, fd_list);
196 if (strcmp(db->fd_name, fsname) == 0)
/* Size in bytes of each per-filesystem index bitmap (fd_ost_index_map and
   fd_mdt_index_map); the number of usable indices is INDEX_MAP_SIZE * 8. */
202 #define INDEX_MAP_SIZE 4096
204 static struct fs_db *mgs_new_db(struct obd_device *obd, char *fsname)
206 struct mgs_obd *mgs = &obd->u.mgs;
210 OBD_ALLOC(db, sizeof(*db));
214 OBD_ALLOC(db->fd_ost_index_map, INDEX_MAP_SIZE);
215 OBD_ALLOC(db->fd_mdt_index_map, INDEX_MAP_SIZE);
216 if (!db->fd_ost_index_map || !db->fd_mdt_index_map) {
217 CERROR("No memory for index maps\n");
221 strncpy(db->fd_name, fsname, sizeof(db->fd_name));
222 //INIT_LIST_HEAD(&db->ost_infos);
224 spin_lock(&mgs->mgs_fs_db_lock);
225 list_add(&db->fd_list, &mgs->mgs_fs_db_list);
226 spin_unlock(&mgs->mgs_fs_db_lock);
230 if (db->fd_ost_index_map)
231 OBD_FREE(db->fd_ost_index_map, INDEX_MAP_SIZE);
232 if (db->fd_mdt_index_map)
233 OBD_FREE(db->fd_mdt_index_map, INDEX_MAP_SIZE);
234 OBD_FREE(db, sizeof(*db));
/* Unlink db from the list it is on and free both index bitmaps and the db
   itself. No lock is taken here; mgs_cleanup_db_list() holds
   mgs_fs_db_lock around its walk of the list. */
238 static void mgs_free_db(struct fs_db *db)
240 list_del(&db->fd_list);
241 OBD_FREE(db->fd_ost_index_map, INDEX_MAP_SIZE);
242 OBD_FREE(db->fd_mdt_index_map, INDEX_MAP_SIZE);
243 OBD_FREE(db, sizeof(*db));
/* Initialise the per-MGS fs_db bookkeeping: the spinlock mgs_fs_db_lock and
   the empty list head mgs_fs_db_list. */
246 int mgs_init_db_list(struct obd_device *obd)
248 struct mgs_obd *mgs = &obd->u.mgs;
249 spin_lock_init(&mgs->mgs_fs_db_lock);
250 INIT_LIST_HEAD(&mgs->mgs_fs_db_list);
/* Walk mgs_fs_db_list under mgs_fs_db_lock with the deletion-safe iterator,
   resolving each entry to its fs_db (presumably to release it with
   mgs_free_db() -- confirm). */
254 int mgs_cleanup_db_list(struct obd_device *obd)
256 struct mgs_obd *mgs = &obd->u.mgs;
258 struct list_head *tmp, *tmp2;
259 spin_lock(&mgs->mgs_fs_db_lock);
260 list_for_each_safe(tmp, tmp2, &mgs->mgs_fs_db_list) {
261 db = list_entry(tmp, struct fs_db, fd_list);
264 spin_unlock(&mgs->mgs_fs_db_lock);
/* Allocate *newname with room for prefix + suffix + the terminating NUL and
   write the concatenation into it. The result is released with
   name_destroy(), which frees strlen() + 1 bytes, i.e. exactly this size.
   NOTE(review): callers in this file do not look at the return value. */
268 static inline int name_create(char *prefix, char *suffix, char **newname)
271 OBD_ALLOC(*newname, strlen(prefix) + strlen(suffix) + 1);
274 sprintf(*newname, "%s%s", prefix, suffix);
/* Free a string obtained from name_create(). The size passed to OBD_FREE is
   recomputed as strlen(newname) + 1, so the string's length must not have
   changed since it was allocated. */
278 static inline void name_destroy(char *newname)
281 OBD_FREE(newname, strlen(newname) + 1);
/* Return the fs_db for filesystem `name`: first try mgs_find_db(); if that
   finds nothing, create one with mgs_new_db() and populate it from the
   "<name>-client" config log via mgs_get_db_from_llog(). A failure to read
   the log is reported with CERROR.
   NOTE(review): the result of name_create() is not checked before cliname
   is used -- confirm an allocation failure cannot reach
   mgs_get_db_from_llog()/name_destroy() with an invalid pointer. */
285 static int mgs_find_or_make_db(struct obd_device *obd, char *name,
292 db = mgs_find_db(obd, name);
298 CDEBUG(D_MGS, "Creating new db\n");
299 db = mgs_new_db(obd, name);
303 /* populate the db from the client llog */
304 name_create(name, "-client", &cliname);
305 rc = mgs_get_db_from_llog(obd, cliname, db);
306 name_destroy(cliname);
308 CERROR("Can't get db from llog %d\n", rc);
/* Assign or validate the target index held in mti->mti_stripe_index.
   The bitmap checked is the filesystem's OST or MDT index map, chosen from
   the LDD_F_SV_TYPE_* bits in mti->mti_flags. With LDD_F_NEED_INDEX set the
   index is taken from next_index(); an MDT index above 0 is forced back to
   0; an index at or beyond INDEX_MAP_SIZE * 8, or one whose bit is already
   set, is reported on the console. Otherwise the bit is set and
   mti->mti_svname is regenerated with sv_make_name(). */
318 int mgs_set_index(struct obd_device *obd, struct mgs_target_info *mti)
325 rc = mgs_find_or_make_db(obd, mti->mti_fsname, &db);
327 CERROR("Can't get db for %s\n", mti->mti_fsname);
331 if (mti->mti_flags & LDD_F_SV_TYPE_OST)
332 imap = db->fd_ost_index_map;
333 else if (mti->mti_flags & LDD_F_SV_TYPE_MDT)
334 imap = db->fd_mdt_index_map;
338 if (mti->mti_flags & LDD_F_NEED_INDEX) {
339 rc = next_index(imap, INDEX_MAP_SIZE);
342 mti->mti_stripe_index = rc;
345 /* Remove after CMD */
346 if ((mti->mti_flags & LDD_F_SV_TYPE_MDT) &&
347 (mti->mti_stripe_index > 0)) {
348 LCONSOLE_ERROR("MDT index must = 0 (until Clustered MetaData "
349 "feature is ready.)\n");
350 mti->mti_stripe_index = 0;
/* NOTE(review): the two adjacent string literals below join without a
   space, so the message reads "...but themax index is...". */
353 if (mti->mti_stripe_index >= INDEX_MAP_SIZE * 8) {
354 LCONSOLE_ERROR("Server %s requested index %d, but the"
355 "max index is %d.\n",
356 mti->mti_svname, mti->mti_stripe_index,
361 if (test_bit(mti->mti_stripe_index, imap)) {
362 LCONSOLE_ERROR("Server %s requested index %d, but that "
363 "index is already in use in the %s "
364 "filesystem. This server "
365 "may have been reformatted, or the "
366 "index changed. (To reformat the entire "
367 "filesystem, specify 'destroy_fs' "
368 "when reformatting a MDT.)\n",
369 mti->mti_svname, mti->mti_stripe_index,
371 /* FIXME implement destroy_fs! */
/* index is free: claim it and derive the server name from it */
375 set_bit(mti->mti_stripe_index, imap);
376 sv_make_name(mti->mti_flags, mti->mti_stripe_index,
377 mti->mti_fsname, mti->mti_svname);
379 CDEBUG(D_MGS, "Set new index for %s to %d\n", mti->mti_svname,
380 mti->mti_stripe_index);
385 /******************** config log recording functions *********************/
/* Append one lustre_cfg to the open config log llh as an OBD_CFG_REC
   record. The record length is llog_data_len() of the cfg's length as
   computed by lustre_cfg_len(). The write runs inside obd's lvfs context,
   and a failure is logged with CERROR. llh must have a context attached
   (asserted). */
387 static int mgs_do_record(struct obd_device *obd, struct llog_handle *llh,
388 struct lustre_cfg *lcfg)
390 struct lvfs_run_ctxt saved;
391 struct llog_rec_hdr rec;
395 LASSERT(llh->lgh_ctxt);
397 buflen = lustre_cfg_len(lcfg->lcfg_bufcount,
399 rec.lrh_len = llog_data_len(buflen);
400 rec.lrh_type = OBD_CFG_REC;
402 push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
403 /* idx = -1 means append */
404 rc = llog_write_rec(llh, &rec, NULL, 0, (void *)lcfg, -1);
405 pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
407 CERROR("failed %d\n", rc);
/* Build a lustre_cfg for command cmd and append it to llh with
   mgs_do_record(). cfgname is passed to lustre_cfg_bufs_reset(); s1..s4
   are placed in buffers 1..4; nid is stored in lcfg_nid. The cfg is freed
   again after the write, and a failure is logged together with all
   arguments. */
413 static int record_base(struct obd_device *obd, struct llog_handle *llh,
414 char *cfgname, lnet_nid_t nid, int cmd,
415 char *s1, char *s2, char *s3, char *s4)
417 struct lustre_cfg_bufs bufs;
418 struct lustre_cfg *lcfg;
421 CDEBUG(D_MGS, "lcfg %s %#x %s %s %s %s\n", cfgname,
422 cmd, s1, s2, s3, s4);
424 lustre_cfg_bufs_reset(&bufs, cfgname);
426 lustre_cfg_bufs_set_string(&bufs, 1, s1);
428 lustre_cfg_bufs_set_string(&bufs, 2, s2);
430 lustre_cfg_bufs_set_string(&bufs, 3, s3);
432 lustre_cfg_bufs_set_string(&bufs, 4, s4);
434 lcfg = lustre_cfg_new(cmd, &bufs);
435 lcfg->lcfg_nid = nid;
437 rc = mgs_do_record(obd, llh, lcfg);
439 lustre_cfg_free(lcfg);
442 CERROR("error %d: lcfg %s %#x %s %s %s %s\n", rc, cfgname,
443 cmd, s1, s2, s3, s4);
/* Append an LCFG_ADD_UUID record: nid is stored in the record, uuid is
   config string 1, and no device name is given. */
449 static inline int record_add_uuid(struct obd_device *obd,
450 struct llog_handle *llh,
451 uint64_t nid, char *uuid)
453 return record_base(obd,llh,NULL,nid,LCFG_ADD_UUID,uuid,0,0,0);
/* Append an LCFG_ADD_CONN record for device devname with uuid as config
   string 1. */
457 static inline int record_add_conn(struct obd_device *obd,
458 struct llog_handle *llh,
462 return record_base(obd,llh,devname,0,LCFG_ADD_CONN,uuid,0,0,0);
/* Append an LCFG_ATTACH record for device devname, with type and uuid as
   config strings 1 and 2. */
465 static inline int record_attach(struct obd_device *obd, struct llog_handle *llh,
466 char *devname, char *type, char *uuid)
468 return record_base(obd,llh,devname,0,LCFG_ATTACH,type,uuid,0,0);
/* Append an LCFG_SETUP record for device devname, with s1..s4 as config
   strings 1..4. */
471 static inline int record_setup(struct obd_device *obd, struct llog_handle *llh,
473 char *s1, char *s2, char *s3, char *s4)
475 return record_base(obd,llh,devname,0,LCFG_SETUP,s1,s2,s3,s4);
/* Append an LCFG_SETUP record for the lov device devname whose buffer 1 is
   the raw struct lov_desc (sizeof(*desc) bytes) rather than a string. The
   temporary cfg is freed after the write. */
478 static int record_lov_setup(struct obd_device *obd, struct llog_handle *llh,
479 char *devname, struct lov_desc *desc)
481 struct lustre_cfg_bufs bufs;
482 struct lustre_cfg *lcfg;
485 lustre_cfg_bufs_reset(&bufs, devname);
486 lustre_cfg_bufs_set(&bufs, 1, desc, sizeof(*desc));
487 lcfg = lustre_cfg_new(LCFG_SETUP, &bufs);
489 rc = mgs_do_record(obd, llh, lcfg);
491 lustre_cfg_free(lcfg);
/* Append an LCFG_LOV_ADD_OBD record for lov_name, with ost_uuid, index and
   gen as config strings 1..3 (index and gen are passed as text). */
495 static inline int record_lov_add(struct obd_device *obd,
496 struct llog_handle *llh,
497 char *lov_name, char *ost_uuid,
498 char *index, char *gen)
500 return record_base(obd,llh,lov_name,0,LCFG_LOV_ADD_OBD,
501 ost_uuid,index,gen,0);
/* Append an LCFG_MOUNTOPT record with profile, lov_name and mdc_name as
   config strings 1..3; no device name is given. */
504 static inline int record_mount_opt(struct obd_device *obd,
505 struct llog_handle *llh,
506 char *profile, char *lov_name,
509 return record_base(obd,llh,NULL,0,LCFG_MOUNTOPT,
510 profile,lov_name,mdc_name,0);
513 static int record_marker(struct obd_device *obd, struct llog_handle *llh,
514 struct fs_db *db, __u32 flags, char *comment)
516 struct cfg_marker marker;
517 struct lustre_cfg_bufs bufs;
518 struct lustre_cfg *lcfg;
521 CDEBUG(D_MGS, "lcfg marker\n");
523 if (flags & CM_START)
525 marker.cm_step = db->fd_gen;
526 marker.cm_flags = flags;
527 strncpy(marker.cm_comment, comment, sizeof(marker.cm_comment));
528 lustre_cfg_bufs_reset(&bufs, NULL);
529 lustre_cfg_bufs_set(&bufs, 1, &marker, sizeof(marker));
530 lcfg = lustre_cfg_new(LCFG_MARKER, &bufs);
532 rc = mgs_do_record(obd, llh, lcfg);
534 lustre_cfg_free(lcfg);
/* Open the config log `name` for recording and return its handle in *llh.
   Inside obd's lvfs context the log is obtained with llog_create() on the
   LLOG_CONFIG_ORIG_CTXT context and initialised as a plain log tagged with
   the fixed "config_uuid". One early exit returns -EBUSY (presumably when
   *llh is already in use -- confirm); failures are logged with the log
   name. */
538 static int record_start_log(struct obd_device *obd,
539 struct llog_handle **llh, char *name)
541 static struct obd_uuid cfg_uuid = { .uuid = "config_uuid" };
542 struct lvfs_run_ctxt saved;
546 GOTO(out, rc = -EBUSY);
549 push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
551 rc = llog_create(llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT),
554 llog_init_handle(*llh, LLOG_F_IS_PLAIN, &cfg_uuid);
558 pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
562 CERROR("Can't start log %s: %d\n", name, rc);
/* Close the log handle *llh (opened by record_start_log) inside obd's lvfs
   context. */
567 static int record_end_log(struct obd_device *obd, struct llog_handle **llh)
569 struct lvfs_run_ctxt saved;
572 push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
574 rc = llog_close(*llh);
577 pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
/* Report whether the config log `name` contains no records besides its
   header. The log is obtained with llog_create(), initialised as a plain
   log, and its size read with llog_get_size(), all inside obd's lvfs
   context. Since the header counts as record 1, the result is presumably
   "size <= 1" -- confirm. */
581 static int mgs_log_is_empty(struct obd_device *obd, char *name)
583 struct lvfs_run_ctxt saved;
584 struct llog_handle *llh;
587 /* FIXME cache the empty state in the db */
589 push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
590 rc = llog_create(llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT),
593 llog_init_handle(llh, LLOG_F_IS_PLAIN, NULL);
594 rc = llog_get_size(llh);
597 pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
598 /* header is record 1 */
602 /******************** config "macros" *********************/
604 /* lov is the first thing in the mdt and client logs */
/* Write the "lov setup" section into the config log logname: a start
   marker, an attach of lovname as type "lov", a setup record carrying a
   struct lov_desc, and an end marker. The descriptor is allocated here,
   filled with the striping defaults from mti (count, pattern, size,
   offset), zero targets and the uuid "<lovname>_UUID" (also used as the
   attach uuid), and freed before returning. */
605 static int mgs_write_log_lov(struct obd_device *obd, struct fs_db *db,
606 struct mgs_target_info *mti,
607 char *logname, char *lovname)
609 struct llog_handle *llh = NULL;
610 struct lov_desc *lovdesc;
615 CDEBUG(D_MGS, "Writing log %s\n", logname);
618 #01 L attach 0:lov_mdsA 1:lov 2:71ccb_lov_mdsA_19f961a9e1
619 #02 L lov_setup 0:lov_mdsA 1:(struct lov_desc)
620 uuid=lov1_UUID, stripe count=1, size=1048576, offset=0, pattern=0
623 /* FIXME just make lov_setup accept empty desc (put uuid in buf 2) */
624 OBD_ALLOC(lovdesc, sizeof(*lovdesc));
627 /* Use defaults here, will fix them later with LCFG_PARAM */
628 lovdesc->ld_magic = LOV_DESC_MAGIC;
629 lovdesc->ld_tgt_count = 0;
630 lovdesc->ld_default_stripe_count = mti->mti_stripe_count;
631 lovdesc->ld_pattern = mti->mti_stripe_pattern;
632 lovdesc->ld_default_stripe_size = mti->mti_stripe_size;
633 lovdesc->ld_default_stripe_offset = mti->mti_stripe_offset;
634 sprintf((char*)lovdesc->ld_uuid.uuid, "%s_UUID", lovname);
635 /* can these be the same? */
636 uuid = (char *)lovdesc->ld_uuid.uuid;
638 /* This should always be the first entry in a log.
639 rc = mgs_clear_log(obd, logname); */
/* NOTE(review): each rc below is overwritten by the next call without
   being checked; if record_start_log() fails, llh stays NULL and the
   following record_* calls still use it. */
640 rc = record_start_log(obd, &llh, logname);
641 rc = record_marker(obd, llh, db, CM_START, "lov setup");
642 rc = record_attach(obd, llh, lovname, "lov", uuid);
643 rc = record_lov_setup(obd, llh, lovname, lovdesc);
644 rc = record_marker(obd, llh, db, CM_END, "lov setup");
645 rc = record_end_log(obd, &llh);
647 OBD_FREE(lovdesc, sizeof(*lovdesc));
/* Write the configuration for a new MDT (mti->mti_svname).
   MDT log: if mti->mti_uuid is empty it is set to "<svname>_UUID" and the
   lov is named "<fsname>-mdtlov", otherwise the old name "lov_mdsA" is
   used. If the MDT log is still empty the lov section is written first
   with mgs_write_log_lov(); then an "add mdt" section is appended (mount
   option, attach as LUSTRE_MDS_NAME, setup).
   Client log ("<fsname>-client", lov "<fsname>-clilov"): the lov section
   is written, followed by an "add mdc" section holding one add_uuid per
   NID in mti->mti_nids, attach/setup of the "<svname>-mdc" device, an
   add_uuid + add_conn pair per failover NID, and a mount option.
   With LDD_F_UPGRADE14 set the client log is treated as already done.
   All temporary names are released with name_destroy().
   NOTE(review): the rc values of the consecutive record_* calls are
   overwritten without being checked. */
651 static int mgs_write_log_mdt(struct obd_device *obd, struct fs_db *db,
652 struct mgs_target_info *mti)
654 struct llog_handle *llh = NULL;
655 char *cliname, *mdcname, *lovname, *nodeuuid, *mdcuuid;
657 int rc, i, first_log = 0;
660 CDEBUG(D_MGS, "writing new mdt %s\n", mti->mti_svname);
662 if (*mti->mti_uuid == 0) {
663 snprintf(mti->mti_uuid, sizeof(mti->mti_uuid),
664 "%s_UUID", mti->mti_svname);
665 name_create(mti->mti_fsname, "-mdtlov", &lovname);
667 /* We're starting with an old uuid. Assume old name for lov
669 /* FIXME parse mds name out of uuid */
670 name_create("lov", "_mdsA", &lovname);
673 /* Append mdt info to mdt log */
674 if (mgs_log_is_empty(obd, mti->mti_svname)) {
675 /* This is the first time for all logs for this fs,
676 since any ost should have already started the mdt log. */
678 rc = mgs_write_log_lov(obd, db, mti, mti->mti_svname,
681 /* else there's already some ost entries in the mdt log. */
683 /* We added the lov, maybe some osc's, now for the mdt.
684 We might add more ost's after this. Note that during the parsing
685 of this log, this is when the mdt will start. (This was not
686 formerly part of the old mds log, it was directly executed by
689 #09 L mount_option 0: 1:mdsA 2:lov_mdsA
690 attach mds mdsA mdsA_UUID
691 setup /dev/loop2 ldiskfs mdsA errors=remount-ro,user_xattr
693 rc = record_start_log(obd, &llh, mti->mti_svname);
694 rc = record_marker(obd, llh, db, CM_START, "add mdt");
695 rc = record_mount_opt(obd, llh, mti->mti_svname, lovname, 0);
696 rc = record_attach(obd, llh, mti->mti_svname, LUSTRE_MDS_NAME,
698 rc = record_setup(obd,llh,mti->mti_svname,
699 "dev"/*ignored*/,"type"/*ignored*/,
700 mti->mti_svname, 0/*options*/);
701 rc = record_marker(obd, llh, db, CM_END, "add mdt");
702 rc = record_end_log(obd, &llh);
704 if (mti->mti_flags & LDD_F_UPGRADE14)
705 /* If we're upgrading, the client log is done. */
708 /* Append the mdt info to the client log */
709 name_create(mti->mti_fsname, "-client", &cliname);
710 name_destroy(lovname);
711 name_create(mti->mti_fsname, "-clilov", &lovname);
713 /* Start client log */
714 rc = mgs_write_log_lov(obd, db, mti, cliname, lovname);
717 name_create(libcfs_nid2str(mti->mti_nids[0]), /*"_UUID"*/"", &nodeuuid);
718 name_create(mti->mti_svname, "-mdc", &mdcname);
719 name_create(mdcname, "_UUID", &mdcuuid);
721 #09 L add_uuid nid=uml1@tcp(0x20000c0a80201) 0: 1:uml1_UUID
722 #10 L attach 0:MDC_uml1_mdsA_MNT_client 1:mdc 2:1d834_MNT_client_03f
723 #11 L setup 0:MDC_uml1_mdsA_MNT_client 1:mdsA_UUID 2:uml1_UUID
724 #12 L add_uuid nid=uml2@tcp(0x20000c0a80202) 0: 1:uml2_UUID
725 #13 L add_conn 0:MDC_uml1_mdsA_MNT_client 1:uml2_UUID
726 #14 L mount_option 0: 1:client 2:lov1 3:MDC_uml1_mdsA_MNT_client
728 rc = record_start_log(obd, &llh, cliname);
729 rc = record_marker(obd, llh, db, CM_START, "add mdc");
730 for (i = 0; i < mti->mti_nid_count; i++) {
731 CDEBUG(D_MGS, "add nid %s\n", libcfs_nid2str(mti->mti_nids[i]));
732 rc = record_add_uuid(obd, llh, mti->mti_nids[i], nodeuuid);
734 rc = record_attach(obd, llh, mdcname, LUSTRE_MDC_NAME, mdcuuid);
735 rc = record_setup(obd, llh, mdcname, mti->mti_uuid,nodeuuid, 0, 0);
736 for (i = 0; i < mti->mti_failnid_count; i++) {
737 nid = mti->mti_failnids[i];
738 CDEBUG(D_MGS, "add failover nid %s\n", libcfs_nid2str(nid));
739 rc = record_add_uuid(obd, llh, nid, libcfs_nid2str(nid));
740 rc = record_add_conn(obd, llh, mdcname, libcfs_nid2str(nid));
742 rc = record_mount_opt(obd, llh, cliname, lovname, mdcname);
743 rc = record_marker(obd, llh, db, CM_END, "add mdc");
744 rc = record_end_log(obd, &llh);
746 name_destroy(mdcuuid);
747 name_destroy(mdcname);
748 name_destroy(nodeuuid);
749 name_destroy(cliname);
751 name_destroy(lovname);
755 /* Add the ost info to the client/mdt lov */
/* Append an "add osc" section for the OST described by mti to the config
   log logname, attaching the new osc to the lov lovname.
   If the log is still empty the lov section is written first with
   mgs_write_log_lov(). The section holds one add_uuid per NID in
   mti->mti_nids (all under the node uuid built from the first NID), the
   attach/setup of the "<svname>-osc" device, an add_uuid + add_conn pair
   per failover NID, and a lov_add carrying mti->mti_stripe_index as text
   with generation "1".
   NOTE(review): the rc values of the consecutive record_* calls are
   overwritten without being checked. */
756 static int mgs_write_log_osc(struct obd_device *obd, struct fs_db *db,
757 struct mgs_target_info *mti,
758 char *logname, char *lovname)
760 struct llog_handle *llh = NULL;
761 char *nodeuuid, *oscname, *oscuuid, *lovuuid;
766 if (mgs_log_is_empty(obd, logname)) {
767 /* The first time an osc is added, setup the lov */
768 rc = mgs_write_log_lov(obd, db, mti, logname, lovname);
771 CDEBUG(D_MGS, "adding osc for %s to log %s\n",
772 mti->mti_svname, logname);
774 name_create(libcfs_nid2str(mti->mti_nids[0]), /*"_UUID"*/"", &nodeuuid);
775 name_create(mti->mti_svname, "-osc", &oscname);
776 name_create(oscname, "_UUID", &oscuuid);
777 name_create(lovname, "_UUID", &lovuuid);
780 #03 L add_uuid nid=uml1@tcp(0x20000c0a80201) 0: 1:uml1_UUID
781 #04 L attach 0:OSC_uml1_ost1_MNT_client 1:osc 2:89070_lov1_a41dff51a
782 #05 L setup 0:OSC_uml1_ost1_MNT_client 1:ost1_UUID 2:uml1_UUID
783 #06 L add_uuid nid=uml2@tcp(0x20000c0a80202) 0: 1:uml2_UUID
784 #07 L add_conn 0:OSC_uml1_ost1_MNT_client 1:uml2_UUID
785 #08 L lov_modify_tgts add 0:lov1 1:ost1_UUID 2(index):0 3(gen):1
787 rc = record_start_log(obd, &llh, logname);
788 rc = record_marker(obd, llh, db, CM_START, "add osc");
789 for (i = 0; i < mti->mti_nid_count; i++) {
790 CDEBUG(D_MGS, "add nid %s\n", libcfs_nid2str(mti->mti_nids[i]));
791 rc = record_add_uuid(obd, llh, mti->mti_nids[i], nodeuuid);
/* the osc is attached with the lov's uuid (lovuuid), not oscuuid */
793 rc = record_attach(obd, llh, oscname, LUSTRE_OSC_NAME, lovuuid);
794 rc = record_setup(obd, llh, oscname, mti->mti_uuid, nodeuuid, 0, 0);
795 for (i = 0; i < mti->mti_failnid_count; i++) {
796 nid = mti->mti_failnids[i];
797 CDEBUG(D_MGS, "add failover nid %s\n", libcfs_nid2str(nid));
798 rc = record_add_uuid(obd, llh, nid, libcfs_nid2str(nid));
799 rc = record_add_conn(obd, llh, oscname, libcfs_nid2str(nid));
801 snprintf(index, sizeof(index), "%d", mti->mti_stripe_index);
802 rc = record_lov_add(obd, llh, lovname, mti->mti_uuid, index, "1");
803 rc = record_marker(obd, llh, db, CM_END, "add osc");
804 rc = record_end_log(obd, &llh);
806 name_destroy(lovuuid);
807 name_destroy(oscuuid);
808 name_destroy(oscname);
809 name_destroy(nodeuuid);
/* Write the configuration for a new OST (mti->mti_svname).
   OST startup log: a log that already has entries is reported on the
   console as a conflict; otherwise an "add ost" section is written
   (attach as "obdfilter", setup), with mti->mti_uuid defaulting to
   "<svname>_UUID" when empty.
   With LDD_F_UPGRADE14 set the remaining logs are treated as already
   done. Otherwise the OST is also added, via mgs_write_log_osc(), to the
   MDT log "<fsname>-MDT0000" (lov "<fsname>-mdtlov") and to the client log
   "<fsname>-client" (lov "<fsname>-clilov").
   NOTE(review): the results of both mgs_write_log_osc() calls and of the
   name_create() calls are not checked. */
813 static int mgs_write_log_ost(struct obd_device *obd, struct fs_db *db,
814 struct mgs_target_info *mti)
816 struct llog_handle *llh = NULL;
817 char *logname, *lovname;
821 CDEBUG(D_MGS, "writing new ost %s\n", mti->mti_svname);
823 /* The ost startup log */
825 /* If the ost log already exists, that means that someone reformatted
826 the ost and it called target_add again.
827 FIXME check and warn here, maybe inc config ver #? Or abort,
828 and claim there's already a server with that name? Maybe need
829 another flag to say it's okay to rewrite.
830 Heck, what do we do about the client and mds logs? We better
832 if (!mgs_log_is_empty(obd, mti->mti_svname)) {
833 LCONSOLE_ERROR("The config log for %s already exists, yet the "
834 "server claims it never registered. It may have"
835 " been reformatted, or the index changed. This "
836 "must be resolved before this server can be "
837 "added.\n", mti->mti_svname);
841 attach obdfilter ost1 ost1_UUID
842 setup /dev/loop2 ldiskfs f|n errors=remount-ro,user_xattr
844 rc = record_start_log(obd, &llh, mti->mti_svname);
845 rc = record_marker(obd, llh, db, CM_START, "add ost");
846 if (*mti->mti_uuid == 0)
847 snprintf(mti->mti_uuid, sizeof(mti->mti_uuid),
848 "%s_UUID", mti->mti_svname);
849 rc = record_attach(obd, llh, mti->mti_svname,
850 "obdfilter"/*LUSTRE_OST_NAME*/, mti->mti_uuid);
851 rc = record_setup(obd,llh,mti->mti_svname,
852 "dev"/*ignored*/,"type"/*ignored*/,
854 rc = record_marker(obd, llh, db, CM_END, "add ost");
855 rc = record_end_log(obd, &llh);
857 if (mti->mti_flags & LDD_F_UPGRADE14)
858 /* If we're upgrading, the client log is done. */
861 /* We also have to update the other logs where this osc is part of
863 /* Append ost info to mdt log */
864 // FIXME need real mdt name -- but MDT may not have registered yet!
865 // FIXME add to all mdt logs for CMD
866 name_create(mti->mti_fsname, "-MDT0000", &logname);
867 name_create(mti->mti_fsname, "-mdtlov", &lovname);
868 mgs_write_log_osc(obd, db, mti, logname, lovname);
869 name_destroy(lovname);
870 name_destroy(logname);
872 /* Append ost info to the client log */
873 name_create(mti->mti_fsname, "-client", &logname);
874 name_create(mti->mti_fsname, "-clilov", &lovname);
875 mgs_write_log_osc(obd, db, mti, logname, lovname);
876 name_destroy(lovname);
877 name_destroy(logname);
/* Entry point for registering a new target: set or validate its index with
   mgs_set_index(), get the filesystem's fs_db, then write the config logs
   with mgs_write_log_mdt() or mgs_write_log_ost() according to the
   LDD_F_SV_TYPE_* bits in mti->mti_flags. Any other target type is
   reported with CERROR. */
882 int mgs_write_log_target(struct obd_device *obd,
883 struct mgs_target_info *mti)
888 /* set/check the new target index */
889 rc = mgs_set_index(obd, mti);
891 CERROR("Can't get index (%d)\n", rc);
895 rc = mgs_find_or_make_db(obd, mti->mti_fsname, &db);
897 CERROR("Can't get db for %s\n", mti->mti_fsname);
901 if (mti->mti_flags & LDD_F_SV_TYPE_MDT) {
902 rc = mgs_write_log_mdt(obd, db, mti);
903 } else if (mti->mti_flags & LDD_F_SV_TYPE_OST) {
904 rc = mgs_write_log_ost(obd, db, mti);
906 CERROR("Unknown target type %#x, can't create log for %s\n",
907 mti->mti_flags, mti->mti_svname);
914 /***************** upgrade pre-mountconf logs to mountconf *****************/
/* Upgrade the pre-mountconf (1.4) logs of filesystem mti->mti_fsname.
   The caller must present an MDT target ("MDT first" otherwise).
   Steps visible here, hardcoded as a proof of concept per the FIXME:
   - MDT: index forced to 0, mti_svname regenerated with sv_make_name(),
     uuid set to "mdsA_UUID"; the MDT log must already exist, and the MDT
     entries are appended with mgs_write_log_mdt().
   - OSTs: a local mgs_target_info (omti) is marked as an OST and used for
     index 0 with uuid "ost1_UUID" and index 1 with uuid "ost2_UUID"; each
     OST log must not exist yet and is written with mgs_write_log_ost().
   - Client: a marker section containing a mount option for "lov1" /
     "MDC_uml1_mdsA_MNT_client" is appended to "<fsname>-client" to record
     that the upgrade was done.
   NOTE(review): omti is presumably initialised as a copy of *mti before
   its flags are adjusted -- confirm. */
916 int mgs_upgrade_logs_14(struct obd_device *obd, struct fs_db *db,
917 struct mgs_target_info *mti)
922 CDEBUG(D_MGS, "Upgrading old logs for %s\n", mti->mti_fsname);
924 /* If we get here, we know:
925 the client log fsname-client exists
926 the logs have not been updated
928 1. parse the old client log (client log name?) to find out UUIDs for
930 2. regen all ost logs: servers will get new
931 name based on index, but will keep their old uuids.
932 3. append mdt startup to the end of the mdt log
933 4. append marker to old client log signifying we did the upgrade
934 ? translate mds/client logs to new names?
935 2 UP mdt MDS MDS_uuid 3
936 3 UP lov lov_mdsA 47d06_lov_mdsA_61f31f85bc 4
937 4 UP osc OSC_uml1_ost1_mdsA 47d06_lov_mdsA_61f31f85bc 4
938 5 UP osc OSC_uml1_ost2_mdsA 47d06_lov_mdsA_61f31f85bc 4
939 6 UP mds lustre-MDT0000 mdsA_UUID 3
941 ? update server uuids?
944 /* FIXME hardcoded for proof-of-concept. Really, we have to parse the
945 old logs to find osts, lov & mdc for client mountopt. */
947 if (!(mti->mti_flags & LDD_F_SV_TYPE_MDT)) {
948 CERROR("MDT first\n");
954 CDEBUG(D_MGS, "Upgrade MDT\n");
955 /* Need to set the mdsuuid first */
956 mti->mti_stripe_index = 0;
957 sv_make_name(mti->mti_flags, mti->mti_stripe_index,
958 mti->mti_fsname, mti->mti_svname);
959 sprintf(mti->mti_uuid, "mdsA_UUID");
960 if (mgs_log_is_empty(obd, mti->mti_svname)) {
961 CERROR("The MDT log %s is missing.\n", mti->mti_svname);
964 /* FIXME Old logs already have an old mount opt
965 which we should drop */
966 rc = mgs_write_log_mdt(obd, db, mti);
970 /* Write the ost logs */
971 struct mgs_target_info omti;
972 CDEBUG(D_MGS, "Upgrade OST\n");
974 /* these indices were already marked by mgsdb_handler */
976 omti.mti_flags |= LDD_F_SV_TYPE_OST;
977 omti.mti_flags &= ~(LDD_F_SV_TYPE_MDT | LDD_F_SV_TYPE_MGS);
978 omti.mti_stripe_index = 0;
979 sv_make_name(omti.mti_flags, omti.mti_stripe_index,
980 omti.mti_fsname, omti.mti_svname);
981 sprintf(omti.mti_uuid, "ost1_UUID");
982 if (!mgs_log_is_empty(obd, omti.mti_svname)) {
983 CERROR("The OST log %s already exists.\n",
986 rc = mgs_write_log_ost(obd, db, &omti);
989 omti.mti_stripe_index = 1;
990 sv_make_name(omti.mti_flags, omti.mti_stripe_index,
991 omti.mti_fsname, omti.mti_svname);
992 sprintf(omti.mti_uuid, "ost2_UUID");
993 if (!mgs_log_is_empty(obd, omti.mti_svname)) {
994 CERROR("The OST log %s already exists.\n",
997 rc = mgs_write_log_ost(obd, db, &omti);
1002 struct llog_handle *llh = NULL;
1004 CDEBUG(D_MGS, "Upgrade client\n");
1006 name_create(mti->mti_fsname, "-client", &cliname);
1008 /* Mark the client log so we know we updated (fd_gen == 1) */
1009 rc = record_start_log(obd, &llh, cliname);
1010 rc = record_marker(obd, llh, db, CM_START, "upgrade from 1.4");
1011 /* FIXME find the old lovname and mdcname */
1012 /* old: mount_option 0: 1:client 2:lov1 3:MDC_uml1_mdsA_MNT_client */
1013 /* new: mount_option 0: 1:lustre-client 2:lustre-clilov 3:lustre-MDT0000-mdc */
1014 rc = record_mount_opt(obd, llh, cliname, "lov1", "MDC_uml1_mdsA_MNT_client");
1015 rc = record_marker(obd, llh, db, CM_END, "upgrade to 1.6");
1016 rc = record_end_log(obd, &llh);
1017 name_destroy(cliname);
1023 /* Make newly-connecting upgraded servers happy. */
/* Handle a connecting server that was upgraded from 1.4: load (or build)
   the fs_db for mti->mti_fsname, reporting an unreadable 1.4 log on the
   console, and run mgs_upgrade_logs_14() when db->fd_gen is still 0,
   i.e. the client log contained no markers. */
1024 int mgs_upgrade_sv_14(struct obd_device *obd, struct mgs_target_info *mti)
1030 rc = mgs_find_or_make_db(obd, mti->mti_fsname, &db);
1032 LCONSOLE_ERROR("The 1.4 log for fs %s in %s is unreadable, "
1033 "I can't upgrade it.\n",
1034 mti->mti_fsname, MOUNT_CONFIGS_DIR);
1038 if (db->fd_gen == 0) {
1039 /* There were no markers in the client log, meaning we have
1040 not updated the logs for this fs */
1041 rc = mgs_upgrade_logs_14(obd, db, mti);
1048 /* end COMPAT_146 */
1052 /******************** unused *********************/
1053 static int mgs_backup_llog(struct obd_device *obd, char* fsname)
1055 struct file *filp, *bak_filp;
1056 struct lvfs_run_ctxt saved;
1057 char *logname, *buf;
1058 loff_t soff = 0 , doff = 0;
1059 int count = 4096, len;
1062 OBD_ALLOC(logname, PATH_MAX);
1063 if (logname == NULL)
1066 OBD_ALLOC(buf, count);
1068 GOTO(out , rc = -ENOMEM);
1070 len = snprintf(logname, PATH_MAX, "%s/%s.bak",
1071 MOUNT_CONFIGS_DIR, fsname);
1073 if (len >= PATH_MAX - 1) {
1074 GOTO(out, -ENAMETOOLONG);
1077 push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
1079 bak_filp = l_filp_open(logname, O_RDWR|O_CREAT|O_TRUNC, 0660);
1080 if (IS_ERR(bak_filp)) {
1081 rc = PTR_ERR(bak_filp);
1082 CERROR("backup logfile open %s: %d\n", logname, rc);
1085 sprintf(logname, "%s/%s", MOUNT_CONFIGS_DIR, fsname);
1086 filp = l_filp_open(logname, O_RDONLY, 0);
1089 CERROR("logfile open %s: %d\n", logname, rc);
1093 while ((rc = lustre_fread(filp, buf, count, &soff)) > 0) {
1094 rc = lustre_fwrite(bak_filp, buf, count, &doff);
1098 filp_close(filp, 0);
1100 filp_close(bak_filp, 0);
1102 pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
1105 OBD_FREE(buf, count);
1106 OBD_FREE(logname, PATH_MAX);
/* Destroy the config log `name`: inside obd's lvfs context the log is
   obtained with llog_create(), initialised as a plain log, destroyed with
   llog_destroy() and its handle freed. A failure is logged with the log
   name. */
1110 static int mgs_clear_log(struct obd_device *obd, char *name)
1112 struct lvfs_run_ctxt saved;
1113 struct llog_handle *llh;
1116 push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
1117 rc = llog_create(llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT),
1120 llog_init_handle(llh, LLOG_F_IS_PLAIN, NULL);
1121 rc = llog_destroy(llh);
1122 llog_free_handle(llh);
1124 pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
1127 CERROR("failed to clear log %s: %d\n", name, rc);
1132 /* from mdt_iocontrol */
1133 int mgs_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
1134 void *karg, void *uarg)
1136 static struct obd_uuid cfg_uuid = { .uuid = "config_uuid" };
1137 struct obd_device *obd = exp->exp_obd;
1138 struct mgs_obd *mgs = &obd->u.mgs;
1139 struct obd_ioctl_data *data = karg;
1140 struct lvfs_run_ctxt saved;
1144 CDEBUG(D_IOCTL, "handling ioctl cmd %#x\n", cmd);
1147 case OBD_IOC_RECORD: {
1148 char *name = data->ioc_inlbuf1;
1149 if (mgs->mgs_cfg_llh)
1152 push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
1153 rc = llog_create(llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT),
1154 &mgs->mgs_cfg_llh, NULL, name);
1156 llog_init_handle(mgs->mgs_cfg_llh, LLOG_F_IS_PLAIN,
1159 mgs->mgs_cfg_llh = NULL;
1160 pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
1164 case OBD_IOC_ENDRECORD: {
1165 if (!mgs->mgs_cfg_llh)
1168 push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
1169 rc = llog_close(mgs->mgs_cfg_llh);
1170 pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
1172 mgs->mgs_cfg_llh = NULL;
1176 case OBD_IOC_CLEAR_LOG: {
1177 char *name = data->ioc_inlbuf1;
1178 if (mgs->mgs_cfg_llh)
1181 push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
1182 rc = llog_create(llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT),
1183 &mgs->mgs_cfg_llh, NULL, name);
1185 llog_init_handle(mgs->mgs_cfg_llh, LLOG_F_IS_PLAIN,
1188 rc = llog_destroy(mgs->mgs_cfg_llh);
1189 llog_free_handle(mgs->mgs_cfg_llh);
1191 pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
1193 mgs->mgs_cfg_llh = NULL;
1197 case OBD_IOC_DORECORD: {
1199 struct llog_rec_hdr rec;
1200 if (!mgs->mgs_cfg_llh)
1203 rec.lrh_len = llog_data_len(data->ioc_plen1);
1205 if (data->ioc_type == LUSTRE_CFG_TYPE) {
1206 rec.lrh_type = OBD_CFG_REC;
1208 CERROR("unknown cfg record type:%d \n", data->ioc_type);
1212 OBD_ALLOC(cfg_buf, data->ioc_plen1);
1213 if (cfg_buf == NULL)
1215 rc = copy_from_user(cfg_buf, data->ioc_pbuf1, data->ioc_plen1);
1217 OBD_FREE(cfg_buf, data->ioc_plen1);
1221 push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
1222 rc = llog_write_rec(mgs->mgs_cfg_llh, &rec, NULL, 0,
1224 pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
1226 OBD_FREE(cfg_buf, data->ioc_plen1);
1230 case OBD_IOC_PARSE: {
1231 struct llog_ctxt *ctxt =
1232 llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT);
1233 push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
1234 rc = class_config_parse_llog(ctxt, data->ioc_inlbuf1, NULL);
1235 pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
1242 case OBD_IOC_DUMP_LOG: {
1243 struct llog_ctxt *ctxt =
1244 llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT);
1245 push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
1246 rc = class_config_dump_llog(ctxt, data->ioc_inlbuf1, NULL);
1247 pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
1254 case OBD_IOC_SYNC: {
1255 CDEBUG(D_HA, "syncing mgs %s\n", obd->obd_name);
1256 rc = fsfilt_sync(obd, obd->u.mgs.mgs_sb);
1260 case OBD_IOC_SET_READONLY: {
1262 struct inode *inode = obd->u.mgs.mgs_sb->s_root->d_inode;
1263 BDEVNAME_DECLARE_STORAGE(tmp);
1264 CERROR("*** setting device %s read-only ***\n",
1265 ll_bdevname(obd->u.mgs.mgs_sb, tmp));
1267 handle = fsfilt_start(obd, inode, FSFILT_OP_MKNOD, NULL);
1268 if (!IS_ERR(handle))
1269 rc = fsfilt_commit(obd, inode, handle, 1);
1271 CDEBUG(D_HA, "syncing mgs %s\n", obd->obd_name);
1272 rc = fsfilt_sync(obd, obd->u.mgs.mgs_sb);
1274 lvfs_set_rdonly(lvfs_sbdev(obd->u.mgs.mgs_sb));
1278 case OBD_IOC_LLOG_CHECK:
1279 case OBD_IOC_LLOG_CANCEL:
1280 case OBD_IOC_LLOG_REMOVE: {
1281 struct llog_ctxt *ctxt =
1282 llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT);
1284 push_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_lvfs_ctxt, NULL);
1285 rc = llog_ioctl(ctxt, cmd, data);
1286 pop_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_lvfs_ctxt, NULL);
1290 case OBD_IOC_LLOG_INFO:
1291 case OBD_IOC_LLOG_PRINT: {
1292 struct llog_ctxt *ctxt =
1293 llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT);
1295 push_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_lvfs_ctxt, NULL);
1296 rc = llog_ioctl(ctxt, cmd, data);
1297 pop_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_lvfs_ctxt, NULL);
1303 CDEBUG(D_INFO, "unknown command %x\n", cmd);