From deb1d73396f404fba0e277fd79b8e97cd17903b1 Mon Sep 17 00:00:00 2001 From: Bobi Jam Date: Thu, 31 Mar 2011 09:20:31 +0800 Subject: [PATCH] LU-57 Allow OSTs to be created with no primary node Add a --servicenode parameter for mkfs.lustre to treat all service nodes equally. Bugzilla: 19064 Change-Id: I7851cb7781cdaf59d2cf18690d2da35121432cc1 Signed-off-by: Bobi Jam --- lustre/doc/mkfs.lustre.8 | 5 +++++ lustre/include/lustre_disk.h | 17 +++++++++-------- lustre/mgs/mgs_handler.c | 3 ++- lustre/mgs/mgs_llog.c | 1 - lustre/obdclass/obd_mount.c | 15 +++++++++++++++ lustre/tests/conf-sanity.sh | 15 ++++++++++++--- lustre/utils/mkfs_lustre.c | 31 +++++++++++++++++++++++++++---- 7 files changed, 70 insertions(+), 17 deletions(-) diff --git a/lustre/doc/mkfs.lustre.8 b/lustre/doc/mkfs.lustre.8 index d79a4cc..80ed09a 100644 --- a/lustre/doc/mkfs.lustre.8 +++ b/lustre/doc/mkfs.lustre.8 @@ -47,6 +47,11 @@ Only print what would be done; does not affect the disk .TP .BI \--failnode= nid,... Set the NID(s) of a failover partner. This option can be repeated as desired. +Cannot be used with --servicenode. +.TP +.BI \--servicenode= nid,... +Set the NID(s) of all service partners. This option treats all nodes as equal +service nodes. Cannot be used with --failnode. .TP .BI \--fsname= filesystem_name The Lustre filesystem this service will be part of. 
The maximum diff --git a/lustre/include/lustre_disk.h b/lustre/include/lustre_disk.h index 7446b9f..99a5e7f 100644 --- a/lustre/include/lustre_disk.h +++ b/lustre/include/lustre_disk.h @@ -68,6 +68,7 @@ #define LDD_F_WRITECONF 0x0100 /* regenerate all logs for this fs */ #define LDD_F_UPGRADE14 0x0200 /* COMPAT_14 */ #define LDD_F_PARAM 0x0400 /* process as lctl conf_param */ +#define LDD_F_NO_PRIMNODE 0x1000 /* all nodes are specified as servicenodes */ enum ldd_mount_type { LDD_MT_EXT3 = 0, @@ -104,7 +105,7 @@ struct lustre_disk_data { __u32 ldd_config_ver; /* config rewrite count - not used */ __u32 ldd_flags; /* LDD_SV_TYPE */ - __u32 ldd_svindex; /* server index (0001), must match + __u32 ldd_svindex; /* server index (0001), must match svname */ __u32 ldd_mount_type; /* target fs type LDD_MT_* */ char ldd_fsname[64]; /* filesystem this server is part of */ @@ -128,7 +129,7 @@ static inline int server_make_name(__u32 flags, __u16 index, char *fs, { if (flags & (LDD_F_SV_TYPE_MDT | LDD_F_SV_TYPE_OST)) { sprintf(name, "%.8s-%s%04x", fs, - (flags & LDD_F_SV_TYPE_MDT) ? "MDT" : "OST", + (flags & LDD_F_SV_TYPE_MDT) ? 
"MDT" : "OST", index); } else if (flags & LDD_F_SV_TYPE_MGS) { sprintf(name, "MGS"); @@ -145,7 +146,7 @@ int server_name2index(char *svname, __u32 *idx, char **endptr); /****************** mount command *********************/ -/* The lmd is only used internally by Lustre; mount simply passes +/* The lmd is only used internally by Lustre; mount simply passes everything as string options */ #define LMD_MAGIC 0xbdacbd03 @@ -160,7 +161,7 @@ struct lustre_mount_data { int lmd_recovery_time_hard; char *lmd_dev; /* device name */ char *lmd_profile; /* client only */ - char *lmd_opts; /* lustre mount options (as opposed to + char *lmd_opts; /* lustre mount options (as opposed to _device_ mount options) */ __u32 *lmd_exclude; /* array of OSTs to ignore */ }; @@ -174,7 +175,7 @@ struct lustre_mount_data { existing MGS services */ #define LMD_FLG_WRITECONF 0x0040 /* Rewrite config log */ -#define lmd_is_client(x) ((x)->lmd_flags & LMD_FLG_CLIENT) +#define lmd_is_client(x) ((x)->lmd_flags & LMD_FLG_CLIENT) /****************** last_rcvd file *********************/ @@ -272,7 +273,7 @@ static inline void check_lcd(char *obd_name, int index, lcd->lcd_uuid[length - 1] = '\0'; LCONSOLE_ERROR("the client UUID (%s) on %s for exports" - "stored in last_rcvd(index = %d) is bad!\n", + "stored in last_rcvd(index = %d) is bad!\n", lcd->lcd_uuid, obd_name, index); } } @@ -331,9 +332,9 @@ void lustre_register_kill_super_cb(void (*cfs)(struct super_block *sb)); int lustre_common_put_super(struct super_block *sb); -int lustre_process_log(struct super_block *sb, char *logname, +int lustre_process_log(struct super_block *sb, char *logname, struct config_llog_instance *cfg); -int lustre_end_log(struct super_block *sb, char *logname, +int lustre_end_log(struct super_block *sb, char *logname, struct config_llog_instance *cfg); struct lustre_mount_info *server_find_mount_locked(char *name); struct lustre_mount_info *server_get_mount(char *name); diff --git a/lustre/mgs/mgs_handler.c 
b/lustre/mgs/mgs_handler.c index d9a9a3a..775b2ce 100644 --- a/lustre/mgs/mgs_handler.c +++ b/lustre/mgs/mgs_handler.c @@ -422,7 +422,8 @@ static int mgs_handle_target_reg(struct ptlrpc_request *req) /* Nothing wrong, or fatal error */ GOTO(out_nolock, rc); } else { - if ((rc = mgs_check_failover_reg(mti))) + if (!(mti->mti_flags & LDD_F_NO_PRIMNODE) + && (rc = mgs_check_failover_reg(mti))) GOTO(out_nolock, rc); } diff --git a/lustre/mgs/mgs_llog.c b/lustre/mgs/mgs_llog.c index c57e28a..29e1d35 100644 --- a/lustre/mgs/mgs_llog.c +++ b/lustre/mgs/mgs_llog.c @@ -1008,7 +1008,6 @@ static int mgs_write_log_failnids(struct obd_device *obd, char *ptr = mti->mti_params; lnet_nid_t nid; int rc = 0; - /* #03 L add_uuid nid=uml1@tcp(0x20000c0a80201) nal=90 0: 1:uml1_UUID #04 L add_uuid nid=1@elan(0x1000000000001) nal=90 0: 1:uml1_UUID diff --git a/lustre/obdclass/obd_mount.c b/lustre/obdclass/obd_mount.c index 3654456..177a1bc 100644 --- a/lustre/obdclass/obd_mount.c +++ b/lustre/obdclass/obd_mount.c @@ -923,6 +923,8 @@ static int server_sb2mti(struct super_block *sb, struct mgs_target_info *mti) struct lustre_sb_info *lsi = s2lsi(sb); struct lustre_disk_data *ldd = lsi->lsi_ldd; lnet_process_id_t id; + lnet_nid_t nid; + char *ptr; int i = 0; ENTRY; @@ -938,6 +940,19 @@ static int server_sb2mti(struct super_block *sb, struct mgs_target_info *mti) while (LNetGetId(i++, &id) != -ENOENT) { if (LNET_NETTYP(LNET_NIDNET(id.nid)) == LOLND) continue; + if ((ldd->ldd_flags & LDD_F_NO_PRIMNODE) != 0) { + /* server use --servicenode param, only allow specified + * nids be registered */ + ptr = ldd->ldd_params; + while (class_find_param(ptr, PARAM_FAILNODE, &ptr) ==0){ + while (class_parse_nid(ptr, &nid, &ptr) == 0) { + if (nid == id.nid) + goto allowed; + } + } + continue; + } +allowed: mti->mti_nids[mti->mti_nid_count] = id.nid; mti->mti_nid_count++; if (mti->mti_nid_count >= MTI_NIDS_MAX) { diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh index 3d2c816..1c97e58 
100644 --- a/lustre/tests/conf-sanity.sh +++ b/lustre/tests/conf-sanity.sh @@ -2288,16 +2288,25 @@ test_56() { } run_test 56 "check big indexes" -test_57() { # bug 22656 +test_57a() { # bug 22656 local NID=$(do_facet ost1 "$LCTL get_param nis" | tail -1 | awk '{print $1}') writeconf do_facet ost1 "$TUNEFS --failnode=$NID `ostdevname 1`" || error "tunefs failed" start_mgsmds - stop_mds start_ost && error "OST registration from failnode should fail" reformat } -run_test 57 "initial registration from failnode should fail (should return errs)" +run_test 57a "initial registration from failnode should fail (should return errs)" + +test_57b() { + local NID=$(do_facet ost1 "$LCTL get_param nis" | tail -1 | awk '{print $1}') + writeconf + do_facet ost1 "$TUNEFS --servicenode=$NID `ostdevname 1`" || error "tunefs failed" + start_mgsmds + start_ost || error "OST registration from servicenode should not fail" + reformat +} +run_test 57b "initial registration from servicenode should not fail" test_58() { # bug 22658 [ "$FSTYPE" != "ldiskfs" ] && skip "not supported for $FSTYPE" && return diff --git a/lustre/utils/mkfs_lustre.c b/lustre/utils/mkfs_lustre.c index f6b0c83..f01eec6 100644 --- a/lustre/utils/mkfs_lustre.c +++ b/lustre/utils/mkfs_lustre.c @@ -122,6 +122,9 @@ void usage(FILE *out) "\t\t\trequired for all targets other than the mgs node\n" "\t\t--fsname= : default is 'lustre'\n" "\t\t--failnode=[,<...>] : NID(s) of a failover partner\n" + "\t\t\tcannot be used with --servicenode\n" + "\t\t--servicenode=[,<...>] : NID(s) of all service partners\n" + "\t\t\ttreat all nodes as equal service node, cannot be used with --failnode\n" "\t\t--param = : set a permanent parameter\n" "\t\t\te.g. 
--param sys.timeout=40\n" "\t\t\t --param lov.stripesize=2M\n" @@ -740,7 +743,7 @@ void print_ldd(char *str, struct lustre_disk_data *ldd) printf("Lustre FS: %s\n", ldd->ldd_fsname); printf("Mount type: %s\n", MT_STR(ldd)); printf("Flags: %#x\n", ldd->ldd_flags); - printf(" (%s%s%s%s%s%s%s%s)\n", + printf(" (%s%s%s%s%s%s%s%s%s)\n", IS_MDT(ldd) ? "MDT ":"", IS_OST(ldd) ? "OST ":"", IS_MGS(ldd) ? "MGS ":"", @@ -748,6 +751,7 @@ void print_ldd(char *str, struct lustre_disk_data *ldd) ldd->ldd_flags & LDD_F_VIRGIN ? "first_time ":"", ldd->ldd_flags & LDD_F_UPDATE ? "update ":"", ldd->ldd_flags & LDD_F_WRITECONF ? "writeconf ":"", + ldd->ldd_flags & LDD_F_NO_PRIMNODE? "no_primnode ":"", ldd->ldd_flags & LDD_F_UPGRADE14 ? "upgrade1.4 ":""); printf("Persistent mount opts: %s\n", ldd->ldd_mount_opts); printf("Parameters:%s\n", ldd->ldd_params); @@ -1187,15 +1191,17 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop, {"print", 0, 0, 'n'}, {"quiet", 0, 0, 'q'}, {"reformat", 0, 0, 'r'}, + {"servicenode", 1, 0, 's'}, {"verbose", 0, 0, 'v'}, {"writeconf", 0, 0, 'w'}, {0, 0, 0, 0} }; - char *optstring = "b:c:C:d:ef:Ghi:k:L:m:MnNo:Op:Pqru:vw"; + char *optstring = "b:c:C:d:ef:Ghi:k:L:m:MnNo:Op:Pqrsu:vw"; int opt; int rc, longidx; int upcall = 0; const size_t prefix_len = sizeof(PARAM_MDT_UPCALL) - 1; + int failnode_set = 0, servicenode_set = 0; while ((opt = getopt_long(argc, argv, optstring, long_opt, &longidx)) != EOF) { @@ -1237,8 +1243,19 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop, /* Must update the mgs logs */ mop->mo_ldd.ldd_flags |= LDD_F_UPDATE; break; - case 'f': { - char *nids = convert_hostnames(optarg); + case 'f': + case 's': { + char *nids; + + if ((opt == 'f' && servicenode_set) + || (opt == 's' && failnode_set)) { + fprintf(stderr, "%s: %s cannot use with --%s\n", + progname, long_opt[longidx].name, + opt == 'f' ? 
"servicenode" : "failnode"); + return 1; + } + + nids = convert_hostnames(optarg); if (!nids) return 1; rc = add_param(mop->mo_ldd.ldd_params, PARAM_FAILNODE, @@ -1248,6 +1265,12 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop, return rc; /* Must update the mgs logs */ mop->mo_ldd.ldd_flags |= LDD_F_UPDATE; + if (opt == 'f') { + failnode_set = 1; + } else { + mop->mo_ldd.ldd_flags |= LDD_F_NO_PRIMNODE; + servicenode_set = 1; + } failover = 1; break; } -- 1.8.3.1