From: Bobi Jam Date: Thu, 31 Mar 2011 06:55:10 +0000 (+0800) Subject: LU-57 Allow OSTs to be created with no primary node X-Git-Tag: 2.0.62.0~15 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=80ac0f4ee600d5a0b8d818843562d9328fef2ef0;p=fs%2Flustre-release.git LU-57 Allow OSTs to be created with no primary node Add a --servicenode parameter for mkfs.lustre to treat all service nodes equally. Bugzilla: 19064 Change-Id: I15fc50ec7b808fac03dab92b8c0702a0d054b559 Signed-off-by: Bobi Jam Reviewed-on: http://review.whamcloud.com/378 Tested-by: Hudson Tested-by: Maloo Reviewed-by: Mikhail Pershin Tested-by: Liang Zhen Reviewed-by: Oleg Drokin --- diff --git a/lustre/doc/mkfs.lustre.8 b/lustre/doc/mkfs.lustre.8 index 04d038b..dc92e6c 100644 --- a/lustre/doc/mkfs.lustre.8 +++ b/lustre/doc/mkfs.lustre.8 @@ -47,6 +47,11 @@ Only print what would be done; does not affect the disk .TP .BI \--failnode= nid,... Set the NID(s) of a failover partner. This option can be repeated as desired. +Cannot be used with --servicenode. +.TP +.BI \--servicenode= nid,... +Set the NID(s) of all service partners. This option treats all nodes as equal +service nodes. Cannot be used with --failnode. .TP .BI \--fsname= filesystem_name The Lustre filesystem this service will be part of. The maximum diff --git a/lustre/doc/tunefs.lustre.8 b/lustre/doc/tunefs.lustre.8 index d7eba1f..6ee0c96 100644 --- a/lustre/doc/tunefs.lustre.8 +++ b/lustre/doc/tunefs.lustre.8 @@ -35,6 +35,11 @@ Remove all previous parameter info .TP .BI \--failnode= nid,... Set the NID(s) of a failover partner. This option can be repeated as desired. +Cannot be used with --servicenode. +.TP +.BI \--servicenode= nid,... +Set the NID(s) of all service partners. This option treats all nodes as equal +service nodes. Cannot be used with --failnode. .TP .BI \--fsname= filesystem_name The Lustre filesystem this service will be part of. 
Default is 'lustre' diff --git a/lustre/include/lustre_disk.h b/lustre/include/lustre_disk.h index 6c46056..07c0ca8 100644 --- a/lustre/include/lustre_disk.h +++ b/lustre/include/lustre_disk.h @@ -87,6 +87,8 @@ #define LDD_F_PARAM 0x0400 /** backend fs make use of IAM directory format. */ #define LDD_F_IAM_DIR 0x0800 +/** all nodes are specified as service nodes */ +#define LDD_F_NO_PRIMNODE 0x1000 enum ldd_mount_type { LDD_MT_EXT3 = 0, @@ -311,7 +313,7 @@ static inline void check_lcd(char *obd_name, int index, lcd->lcd_uuid[length - 1] = '\0'; LCONSOLE_ERROR("the client UUID (%s) on %s for exports" - "stored in last_rcvd(index = %d) is bad!\n", + "stored in last_rcvd(index = %d) is bad!\n", lcd->lcd_uuid, obd_name, index); } } diff --git a/lustre/include/lustre_param.h b/lustre/include/lustre_param.h index cfc510c..54a1f6b 100644 --- a/lustre/include/lustre_param.h +++ b/lustre/include/lustre_param.h @@ -54,7 +54,8 @@ int class_get_next_param(char **params, char *copy); int class_match_param(char *buf, char *key, char **valp); int class_parse_nid(char *buf, lnet_nid_t *nid, char **endh); int class_parse_net(char *buf, __u32 *net, char **endh); -int class_match_net(char *buf, lnet_nid_t nid); +int class_match_nid(char *buf, char *key, lnet_nid_t nid); +int class_match_net(char *buf, char *key, __u32 net); /* obd_mount.c */ int do_lcfg(char *cfgname, lnet_nid_t nid, int cmd, char *s1, char *s2, char *s3, char *s4); diff --git a/lustre/mgs/mgs_handler.c b/lustre/mgs/mgs_handler.c index 266c835..d1e4b65 100644 --- a/lustre/mgs/mgs_handler.c +++ b/lustre/mgs/mgs_handler.c @@ -454,7 +454,8 @@ static int mgs_handle_target_reg(struct ptlrpc_request *req) /* Nothing wrong, or fatal error */ GOTO(out_nolock, rc); } else { - if ((rc = mgs_check_failover_reg(mti))) + if (!(mti->mti_flags & LDD_F_NO_PRIMNODE) + && (rc = mgs_check_failover_reg(mti))) GOTO(out_nolock, rc); } diff --git a/lustre/obdclass/obd_config.c b/lustre/obdclass/obd_config.c index 5e08e68..de433ff 100644 
--- a/lustre/obdclass/obd_config.c +++ b/lustre/obdclass/obd_config.c @@ -235,19 +235,42 @@ int class_parse_net(char *buf, __u32 *net, char **endh) return class_parse_value(buf, CLASS_PARSE_NET, (void *)net, endh); } -int class_match_net(char *buf, lnet_nid_t nid) +/* 1 param contains key and match + * 0 param contains key and not match + * -1 param does not contain key + */ +int class_match_nid(char *buf, char *key, lnet_nid_t nid) +{ + lnet_nid_t tmp; + int rc = -1; + + while (class_find_param(buf, key, &buf) == 0) { + /* please restrict to the nids pertaining to + * the specified nids */ + while (class_parse_nid(buf, &tmp, &buf) == 0) { + if (tmp == nid) + return 1; + } + rc = 0; + } + return rc; +} + +int class_match_net(char *buf, char *key, __u32 net) { - __u32 net; + __u32 tmp; + int rc = -1; - while (class_find_param(buf, PARAM_NETWORK, &buf) == 0) { + while (class_find_param(buf, key, &buf) == 0) { /* please restrict to the nids pertaining to * the specified networks */ - while (class_parse_net(buf, &net, &buf) == 0) { - if (LNET_NIDNET(nid) == net) + while (class_parse_net(buf, &tmp, &buf) == 0) { + if (tmp == net) return 1; } + rc = 0; } - return 0; + return rc; } EXPORT_SYMBOL(class_find_param); @@ -255,6 +278,7 @@ EXPORT_SYMBOL(class_get_next_param); EXPORT_SYMBOL(class_match_param); EXPORT_SYMBOL(class_parse_nid); EXPORT_SYMBOL(class_parse_net); +EXPORT_SYMBOL(class_match_nid); EXPORT_SYMBOL(class_match_net); /********************** class fns **********************/ diff --git a/lustre/obdclass/obd_mount.c b/lustre/obdclass/obd_mount.c index 537a6b7..852eaa5 100644 --- a/lustre/obdclass/obd_mount.c +++ b/lustre/obdclass/obd_mount.c @@ -980,12 +980,17 @@ static int server_sb2mti(struct super_block *sb, struct mgs_target_info *mti) if (LNET_NETTYP(LNET_NIDNET(id.nid)) == LOLND) continue; - if (class_find_param(ldd->ldd_params, - PARAM_NETWORK, NULL) == 0 && - !class_match_net(ldd->ldd_params, id.nid)) { - /* can't match specified network */ + /* server 
use --servicenode param, only allow specified + * nids be registered */ + if ((ldd->ldd_flags & LDD_F_NO_PRIMNODE) != 0 && + class_match_nid(ldd->ldd_params, + PARAM_FAILNODE, id.nid) < 1) + continue; + + /* match specified network */ + if (!class_match_net(ldd->ldd_params, + PARAM_NETWORK, LNET_NIDNET(id.nid))) continue; - } mti->mti_nids[mti->mti_nid_count] = id.nid; mti->mti_nid_count++; diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh index b3d4920..8945d77 100644 --- a/lustre/tests/conf-sanity.sh +++ b/lustre/tests/conf-sanity.sh @@ -2637,16 +2637,25 @@ test_56() { } run_test 56 "check big indexes" -test_57() { # bug 22656 +test_57a() { # bug 22656 local NID=$(do_facet ost1 "$LCTL get_param nis" | tail -1 | awk '{print $1}') writeconf do_facet ost1 "$TUNEFS --failnode=$NID `ostdevname 1`" || error "tunefs failed" start_mgsmds start_ost && error "OST registration from failnode should fail" - stop_mds reformat } -run_test 57 "initial registration from failnode should fail (should return errs)" +run_test 57a "initial registration from failnode should fail (should return errs)" + +test_57b() { + local NID=$(do_facet ost1 "$LCTL get_param nis" | tail -1 | awk '{print $1}') + writeconf + do_facet ost1 "$TUNEFS --servicenode=$NID `ostdevname 1`" || error "tunefs failed" + start_mgsmds + start_ost || error "OST registration from servicenode should not fail" + reformat +} +run_test 57b "initial registration from servicenode should not fail" count_osts() { do_facet mgs $LCTL get_param mgs.MGS.live.$FSNAME | grep OST | wc -l diff --git a/lustre/utils/mkfs_lustre.c b/lustre/utils/mkfs_lustre.c index 4a8b906..53b9317 100644 --- a/lustre/utils/mkfs_lustre.c +++ b/lustre/utils/mkfs_lustre.c @@ -122,6 +122,9 @@ void usage(FILE *out) "\t\t\trequired for all targets other than the mgs node\n" "\t\t--fsname= : default is 'lustre'\n" "\t\t--failnode=[,<...>] : NID(s) of a failover partner\n" + "\t\t\tcannot be used with --servicenode\n" + 
"\t\t--servicenode=[,<...>] : NID(s) of all service partners\n" + "\t\t\ttreat all nodes as equal service node, cannot be used with --failnode\n" "\t\t--param = : set a permanent parameter\n" "\t\t\te.g. --param sys.timeout=40\n" "\t\t\t --param lov.stripesize=2M\n" @@ -794,7 +797,7 @@ void print_ldd(char *str, struct lustre_disk_data *ldd) printf("Lustre FS: %s\n", ldd->ldd_fsname); printf("Mount type: %s\n", MT_STR(ldd)); printf("Flags: %#x\n", ldd->ldd_flags); - printf(" (%s%s%s%s%s%s%s%s%s)\n", + printf(" (%s%s%s%s%s%s%s%s%s%s)\n", IS_MDT(ldd) ? "MDT ":"", IS_OST(ldd) ? "OST ":"", IS_MGS(ldd) ? "MGS ":"", @@ -803,6 +806,7 @@ void print_ldd(char *str, struct lustre_disk_data *ldd) ldd->ldd_flags & LDD_F_UPDATE ? "update ":"", ldd->ldd_flags & LDD_F_WRITECONF ? "writeconf ":"", ldd->ldd_flags & LDD_F_IAM_DIR ? "IAM_dir_format ":"", + ldd->ldd_flags & LDD_F_NO_PRIMNODE? "no_primnode ":"", ldd->ldd_flags & LDD_F_UPGRADE14 ? "upgrade1.4 ":""); printf("Persistent mount opts: %s\n", ldd->ldd_mount_opts); printf("Parameters:%s\n", ldd->ldd_params); @@ -1268,15 +1272,17 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop, {"print", 0, 0, 'n'}, {"quiet", 0, 0, 'q'}, {"reformat", 0, 0, 'r'}, + {"servicenode", 1, 0, 's'}, {"verbose", 0, 0, 'v'}, {"writeconf", 0, 0, 'w'}, {"upgrade_to_18", 0, 0, 'U'}, {"network", 1, 0, 't'}, {0, 0, 0, 0} }; - char *optstring = "b:c:C:d:ef:Ghi:k:L:m:MnNo:Op:Pqru:vw"; + char *optstring = "b:c:C:d:ef:Ghi:k:L:m:MnNo:Op:Pqrs:t:Uu:vw"; int opt; int rc, longidx; + int failnode_set = 0, servicenode_set = 0; while ((opt = getopt_long(argc, argv, optstring, long_opt, &longidx)) != EOF) { @@ -1323,8 +1329,19 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop, /* Must update the mgs logs */ mop->mo_ldd.ldd_flags |= LDD_F_UPDATE; break; - case 'f': { - char *nids = convert_hostnames(optarg); + case 'f': + case 's': { + char *nids; + + if ((opt == 'f' && servicenode_set) + || (opt == 's' && failnode_set)) { + 
fprintf(stderr, "%s: %s cannot use with --%s\n", + progname, long_opt[longidx].name, + opt == 'f' ? "servicenode" : "failnode"); + return 1; + } + + nids = convert_hostnames(optarg); if (!nids) return 1; rc = add_param(mop->mo_ldd.ldd_params, PARAM_FAILNODE, @@ -1334,6 +1351,12 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop, return rc; /* Must update the mgs logs */ mop->mo_ldd.ldd_flags |= LDD_F_UPDATE; + if (opt == 'f') { + failnode_set = 1; + } else { + mop->mo_ldd.ldd_flags |= LDD_F_NO_PRIMNODE; + servicenode_set = 1; + } failover = 1; break; }