From 635fd70cd4b783ab73cc1829c74f6048d6795a0f Mon Sep 17 00:00:00 2001 From: nathan Date: Thu, 2 Mar 2006 21:55:23 +0000 Subject: [PATCH] Branch b1_4_mountconf b=9861 add method to describe multiple failover nodes each with multiple nids to mkfs.lustre. --- lustre/include/linux/lustre_disk.h | 30 +++---- lustre/include/linux/lustre_idl.h | 1 + lustre/include/lustre/lustre_user.h | 16 ++-- lustre/mgs/mgs_handler.c | 9 +- lustre/mgs/mgs_internal.h | 1 + lustre/mgs/mgs_llog.c | 166 ++++++++++++++++++++++++++++-------- lustre/obdclass/obd_mount.c | 21 +++-- lustre/ptlrpc/pack_generic.c | 3 + lustre/utils/mkfs_lustre.c | 127 +++++++++++++++++++-------- 9 files changed, 270 insertions(+), 104 deletions(-) diff --git a/lustre/include/linux/lustre_disk.h b/lustre/include/linux/lustre_disk.h index bbfe342..647341a 100644 --- a/lustre/include/linux/lustre_disk.h +++ b/lustre/include/linux/lustre_disk.h @@ -38,8 +38,6 @@ #define MOUNT_DATA_FILE MOUNT_CONFIGS_DIR"/mountdata" #define MDT_LOGS_DIR "LOGS" /* COMPAT_146 */ -#define LDD_MAGIC 0x1dd00001 - #define LDD_F_SV_TYPE_MDT 0x0001 #define LDD_F_SV_TYPE_OST 0x0002 #define LDD_F_SV_TYPE_MGS 0x0004 @@ -78,6 +76,8 @@ static inline char *mt_str(enum ldd_mount_type mt) #define LDD_INCOMPAT_SUPP 0 #define LDD_ROCOMPAT_SUPP 0 +#define LDD_MAGIC 0x1dd00001 + /* FIXME does on-disk ldd have to be a fixed endianness? 
(like last_rcvd) */ struct lustre_disk_data { __u32 ldd_magic; @@ -89,20 +89,20 @@ struct lustre_disk_data { __u32 ldd_flags; /* LDD_SV_TYPE */ __u32 ldd_svindex; /* server index (0001), must match svname */ - enum ldd_mount_type ldd_mount_type; /* target fs type LDD_MT_* */ - char ldd_fsname[64]; /* filesystem this server is part of */ +/*28*/ char ldd_fsname[64]; /* filesystem this server is part of */ char ldd_svname[64]; /* this server's name (lustre-mdt0001)*/ + enum ldd_mount_type ldd_mount_type; /* target fs type LDD_MT_* */ __u16 ldd_mgsnid_count; __u16 ldd_failnid_count; /* server failover nid count */ - lnet_nid_t ldd_mgsnid[MTI_NIDS_MAX]; /* mgs nid list; lmd can - override */ +/*164*/ lnet_nid_t ldd_mgsnid[MTI_NIDS_MAX]; /* mgs nids; lmd can override */ lnet_nid_t ldd_failnid[MTI_NIDS_MAX]; /* server failover nids */ - /* COMPAT_146 */ - __u8 ldd_uuid[40]; /* server UUID */ - /* end COMPAT_146 */ + __u16 ldd_mgsnode[8]; /* nid count of each node in... */ + __u16 ldd_failnode[8]; /* ...the nid arrays */ + +/*1220*/__u8 ldd_uuid[40]; /* server UUID (COMPAT_146) */ - __u8 ldd_padding[4096 - 1228]; - char ldd_mount_opts[4096]; /* target fs mount opts */ +/*1260*/__u8 ldd_padding[4096 - 1260]; +/*4096*/char ldd_mount_opts[4094]; /* target fs mount opts */ }; #define IS_MDT(data) ((data)->ldd_flags & LDD_F_SV_TYPE_MDT) @@ -128,7 +128,7 @@ static inline int server_make_name(__u32 flags, __u16 index, char *fs, } /* Get the index from the obd name */ -int server_name2index(char *svname, unsigned long *idx, char **endptr); +int server_name2index(char *svname, __u32 *idx, char **endptr); /****************** mount command *********************/ @@ -142,14 +142,14 @@ int server_name2index(char *svname, unsigned long *idx, char **endptr); struct lustre_mount_data { __u32 lmd_magic; __u32 lmd_flags; /* lustre mount flags */ - __u16 lmd_mgsnid_count; /* how many failover nids we have for + int lmd_mgsnid_count; /* how many failover nids we have for the MGS */ - 
lnet_nid_t lmd_mgsnid[MTI_NIDS_MAX];/* who to contact at startup */ + int lmd_exclude_count; char *lmd_dev; /* device or file system name */ char *lmd_opts; /* lustre mount options (as opposed to _device_ mount options) */ __u32 *lmd_exclude; /* array of OSTs to ignore */ - int lmd_exclude_count; /* number of valid entries in array */ + lnet_nid_t lmd_mgsnid[MTI_NIDS_MAX];/* who to contact at startup */ }; #define LMD_FLG_CLIENT 0x0002 /* Mounting a client only */ diff --git a/lustre/include/linux/lustre_idl.h b/lustre/include/linux/lustre_idl.h index 12ec935..efd593d 100644 --- a/lustre/include/linux/lustre_idl.h +++ b/lustre/include/linux/lustre_idl.h @@ -1000,6 +1000,7 @@ struct mgs_target_info { char mti_uuid[sizeof(struct obd_uuid)]; lnet_nid_t mti_nids[MTI_NIDS_MAX]; /* host nids */ lnet_nid_t mti_failnids[MTI_NIDS_MAX]; /* partner nids */ + __u16 mti_failnodes[8]; /* last nid index of each partner */ __u32 mti_stripe_index; __u32 mti_nid_count; __u32 mti_failnid_count; diff --git a/lustre/include/lustre/lustre_user.h b/lustre/include/lustre/lustre_user.h index 767a92f..08f6158 100644 --- a/lustre/include/lustre/lustre_user.h +++ b/lustre/include/lustre/lustre_user.h @@ -176,11 +176,17 @@ static inline void obd_str2uuid(struct obd_uuid *uuid, char *tmp) uuid->uuid[sizeof(*uuid) - 1] = '\0'; } -/* If we're not null-terminated, crash here instead of in printf */ -#define obd_uuid2str(x) ( \ - LASSERT((x)->uuid[sizeof(struct obd_uuid) - 1] == '\0'), \ - (char *)(x)->uuid \ -) +static inline char *obd_uuid2str(struct obd_uuid *uuid) +{ + if (uuid->uuid[sizeof(*uuid) - 1] != '\0') { + /* Obviously not safe, but for printfs, no real harm done...*/ + static char temp[sizeof(*uuid)]; + memcpy(temp, uuid->uuid, sizeof(*uuid)); + temp[sizeof(*uuid) - 1] = '\0'; + return temp; + } + return (char *)(uuid->uuid); +} #define LUSTRE_Q_QUOTAON 0x800002 /* turn quotas on */ #define LUSTRE_Q_QUOTAOFF 0x800003 /* turn quotas off */ diff --git a/lustre/mgs/mgs_handler.c 
b/lustre/mgs/mgs_handler.c index a89944a..ddcb2f4 100644 --- a/lustre/mgs/mgs_handler.c +++ b/lustre/mgs/mgs_handler.c @@ -316,12 +316,13 @@ static int mgs_check_target(struct obd_device *obd, struct mgs_target_info *mti) mti->mti_flags |= LDD_F_WRITECONF; rc = 1; } else { - /* index is correctly marked used */ - rc = 0; + /* Index is correctly marked as used */ + + /* If the logs don't contain the mti_nids then add + them as failover nids */ + rc = mgs_check_failnid(obd, mti); } - /* FIXME If the logs don't contain the mti_nids then add - them all as failover nids? */ RETURN(rc); } diff --git a/lustre/mgs/mgs_internal.h b/lustre/mgs/mgs_internal.h index 01d17a3..0bd90ee 100644 --- a/lustre/mgs/mgs_internal.h +++ b/lustre/mgs/mgs_internal.h @@ -37,6 +37,7 @@ struct fs_db { int mgs_init_fsdb_list(struct obd_device *obd); int mgs_cleanup_fsdb_list(struct obd_device *obd); int mgs_check_index(struct obd_device *obd, struct mgs_target_info *mti); +int mgs_check_failnid(struct obd_device *obd, struct mgs_target_info *mti); int mgs_write_log_target(struct obd_device *obd, struct mgs_target_info *mti); int mgs_upgrade_sv_14(struct obd_device *obd, struct mgs_target_info *mti); int mgs_erase_logs(struct obd_device *obd, char *fsname); diff --git a/lustre/mgs/mgs_llog.c b/lustre/mgs/mgs_llog.c index b0f8b86..69cab1d 100644 --- a/lustre/mgs/mgs_llog.c +++ b/lustre/mgs/mgs_llog.c @@ -61,7 +61,7 @@ static int mgs_fsdb_handler(struct llog_handle *llh, struct llog_rec_hdr *rec, int cfg_len = rec->lrh_len; char *cfg_buf = (char*) (rec + 1); struct lustre_cfg *lcfg; - unsigned long index; + __u32 index; int rc = 0; ENTRY; @@ -87,7 +87,7 @@ static int mgs_fsdb_handler(struct llog_handle *llh, struct llog_rec_hdr *rec, lcfg->lcfg_command == LCFG_LOV_DEL_OBD) { index = simple_strtoul(lustre_cfg_string(lcfg, 2), NULL, 10); - CDEBUG(D_MGS, "OST index for %s is %lu (%s)\n", + CDEBUG(D_MGS, "OST index for %s is %u (%s)\n", lustre_cfg_string(lcfg, 1), index, lustre_cfg_string(lcfg, 2)); 
set_bit(index, fsdb->fsdb_ost_index_map); @@ -105,7 +105,7 @@ static int mgs_fsdb_handler(struct llog_handle *llh, struct llog_rec_hdr *rec, index = 0; } rc = 0; - CDEBUG(D_MGS, "MDT index is %lu\n", index); + CDEBUG(D_MGS, "MDT index is %u\n", index); set_bit(index, fsdb->fsdb_mdt_index_map); } @@ -114,8 +114,6 @@ static int mgs_fsdb_handler(struct llog_handle *llh, struct llog_rec_hdr *rec, struct cfg_marker *marker; marker = lustre_cfg_buf(lcfg, 1); fsdb->fsdb_gen = max(fsdb->fsdb_gen, marker->cm_step); - CDEBUG(D_MGS, "marker %d %s\n", marker->cm_step, - marker->cm_comment); } RETURN(rc); @@ -721,28 +719,36 @@ static int mgs_write_log_failnids(struct obd_device *obd, { char *failnodeuuid; lnet_nid_t nid; - int i, rc = 0; + int i, j = 0, rc = 0; if (!mti->mti_failnid_count) return 0; - /* Are these multiple nids for the same failover node, or - multiple nodes? In the former case, there should be only - one add_conn and a single nid uuid. In the latter, - multiple nid uuids and add_conns. Assuming the former here, - since who uses more than 2 failover nodes? 
*/ - /* FWIW, it doesn't look like lconf correctly handles the former case */ - /* - #12 L add_uuid nid=uml2@tcp(0x20000c0a80202) 0: 1:192.168.2.2@tcp - #13 L add_conn 0:lustre-OST0000-osc 1:uml2_UUID + #03 L add_uuid nid=uml1@tcp(0x20000c0a80201) nal=90 0: 1:uml1_UUID + #04 L add_uuid nid=1@elan(0x1000000000001) nal=90 0: 1:uml1_UUID + #05 L setup 0:OSC_uml1_ost1_mdsA 1:ost1_UUID 2:uml1_UUID + #06 L add_uuid nid=uml2@tcp(0x20000c0a80202) nal=90 0: 1:uml2_UUID + #0x L add_uuid nid=2@elan(0x1000000000002) nal=90 0: 1:uml2_UUID + #07 L add_conn 0:OSC_uml1_ost1_mdsA 1:uml2_UUID */ - + + /* We don't know the failover node name, so just use the first nid + as the uuid */ name_create(libcfs_nid2str(mti->mti_failnids[0]), "", &failnodeuuid); for (i = 0; i < mti->mti_failnid_count; i++) { nid = mti->mti_failnids[i]; - CDEBUG(D_MGS, "add nid %s for failover uuid %s\n", - libcfs_nid2str(nid), failnodeuuid); + if (mti->mti_failnodes[j] && (i >= mti->mti_failnodes[j])) { + /* This is the first nid of a new failover node. + add_conn the old uuid, and start a new one. */ + rc = record_add_conn(obd, llh, cliname, failnodeuuid); + name_destroy(failnodeuuid); + name_create(libcfs_nid2str(mti->mti_failnids[i]), + "", &failnodeuuid); + j++; + } + CDEBUG(D_MGS, "add nid %s for failover uuid %s, client %s\n", + libcfs_nid2str(nid), failnodeuuid, cliname); rc = record_add_uuid(obd, llh, nid, failnodeuuid); } rc = record_add_conn(obd, llh, cliname, failnodeuuid); @@ -996,6 +1002,15 @@ static int mgs_write_log_add_failnid(struct obd_device *obd, struct fs_db *fsdb, int rc; ENTRY; + /* Verify that we know about this target */ + if (mgs_log_is_empty(obd, mti->mti_svname)) { + LCONSOLE_ERROR("The target %s has not registered yet. " + "It must be started before failnids can " + "be added.\n", mti->mti_svname); + RETURN(-ENOENT); + } + + /* Create mdc/osc client name (e.g. 
lustre-OST0001-osc) */ if (mti->mti_flags & LDD_F_SV_TYPE_MDT) { name_create(mti->mti_svname, "-mdc", &cliname); } else if (mti->mti_flags & LDD_F_SV_TYPE_OST) { @@ -1004,11 +1019,6 @@ static int mgs_write_log_add_failnid(struct obd_device *obd, struct fs_db *fsdb, RETURN(-EINVAL); } - /* Verify that we know about this target */ - if (mgs_log_is_empty(obd, mti->mti_svname)) { - CERROR("Missing target log for %s\n", mti->mti_svname); - GOTO(out, rc = -ENOENT); - } /* Add failover nids to client log */ name_create(mti->mti_fsname, "-client", &logname); @@ -1034,40 +1044,73 @@ static int mgs_write_log_add_failnid(struct obd_device *obd, struct fs_db *fsdb, name_destroy(logname); } -out: name_destroy(cliname); RETURN(rc); } +int mgs_check_failnid(struct obd_device *obd, struct mgs_target_info *mti) +{ + /* Not implementing automatic failover nid addition at this time. */ + return 0; +#if 0 + struct fs_db *fsdb; + int rc; + ENTRY; + + rc = mgs_find_or_make_fsdb(obd, fsname, &fsdb); + if (rc) + RETURN(rc); + + if (mgs_log_is_empty(obd, mti->mti_svname)) + /* should never happen */ + RETURN(-ENOENT); + + CDEBUG(D_MGS, "Checking for new failnids for %s\n", mti->mti_svname); + // FIXME check logs + /* FIXME we need a real database lookup. Create on-disk db of known + size, lookup by index */ + /* Check each nid, or check only nid0 and add all if nid0 is missing? + What if someone adds a net to a node? Better check everything. */ + /* if nid 0 is missing, mgs_write_log_add_failnid. + if just one nid is missing, add uuid for nodeuuid[nid0]). 
+ */ + + + down(&fsdb->fsdb_sem); + rc = mgs_write_log_add_failnid(obd, fsdb, mti); + up(&fsdb->fsdb_sem); + + RETURN(rc); +#endif +} + int mgs_write_log_target(struct obd_device *obd, struct mgs_target_info *mti) { struct fs_db *fsdb; - int rc = -EINVAL, addfail = 0; + int rc = -EINVAL; + ENTRY; /* set/check the new target index */ rc = mgs_set_index(obd, mti); if (rc < 0) { CERROR("Can't get index (%d)\n", rc); - return rc; + RETURN(rc); } if (rc == EALREADY) { // FIXME mark old log sections as invalid, add new. - CERROR("%s: Adding failnids only\n", mti->mti_svname); - /* Assume for now we're just updating failover nids */ - addfail++; + CERROR("updates not yet implemented\n"); + RETURN(-EALREADY); } rc = mgs_find_or_make_fsdb(obd, mti->mti_fsname, &fsdb); if (rc) { CERROR("Can't get db for %s\n", mti->mti_fsname); - return rc; + RETURN(rc); } down(&fsdb->fsdb_sem); - if (addfail) { - rc = mgs_write_log_add_failnid(obd, fsdb, mti); - } else if (mti->mti_flags & LDD_F_SV_TYPE_MDT) { + if (mti->mti_flags & LDD_F_SV_TYPE_MDT) { rc = mgs_write_log_mdt(obd, fsdb, mti); } else if (mti->mti_flags & LDD_F_SV_TYPE_OST) { rc = mgs_write_log_ost(obd, fsdb, mti); @@ -1077,7 +1120,7 @@ int mgs_write_log_target(struct obd_device *obd, } up(&fsdb->fsdb_sem); - return rc; + RETURN(rc); } @@ -1373,11 +1416,34 @@ static void print_lustre_cfg(struct lustre_cfg *lcfg) EXIT; } +/* returns 1 if key matches, else 0 */ +static int class_match_key(char *key, char *buf, char **valh) +{ + char *val; + *valh = NULL; + + if (strncmp(key, buf, strlen(key)) != 0) + return 0; + + val = strchr(buf, '='); + if (!val || (*(++val) == 0)) { + CERROR("Key has no value %s\n", buf); + return 0; + } + if (val - buf > strlen(key) + 1) { + /* We didn't match the entire key */ + return 0; + } + + *valh = val; + return 1; +} + /* Set a permanent (config log) param for a target or fs */ int mgs_setparam(struct obd_device *obd, char *fsname, struct lustre_cfg *lcfg) { struct fs_db *fsdb; - char *devname; + 
char *devname, *val; int rc = 0; ENTRY; @@ -1413,10 +1479,43 @@ int mgs_setparam(struct obd_device *obd, char *fsname, struct lustre_cfg *lcfg) GOTO(out, rc); } + /* add failover nidlist */ + if ((lcfg->lcfg_command == LCFG_PARAM) && + class_match_key("failnid", lustre_cfg_string(lcfg, 1), &val)) { + struct mgs_target_info *mti; + CDEBUG(D_MGS, "failnid, mod MDT, client\n"); + OBD_ALLOC_PTR(mti); + if (!mti) + GOTO(out, rc = -ENOMEM); + strcpy(mti->mti_fsname, fsname); + strcpy(mti->mti_svname, devname); + rc = server_name2index(devname, &mti->mti_stripe_index, NULL); + if (rc < 0) { + OBD_FREE_PTR(mti); + GOTO(out, rc); + } + mti->mti_flags = rc; + /* FIXME add to lctl. nids must be in lnet_nid_t + form, not ascii - we can't resolve hostnames from the + kernel. */ + mti->mti_failnid_count = simple_strtoul(val, NULL, 10); + /* caller-supplied count must fit mti_failnids */ + if (mti->mti_failnid_count > MTI_NIDS_MAX) { + OBD_FREE_PTR(mti); + GOTO(out, rc = -EINVAL); + } + memcpy(mti->mti_failnids, lustre_cfg_string(lcfg, 2), + mti->mti_failnid_count * sizeof(mti->mti_failnids[0])); + /* assume these are nids for a single node. */ + rc = mgs_write_log_add_failnid(obd, fsdb, mti); + OBD_FREE_PTR(mti); + GOTO(out, rc); + } + /* lov default stripe params */ - if (strstr(devname, "-mdtlov") && (lcfg->lcfg_command == LCFG_PARAM)) { + if ((lcfg->lcfg_command == LCFG_PARAM) && strstr(devname, "-mdtlov")) { char *lovname, *logname; - CDEBUG(D_MGS, "lov param, mod MDT and client\n"); + CDEBUG(D_MGS, "lov param, mod MDT, client\n"); name_create(fsname, "-MDT0000", &logname); name_create(fsname, "-mdtlov", &lovname); if (strcmp(lovname, devname) != 0) { diff --git a/lustre/obdclass/obd_mount.c b/lustre/obdclass/obd_mount.c index 2f5ca77..6233d16 100644 --- a/lustre/obdclass/obd_mount.c +++ b/lustre/obdclass/obd_mount.c @@ -830,10 +830,12 @@ static int server_sb2mti(struct super_block *sb, struct mgs_target_info *mti) mti->mti_nid_count, mti->mti_svname); break; } - } - + } + mti->mti_failnid_count = ldd->ldd_failnid_count; memcpy(mti->mti_failnids, ldd->ldd_failnid, sizeof(mti->mti_failnids)); + memcpy(mti->mti_failnodes, ldd->ldd_failnode, + 
sizeof(mti->mti_failnodes)); memcpy(mti->mti_uuid, ldd->ldd_uuid, sizeof(mti->mti_uuid)); mti->mti_config_ver = 0; mti->mti_flags = ldd->ldd_flags; @@ -1434,9 +1436,11 @@ out: /* Get the index from the obd name. rc = server type, or - rc < 0 on error */ -int server_name2index(char *svname, unsigned long *idx, char **endptr) + rc < 0 on error + if endptr isn't NULL it is set to end of name */ +int server_name2index(char *svname, __u32 *idx, char **endptr) { + unsigned long index; int rc; char *dash = strchr(svname, '-'); if (!dash) { @@ -1451,7 +1455,8 @@ int server_name2index(char *svname, unsigned long *idx, char **endptr) else return(-EINVAL); - *idx = simple_strtoul(dash + 4, endptr, 16); + index = simple_strtoul(dash + 4, endptr, 16); + *idx = index; return rc; } @@ -1508,7 +1513,7 @@ int lustre_check_exclusion(struct super_block *sb, char *svname) { struct lustre_sb_info *lsi = s2lsi(sb); struct lustre_mount_data *lmd = lsi->lsi_lmd; - unsigned long index; + __u32 index; int i, rc; ENTRY; @@ -1516,7 +1521,7 @@ int lustre_check_exclusion(struct super_block *sb, char *svname) if (rc != LDD_F_SV_TYPE_OST) RETURN(0); - CDEBUG(D_MOUNT, "Check exclusion %s (%ld) in %d of %s\n", svname, + CDEBUG(D_MOUNT, "Check exclusion %s (%d) in %d of %s\n", svname, index, lmd->lmd_exclude_count, lmd->lmd_dev); for(i = 0; i < lmd->lmd_exclude_count; i++) { @@ -1532,7 +1537,7 @@ int lustre_check_exclusion(struct super_block *sb, char *svname) static int lmd_make_exclusion(struct lustre_mount_data *lmd, char *ptr) { char *s1 = ptr, *s2; - unsigned long index, *exclude_list; + __u32 index, *exclude_list; int rc = 0; ENTRY; diff --git a/lustre/ptlrpc/pack_generic.c b/lustre/ptlrpc/pack_generic.c index bafc32e..d2cfce5 100644 --- a/lustre/ptlrpc/pack_generic.c +++ b/lustre/ptlrpc/pack_generic.c @@ -636,6 +636,9 @@ void lustre_swab_mgs_target_info(struct mgs_target_info *mti) __swab64s(&mti->mti_nids[i]); __swab64s(&mti->mti_failnids[i]); } + for (i = 0; i < 8; i++) { + 
__swab16s(&mti->mti_failnodes[i]); + } __swab32s(&mti->mti_stripe_index); __swab32s(&mti->mti_nid_count); __swab32s(&mti->mti_failnid_count); diff --git a/lustre/utils/mkfs_lustre.c b/lustre/utils/mkfs_lustre.c index 87254622..9fe657d 100644 --- a/lustre/utils/mkfs_lustre.c +++ b/lustre/utils/mkfs_lustre.c @@ -70,11 +70,10 @@ void usage(FILE *out) "\t\t--mdt: metadata storage, mutually exclusive with ost\n" "\t\t--mgs: configuration management service - one per site\n" "\toptions (in order of popularity):\n" - "\t\t--mgsnid=[,<...>] : NID(s) of a remote mgs node\n" + "\t\t--mgsnode=[,<...>] : NID(s) of a remote mgs node\n" "\t\t\trequired for all targets other than the mgs node\n" "\t\t--fsname= : default is 'lustre'\n" - "\t\t--failover=[,<...>] : list of NIDs for the failover\n" - "\t\t\tpartners for this target\n" + "\t\t--failnode=[,<...>] : NID(s) of a failover partner\n" "\t\t--index=#N : target index\n" /* FIXME implement 1.6.x "\t\t--configdev=: store configuration info\n" @@ -513,7 +512,7 @@ out: void print_ldd(char *str, struct lustre_disk_data *ldd) { - int i = 0; + int i = 0, j= 0; printf("\n %s:\n", str); printf("Target: %s\n", ldd->ldd_svname); if (ldd->ldd_svindex == INDEX_UNASSIGNED) @@ -534,18 +533,28 @@ void print_ldd(char *str, struct lustre_disk_data *ldd) ldd->ldd_flags & LDD_F_WRITECONF ? "writeconf ":"", ldd->ldd_flags & LDD_F_UPGRADE14 ? "upgrade1.4 ":""); printf("Persistent mount opts: %s\n", ldd->ldd_mount_opts); - printf("MGS nids: "); + printf("MGS"); + j = 0; for (i = 0; i < ldd->ldd_mgsnid_count; i++) { - printf("%c %s", (i == 0) ? ' ' : ',', - libcfs_nid2str(ldd->ldd_mgsnid[i])); + if (i == 0 || i == ldd->ldd_mgsnode[j]) { + if (i) + j++; + printf("\n node %d:", j + 1); + } + printf(" %s", libcfs_nid2str(ldd->ldd_mgsnid[i])); } - printf("\nFailover nids: "); + printf("\nFailover"); + j = 0; for (i = 0; i < ldd->ldd_failnid_count; i++) { - printf("%c %s", (i == 0) ? 
' ' : ',', - libcfs_nid2str(ldd->ldd_failnid[i])); + if (i == 0 || i == ldd->ldd_failnode[j]) { + if (i) + j++; + printf("\n node %d:", j + 1); + } + printf(" %s", libcfs_nid2str(ldd->ldd_failnid[i])); } - printf("\n\n"); + printf("\n"); } /* Write the server config files */ @@ -801,6 +810,52 @@ static inline void badopt(const char *opt, char *type) usage(stderr); } + +static int parse_nids(int first_spec, char *buf, __u16 *count, + lnet_nid_t *nids, __u16 *nodes) +{ + int j, i = *count; + char *s1 = buf, *s2; + + if (first_spec == 1) { + /* for the first nid spec in a tunefs, we erase all old nid + info */ + *count = 0; i = 0; + for (j = 0; j < 8; j++) + nodes[j] = 0; + } + + while ((s2 = strsep(&s1, ","))) { + if (i >= MTI_NIDS_MAX) { + fprintf(stderr, "%s: too many nids (%s...)\n", + progname, s2); + return 1; + } + nids[i] = libcfs_str2nid(s2); + if (nids[i] == LNET_NID_ANY) { + fprintf(stderr, "%s: malformed nid %s\n", + progname, s2); + return 1; + } + i++; + } + if (i == *count) + return 0; + + /* mark the last nid index in the first free node slot */ + j = 0; + while ((j < 8) && nodes[j]) + j++; + if (j >= 8) { + fprintf(stderr, "%s: too many nodes (%s...)\n", + progname, buf); + return 1; + } + *count = i; + nodes[j] = i; + return 0; +} + int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop, char **mountopts) { @@ -809,11 +864,13 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop, {"stripe-count-hint", 1, 0, 'c'}, {"configdev", 1, 0, 'C'}, {"device-size", 1, 0, 'd'}, + {"failnode", 1, 0, 'f'}, {"failover", 1, 0, 'f'}, {"mgs", 0, 0, 'G'}, {"help", 0, 0, 'h'}, {"index", 1, 0, 'i'}, {"mkfsoptions", 1, 0, 'k'}, + {"mgsnode", 1, 0, 'm'}, {"mgsnid", 1, 0, 'm'}, {"mdt", 0, 0, 'M'}, {"fsname",1, 0, 'n'}, @@ -867,20 +924,19 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop, mop->mo_device_sz = atol(optarg); break; case 'f': { - int i = 0; - char *s1 = optarg, *s2; - while ((s2 = strsep(&s1, ","))) { - 
mop->mo_ldd.ldd_failnid[i++] = - libcfs_str2nid(s2); - if (i >= MTI_NIDS_MAX) { - fprintf(stderr, "%s: too many failover " - "nids, ignoring %s...\n", - progname, s1); - break; - } + int rc; + static int first_fail_spec = 1; + rc = parse_nids(first_fail_spec, optarg, + &mop->mo_ldd.ldd_failnid_count, + mop->mo_ldd.ldd_failnid, + mop->mo_ldd.ldd_failnode); + if (rc) { + fprintf(stderr, "%s: bad failover nids\n", + progname); + return 1; } - mop->mo_ldd.ldd_failnid_count = i; mop->mo_ldd.ldd_flags |= LDD_F_UPDATE; + first_fail_spec++; break; } case 'G': @@ -903,24 +959,23 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop, sizeof(mop->mo_mkfsopts) - 1); break; case 'm': { - int i = 0; - char *s1 = optarg, *s2; + int rc; + static int first_mgs_spec = 1; if (IS_MGS(&mop->mo_ldd)) { badopt(long_opt[longidx].name, "non-MGS MDT,OST"); return 1; } - while ((s2 = strsep(&s1, ","))) { - mop->mo_ldd.ldd_mgsnid[i++] = - libcfs_str2nid(s2); - if (i >= MTI_NIDS_MAX) { - fprintf(stderr, "%s: too many MGS nids," - " ignoring %s...\n", - progname, s1); - break; - } + rc = parse_nids(first_mgs_spec, optarg, + &mop->mo_ldd.ldd_mgsnid_count, + mop->mo_ldd.ldd_mgsnid, + mop->mo_ldd.ldd_mgsnode); + if (rc) { + fprintf(stderr, "%s: bad MGS nids\n", + progname); + return 1; } - mop->mo_ldd.ldd_mgsnid_count = i; + first_mgs_spec++; break; } case 'M': @@ -992,6 +1047,8 @@ int main(int argc, char *const argv[]) char default_mountopts[512] = ""; int ret = 0; + assert(offsetof(struct lustre_disk_data, ldd_padding) == 1260); + if ((progname = strrchr(argv[0], '/')) != NULL) progname++; else -- 1.8.3.1