From 5d5b06e418f1d4a88afdc932e38a5a285347bcae Mon Sep 17 00:00:00 2001 From: Bobi Jam Date: Mon, 9 Sep 2013 19:02:19 +0800 Subject: [PATCH] LU-3829 ldlm: MDT mount fails on MDS w/o MGS on it If we specify multiple --mgsnode for an MDT, when we start MDS upon it while MGS is on the other node, the MGC import connection will always select the local nid (which is one of the candidate mgsnode) since it thinks it is the closest connection. This patch treats further --mgsnode nids as failover nids, so that multiple import connections are added for the MGC import. Signed-off-by: Bobi Jam Change-Id: Ifd015a6df47d4285ff5081a9e1e6eabfc405b193 Reviewed-on: http://review.whamcloud.com/7509 Reviewed-by: Liang Zhen Reviewed-by: Lai Siyao Tested-by: Hudson Tested-by: Maloo Reviewed-by: Oleg Drokin --- lustre/obdclass/obd_mount.c | 80 ++++++++++++++++++++++++++------------------- 1 file changed, 47 insertions(+), 33 deletions(-) diff --git a/lustre/obdclass/obd_mount.c b/lustre/obdclass/obd_mount.c index fcfca43..057a3fe 100644 --- a/lustre/obdclass/obd_mount.c +++ b/lustre/obdclass/obd_mount.c @@ -337,36 +337,48 @@ int lustre_start_mgc(struct super_block *sb) sprintf(niduuid, "%s_%x", mgcname, i); if (IS_SERVER(lsi)) { ptr = lsi->lsi_lmd->lmd_mgs; + CDEBUG(D_MOUNT, "mgs nids %s.\n", ptr); if (IS_MGS(lsi)) { - /* Use local nids (including LO) */ - lnet_process_id_t id; - while ((rc = LNetGetId(i++, &id)) != -ENOENT) { - rc = do_lcfg(mgcname, id.nid, - LCFG_ADD_UUID, niduuid, 0,0,0); - } - } else { - /* Use mgsnode= nids */ + /* Use local nids (including LO) */ + lnet_process_id_t id; + while ((rc = LNetGetId(i++, &id)) != -ENOENT) { + rc = do_lcfg(mgcname, id.nid, LCFG_ADD_UUID, + niduuid, 0, 0, 0); + } + } else { + /* Use mgsnode= nids */ /* mount -o mgsnode=nid */ if (lsi->lsi_lmd->lmd_mgs) { ptr = lsi->lsi_lmd->lmd_mgs; } else if (class_find_param(ptr, PARAM_MGSNODE, &ptr) != 0) { - CERROR("No MGS nids given.\n"); - GOTO(out_free, rc = -EINVAL); - } - while (class_parse_nid(ptr, 
&nid, &ptr) == 0) { - rc = do_lcfg(mgcname, nid, - LCFG_ADD_UUID, niduuid, 0,0,0); - i++; - } - } + CERROR("No MGS nids given.\n"); + GOTO(out_free, rc = -EINVAL); + } + /* + * LU-3829. + * Here we only take the first mgsnid as its primary + * serving mgs node, the rest mgsnid will be taken as + * failover mgs node, otherwise they would be takens + * as multiple nids of a single mgs node. + */ + while (class_parse_nid(ptr, &nid, &ptr) == 0) { + rc = do_lcfg(mgcname, nid, LCFG_ADD_UUID, + niduuid, 0, 0, 0); + if (rc == 0) { + i = 1; + break; + } + } + } } else { /* client */ /* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */ ptr = lsi->lsi_lmd->lmd_dev; while (class_parse_nid(ptr, &nid, &ptr) == 0) { - rc = do_lcfg(mgcname, nid, - LCFG_ADD_UUID, niduuid, 0,0,0); - i++; + rc = do_lcfg(mgcname, nid, LCFG_ADD_UUID, + niduuid, 0, 0, 0); + if (rc == 0) + ++i; /* Stop at the first failover nid */ if (*ptr == ':') break; @@ -395,20 +407,22 @@ int lustre_start_mgc(struct super_block *sb) i = 1; while (ptr && ((*ptr == ':' || class_find_param(ptr, PARAM_MGSNODE, &ptr) == 0))) { - /* New failover node */ - sprintf(niduuid, "%s_%x", mgcname, i); - j = 0; + /* New failover node */ + sprintf(niduuid, "%s_%x", mgcname, i); + j = 0; while (class_parse_nid_quiet(ptr, &nid, &ptr) == 0) { - j++; - rc = do_lcfg(mgcname, nid, - LCFG_ADD_UUID, niduuid, 0,0,0); - if (*ptr == ':') - break; - } - if (j > 0) { - rc = do_lcfg(mgcname, 0, LCFG_ADD_CONN, - niduuid, 0, 0, 0); - i++; + rc = do_lcfg(mgcname, nid, LCFG_ADD_UUID, + niduuid, 0, 0, 0); + if (rc == 0) + ++j; + if (*ptr == ':') + break; + } + if (j > 0) { + rc = do_lcfg(mgcname, 0, LCFG_ADD_CONN, + niduuid, 0, 0, 0); + if (rc == 0) + ++i; } else { /* at ":/fsname" */ break; -- 1.8.3.1