Whamcloud - gitweb
LU-3829 ldlm: MDT mount fails on MDS w/o MGS on it 09/7509/6
authorBobi Jam <bobijam.xu@intel.com>
Mon, 9 Sep 2013 11:02:19 +0000 (19:02 +0800)
committerOleg Drokin <oleg.drokin@intel.com>
Wed, 18 Sep 2013 08:47:16 +0000 (08:47 +0000)
If we specify multiple --mgsnode for an MDT, when we start the MDS upon
it while the MGS is on the other node, the MGC import connection will
always select the local nid (which is one of the candidate mgsnodes)
since it thinks it is the closest connection.

This patch treats further --mgsnode nids as failover nids, so that
multiple import connections are added for the MGC import.

Signed-off-by: Bobi Jam <bobijam.xu@intel.com>
Change-Id: Ifd015a6df47d4285ff5081a9e1e6eabfc405b193
Reviewed-on: http://review.whamcloud.com/7509
Reviewed-by: Liang Zhen <liang.zhen@intel.com>
Reviewed-by: Lai Siyao <lai.siyao@intel.com>
Tested-by: Hudson
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/obdclass/obd_mount.c

index dab4140..25aae80 100644 (file)
@@ -336,36 +336,48 @@ int lustre_start_mgc(struct super_block *sb)
         sprintf(niduuid, "%s_%x", mgcname, i);
        if (IS_SERVER(lsi)) {
                ptr = lsi->lsi_lmd->lmd_mgs;
+               CDEBUG(D_MOUNT, "mgs nids %s.\n", ptr);
                if (IS_MGS(lsi)) {
-                        /* Use local nids (including LO) */
-                        lnet_process_id_t id;
-                        while ((rc = LNetGetId(i++, &id)) != -ENOENT) {
-                                rc = do_lcfg(mgcname, id.nid,
-                                             LCFG_ADD_UUID, niduuid, 0,0,0);
-                        }
-                } else {
-                        /* Use mgsnode= nids */
+                       /* Use local nids (including LO) */
+                       lnet_process_id_t id;
+                       while ((rc = LNetGetId(i++, &id)) != -ENOENT) {
+                               rc = do_lcfg(mgcname, id.nid, LCFG_ADD_UUID,
+                                            niduuid, 0, 0, 0);
+                       }
+               } else {
+                       /* Use mgsnode= nids */
                        /* mount -o mgsnode=nid */
                        if (lsi->lsi_lmd->lmd_mgs) {
                                ptr = lsi->lsi_lmd->lmd_mgs;
                        } else if (class_find_param(ptr, PARAM_MGSNODE,
                                                    &ptr) != 0) {
-                                CERROR("No MGS nids given.\n");
-                                GOTO(out_free, rc = -EINVAL);
-                        }
-                        while (class_parse_nid(ptr, &nid, &ptr) == 0) {
-                                rc = do_lcfg(mgcname, nid,
-                                             LCFG_ADD_UUID, niduuid, 0,0,0);
-                                i++;
-                        }
-                }
+                               CERROR("No MGS nids given.\n");
+                               GOTO(out_free, rc = -EINVAL);
+                       }
+                       /*
+                        * LU-3829.
+                        * Here we only take the first mgsnid as its primary
+                        * serving mgs node, the rest mgsnid will be taken as
+                        * failover mgs node, otherwise they would be takens
+                        * as multiple nids of a single mgs node.
+                        */
+                       while (class_parse_nid(ptr, &nid, &ptr) == 0) {
+                               rc = do_lcfg(mgcname, nid, LCFG_ADD_UUID,
+                                            niduuid, 0, 0, 0);
+                               if (rc == 0) {
+                                       i = 1;
+                                       break;
+                               }
+                       }
+               }
         } else { /* client */
                 /* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */
                 ptr = lsi->lsi_lmd->lmd_dev;
                 while (class_parse_nid(ptr, &nid, &ptr) == 0) {
-                        rc = do_lcfg(mgcname, nid,
-                                     LCFG_ADD_UUID, niduuid, 0,0,0);
-                        i++;
+                       rc = do_lcfg(mgcname, nid, LCFG_ADD_UUID,
+                                    niduuid, 0, 0, 0);
+                       if (rc == 0)
+                               ++i;
                         /* Stop at the first failover nid */
                         if (*ptr == ':')
                                 break;
@@ -394,20 +406,22 @@ int lustre_start_mgc(struct super_block *sb)
         i = 1;
        while (ptr && ((*ptr == ':' ||
               class_find_param(ptr, PARAM_MGSNODE, &ptr) == 0))) {
-                /* New failover node */
-                sprintf(niduuid, "%s_%x", mgcname, i);
-                j = 0;
+               /* New failover node */
+               sprintf(niduuid, "%s_%x", mgcname, i);
+               j = 0;
                while (class_parse_nid_quiet(ptr, &nid, &ptr) == 0) {
-                        j++;
-                        rc = do_lcfg(mgcname, nid,
-                                     LCFG_ADD_UUID, niduuid, 0,0,0);
-                        if (*ptr == ':')
-                                break;
-                }
-                if (j > 0) {
-                        rc = do_lcfg(mgcname, 0, LCFG_ADD_CONN,
-                                     niduuid, 0, 0, 0);
-                        i++;
+                       rc = do_lcfg(mgcname, nid, LCFG_ADD_UUID,
+                                    niduuid, 0, 0, 0);
+                       if (rc == 0)
+                               ++j;
+                       if (*ptr == ':')
+                               break;
+               }
+               if (j > 0) {
+                       rc = do_lcfg(mgcname, 0, LCFG_ADD_CONN,
+                                    niduuid, 0, 0, 0);
+                       if (rc == 0)
+                               ++i;
                 } else {
                         /* at ":/fsname" */
                         break;