Whamcloud - gitweb
LU-3929 lwp: don't connect LWP to old MDT 01/8701/2
author Hongchao Zhang <hongchao.zhang@intel.com>
Thu, 8 Aug 2013 07:58:06 +0000 (15:58 +0800)
committer Oleg Drokin <oleg.drokin@intel.com>
Mon, 6 Jan 2014 03:49:47 +0000 (03:49 +0000)
When performing a rolling upgrade of Lustre from an old version without
LWP support (< 2.3.60) to a newer version, the OSTs are upgraded first.
The LWP will then try to connect to the old MDT, and the old MDT will
regard the LWP client as a normal client and save the client export on
disk for recovery, which will eventually break recovery since the LWP
connection isn't recoverable.

Change-Id: Ib58c11a9d06b78ac9617563ae1baa4844e0efa45
Signed-off-by: Hongchao Zhang <hongchao.zhang@intel.com>
Signed-off-by: Niu Yawei <yawei.niu@intel.com>
Reviewed-on: http://review.whamcloud.com/8701
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/obdclass/obd_mount_server.c

index 244c57a..e7042a0 100644 (file)
@@ -772,6 +772,14 @@ static int client_lwp_config_process(const struct lu_env *env,
                if (!tgt_is_mdt0(marker->cm_tgtname))
                        GOTO(out, rc = 0);
 
+               /* Don't try to connect old MDT server without LWP support,
+                * otherwise, the old MDT could regard this LWP client as
+                * a normal client and save the export on disk for recovery.
+                *
+                * This usually happen when rolling upgrade. LU-3929 */
+               if (marker->cm_vers < OBD_OCD_VERSION(2, 3, 60, 0))
+                       GOTO(out, rc = 0);
+
                if (!strncmp(marker->cm_comment, "add mdc", 7) ||
                    !strncmp(marker->cm_comment, "add failnid", 11)) {
                        if (marker->cm_flags & CM_START) {
@@ -804,8 +812,16 @@ static int client_lwp_config_process(const struct lu_env *env,
                break;
        }
        case LCFG_ADD_CONN: {
-               if (is_mdc_for_mdt0(lustre_cfg_string(lcfg, 0)))
+               if (is_mdc_for_mdt0(lustre_cfg_string(lcfg, 0)) &&
+                   (clli->cfg_flags & CFG_F_MARKER) != 0) {
                        rc = lustre_lwp_add_conn(lcfg, lsi);
+                       /* When the 'add mdc' record is old (< 2.3.60) but
+                        * 'add failnid' record is new (>= 2.3.60), add
+                        * connection should fail with -ENOENT since LWP
+                        * device wasn't setup, we'd ignore such error. */
+                       if (rc == -ENOENT && clli->cfg_flags & CFG_F_SKIP)
+                               rc = 0;
+               }
                break;
        }
        default: