Whamcloud - gitweb
LU-4870 lfsck: lock old MDT-object in migrating 82/13182/6
author Fan Yong <fan.yong@intel.com>
Tue, 21 Oct 2014 13:54:21 +0000 (21:54 +0800)
committer Oleg Drokin <oleg.drokin@intel.com>
Wed, 4 Feb 2015 02:47:42 +0000 (02:47 +0000)
According to the current metadata migration implementation, before the
old MDT-object is removed, both the new MDT-object and the old MDT-object
reference the same LOV layout. If the layout LFSCK races with the
migration and finds the new MDT-object, it will regard the related
OST-object(s) as multiply referenced and will try to create new
OST-object(s) for the new MDT-object. To avoid this, the layout LFSCK
needs to lock the old MDT-object before confirming the multiple-referenced case.

Signed-off-by: Fan Yong <fan.yong@intel.com>
Change-Id: I9e42cb86683c33bedfef01ae7f6e2cc305f1137d
Reviewed-on: http://review.whamcloud.com/13182
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Lai Siyao <lai.siyao@intel.com>
Reviewed-by: wangdi <di.wang@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/include/lustre/lustre_idl.h
lustre/include/obd_support.h
lustre/lfsck/lfsck_layout.c
lustre/mdd/mdd_dir.c
lustre/tests/sanity-lfsck.sh

index 44d08e8..4a86f75 100644 (file)
@@ -1717,6 +1717,7 @@ static inline void lmm_oi_cpu_to_le(struct ost_id *dst_oi,
 #define XATTR_NAME_SOM         "trusted.som"
 #define XATTR_NAME_HSM         "trusted.hsm"
 #define XATTR_NAME_LFSCK_BITMAP "trusted.lfsck_bitmap"
 #define XATTR_NAME_SOM         "trusted.som"
 #define XATTR_NAME_HSM         "trusted.hsm"
 #define XATTR_NAME_LFSCK_BITMAP "trusted.lfsck_bitmap"
+#define XATTR_NAME_DUMMY       "trusted.dummy"
 
 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 8, 53, 0)
 # define XATTR_NAME_LFSCK_NAMESPACE_OLD "trusted.lfsck_namespace"
 
 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 8, 53, 0)
 # define XATTR_NAME_LFSCK_NAMESPACE_OLD "trusted.lfsck_namespace"
index 7893c79..6c43bb4 100644 (file)
@@ -553,6 +553,7 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type,
 #define OBD_FAIL_MIGRATE_NET_REP               0x1800
 #define OBD_FAIL_MIGRATE_ENTRIES               0x1801
 #define OBD_FAIL_MIGRATE_LINKEA                        0x1802
 #define OBD_FAIL_MIGRATE_NET_REP               0x1800
 #define OBD_FAIL_MIGRATE_ENTRIES               0x1801
 #define OBD_FAIL_MIGRATE_LINKEA                        0x1802
+#define OBD_FAIL_MIGRATE_DELAY                 0x1803
 
 /* LMV */
 #define OBD_FAIL_UNKNOWN_LMV_STRIPE            0x1901
 
 /* LMV */
 #define OBD_FAIL_UNKNOWN_LMV_STRIPE            0x1901
index 02dc24a..e150328 100644 (file)
@@ -3087,6 +3087,7 @@ static int lfsck_layout_check_parent(const struct lu_env *env,
        struct dt_object                *tobj;
        struct lov_mds_md_v1            *lmm;
        struct lov_ost_data_v1          *objs;
        struct dt_object                *tobj;
        struct lov_mds_md_v1            *lmm;
        struct lov_ost_data_v1          *objs;
+       struct lustre_handle             lh     = { 0 };
        int                              rc;
        int                              i;
        __u32                            magic;
        int                              rc;
        int                              i;
        __u32                            magic;
@@ -3120,7 +3121,6 @@ static int lfsck_layout_check_parent(const struct lu_env *env,
        if (IS_ERR(tobj))
                RETURN(PTR_ERR(tobj));
 
        if (IS_ERR(tobj))
                RETURN(PTR_ERR(tobj));
 
-       dt_read_lock(env, tobj, 0);
        if (dt_object_exists(tobj) == 0 ||
            lfsck_is_dead_obj(tobj))
                GOTO(out, rc = LLIT_UNMATCHED_PAIR);
        if (dt_object_exists(tobj) == 0 ||
            lfsck_is_dead_obj(tobj))
                GOTO(out, rc = LLIT_UNMATCHED_PAIR);
@@ -3170,16 +3170,60 @@ static int lfsck_layout_check_parent(const struct lu_env *env,
                }
 
                if (lu_fid_eq(cfid, tfid)) {
                }
 
                if (lu_fid_eq(cfid, tfid)) {
-                       *lov_ea = *buf;
+                       rc = lfsck_ibits_lock(env, com->lc_lfsck, tobj, &lh,
+                                             MDS_INODELOCK_UPDATE |
+                                             MDS_INODELOCK_LAYOUT |
+                                             MDS_INODELOCK_XATTR,
+                                             LCK_EX);
+                       if (rc != 0)
+                               GOTO(out, rc);
+
+                       dt_read_lock(env, tobj, 0);
+
+                       /* For local MDT-object, re-check existence
+                        * after taking the lock. */
+                       if (!dt_object_remote(tobj)) {
+                               if (dt_object_exists(tobj) == 0 ||
+                                   lfsck_is_dead_obj(tobj)) {
+                                       rc = LLIT_UNMATCHED_PAIR;
+                               } else {
+                                       *lov_ea = *buf;
+                                       rc = LLIT_MULTIPLE_REFERENCED;
+                               }
+
+                               GOTO(unlock, rc);
+                       }
+
+                       /* For migration case, the new MDT-object and old
+                        * MDT-object may reference the same OST-object at
+                        * some migration internal time.
+                        *
+                        * For remote MDT-object, the local MDT may not know
+                        * whether it has been removed or not.  Try checking
+                        * for a non-existent xattr to check if this object
+                        * has been removed or not. */
+                       rc = dt_xattr_get(env, tobj, &LU_BUF_NULL,
+                                         XATTR_NAME_DUMMY, BYPASS_CAPA);
+                       if (unlikely(rc == -ENOENT || rc >= 0)) {
+                               rc = LLIT_UNMATCHED_PAIR;
+                       } else if (rc == -ENODATA) {
+                               *lov_ea = *buf;
+                               rc = LLIT_MULTIPLE_REFERENCED;
+                       }
 
 
-                       GOTO(out, rc = LLIT_MULTIPLE_REFERENCED);
+                       GOTO(unlock, rc);
                }
        }
 
        GOTO(out, rc = LLIT_UNMATCHED_PAIR);
 
                }
        }
 
        GOTO(out, rc = LLIT_UNMATCHED_PAIR);
 
+unlock:
+       if (lustre_handle_is_used(&lh)) {
+               dt_read_unlock(env, tobj);
+               lfsck_ibits_unlock(&lh, LCK_EX);
+       }
+
 out:
 out:
-       dt_read_unlock(env, tobj);
        lfsck_object_put(env, tobj);
 
        return rc;
        lfsck_object_put(env, tobj);
 
        return rc;
index 51794b1..6344e40 100644 (file)
@@ -4085,6 +4085,8 @@ static int mdd_migrate(const struct lu_env *env, struct md_object *pobj,
                if (unlikely(OBD_FAIL_CHECK_RESET(OBD_FAIL_MIGRATE_NET_REP,
                                                  OBD_FAIL_MDS_REINT_NET_REP)))
                        GOTO(put, rc = 0);
                if (unlikely(OBD_FAIL_CHECK_RESET(OBD_FAIL_MIGRATE_NET_REP,
                                                  OBD_FAIL_MDS_REINT_NET_REP)))
                        GOTO(put, rc = 0);
+       } else {
+               OBD_FAIL_TIMEOUT(OBD_FAIL_MIGRATE_DELAY, cfs_fail_val);
        }
 
        /* step 4: update name entry to the new object */
        }
 
        /* step 4: update name entry to the new object */
index 4ad1b23..b89e2d2 100644 (file)
@@ -1574,6 +1574,61 @@ test_15b() {
 }
 run_test 15b "LFSCK can repair unmatched MDT-object/OST-object pairs (2)"
 
 }
 run_test 15b "LFSCK can repair unmatched MDT-object/OST-object pairs (2)"
 
+test_15c() {
+       [ $MDSCOUNT -lt 2 ] &&
+               skip "We need at least 2 MDSes for this test" && return
+
+       echo "#####"
+       echo "According to current metadata migration implementation,"
+       echo "before the old MDT-object is removed, both the new MDT-object"
+       echo "and old MDT-object will reference the same LOV layout. Then if"
+       echo "the layout LFSCK finds the new MDT-object by race, it will"
+       echo "regard related OST-object(s) as multiple referenced case, and"
+       echo "will try to create new OST-object(s) for the new MDT-object."
+       echo "To avoid such trouble, the layout LFSCK needs to lock the old"
+       echo "MDT-object before confirm the multiple referenced case."
+       echo "#####"
+
+       check_mount_and_prep
+       $LFS mkdir -i 1 $DIR/$tdir/a1
+       $LFS setstripe -c 1 -i 0 $DIR/$tdir/a1
+       dd if=/dev/zero of=$DIR/$tdir/a1/f1 bs=1M count=1
+       cancel_lru_locks osc
+
+       echo "Inject failure stub on MDT1 to delay the migration"
+
+       #define OBD_FAIL_MIGRATE_DELAY                  0x1803
+       do_facet mds2 $LCTL set_param fail_val=5 fail_loc=0x1803
+       echo "Migrate $DIR/$tdir/a1 from MDT1 to MDT0 with delay"
+       $LFS mv -M 0 $DIR/$tdir/a1 &
+
+       sleep 1
+       echo "Trigger layout LFSCK to race with the migration"
+       $START_LAYOUT -A -r || error "(1) Fail to start layout LFSCK!"
+
+       for k in $(seq $MDSCOUNT); do
+               # The LFSCK status query interval is 30 seconds. For the case
+               # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
+               # time to guarantee the status sync up.
+               wait_update_facet mds${k} "$LCTL get_param -n \
+                       mdd.$(facet_svc mds${k}).lfsck_layout |
+                       awk '/^status/ { print \\\$2 }'" "completed" $LTIME ||
+                       error "(2) MDS${k} is not the expected 'completed'"
+       done
+
+       do_facet mds2 $LCTL set_param fail_loc=0 fail_val=0
+       local repaired=$($SHOW_LAYOUT |
+                        awk '/^repaired_unmatched_pair/ { print $2 }')
+       [ $repaired -eq 1 ] ||
+               error "(3) Fail to repair unmatched pair: $repaired"
+
+       repaired=$($SHOW_LAYOUT |
+                  awk '/^repaired_multiple_referenced/ { print $2 }')
+       [ $repaired -eq 0 ] ||
+               error "(4) Unexpectedly repaird multiple references: $repaired"
+}
+run_test 15c "LFSCK can repair unmatched MDT-object/OST-object pairs (3)"
+
 test_16() {
        echo "#####"
        echo "If the OST-object's owner information does not match the owner"
 test_16() {
        echo "#####"
        echo "If the OST-object's owner information does not match the owner"