From: Fan Yong Date: Tue, 21 Oct 2014 13:54:21 +0000 (+0800) Subject: LU-4870 lfsck: lock old MDT-object in migrating X-Git-Tag: 2.6.94~19 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=19ae9b3605bd41348b2bd64accc50a34eaaa35a1 LU-4870 lfsck: lock old MDT-object in migrating According to the current metadata migration implementation, before the old MDT-object is removed, both the new MDT-object and the old MDT-object reference the same LOV layout. If the layout LFSCK then finds the new MDT-object because of a race, it will regard the related OST-object(s) as a multiple-referenced case and will try to create new OST-object(s) for the new MDT-object. To avoid such trouble, the layout LFSCK needs to lock the old MDT-object before confirming the multiple-referenced case. Signed-off-by: Fan Yong Change-Id: I9e42cb86683c33bedfef01ae7f6e2cc305f1137d Reviewed-on: http://review.whamcloud.com/13182 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Lai Siyao Reviewed-by: wangdi Reviewed-by: Oleg Drokin --- diff --git a/lustre/include/lustre/lustre_idl.h b/lustre/include/lustre/lustre_idl.h index 44d08e8..4a86f75 100644 --- a/lustre/include/lustre/lustre_idl.h +++ b/lustre/include/lustre/lustre_idl.h @@ -1717,6 +1717,7 @@ static inline void lmm_oi_cpu_to_le(struct ost_id *dst_oi, #define XATTR_NAME_SOM "trusted.som" #define XATTR_NAME_HSM "trusted.hsm" #define XATTR_NAME_LFSCK_BITMAP "trusted.lfsck_bitmap" +#define XATTR_NAME_DUMMY "trusted.dummy" #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 8, 53, 0) # define XATTR_NAME_LFSCK_NAMESPACE_OLD "trusted.lfsck_namespace" diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 7893c79..6c43bb4 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -553,6 +553,7 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type, #define OBD_FAIL_MIGRATE_NET_REP 0x1800 #define OBD_FAIL_MIGRATE_ENTRIES 0x1801 #define OBD_FAIL_MIGRATE_LINKEA 0x1802 +#define 
OBD_FAIL_MIGRATE_DELAY 0x1803 /* LMV */ #define OBD_FAIL_UNKNOWN_LMV_STRIPE 0x1901 diff --git a/lustre/lfsck/lfsck_layout.c b/lustre/lfsck/lfsck_layout.c index 02dc24a..e150328 100644 --- a/lustre/lfsck/lfsck_layout.c +++ b/lustre/lfsck/lfsck_layout.c @@ -3087,6 +3087,7 @@ static int lfsck_layout_check_parent(const struct lu_env *env, struct dt_object *tobj; struct lov_mds_md_v1 *lmm; struct lov_ost_data_v1 *objs; + struct lustre_handle lh = { 0 }; int rc; int i; __u32 magic; @@ -3120,7 +3121,6 @@ static int lfsck_layout_check_parent(const struct lu_env *env, if (IS_ERR(tobj)) RETURN(PTR_ERR(tobj)); - dt_read_lock(env, tobj, 0); if (dt_object_exists(tobj) == 0 || lfsck_is_dead_obj(tobj)) GOTO(out, rc = LLIT_UNMATCHED_PAIR); @@ -3170,16 +3170,60 @@ static int lfsck_layout_check_parent(const struct lu_env *env, } if (lu_fid_eq(cfid, tfid)) { - *lov_ea = *buf; + rc = lfsck_ibits_lock(env, com->lc_lfsck, tobj, &lh, + MDS_INODELOCK_UPDATE | + MDS_INODELOCK_LAYOUT | + MDS_INODELOCK_XATTR, + LCK_EX); + if (rc != 0) + GOTO(out, rc); + + dt_read_lock(env, tobj, 0); + + /* For local MDT-object, re-check existence + * after taking the lock. */ + if (!dt_object_remote(tobj)) { + if (dt_object_exists(tobj) == 0 || + lfsck_is_dead_obj(tobj)) { + rc = LLIT_UNMATCHED_PAIR; + } else { + *lov_ea = *buf; + rc = LLIT_MULTIPLE_REFERENCED; + } + + GOTO(unlock, rc); + } + + /* For migration case, the new MDT-object and old + * MDT-object may reference the same OST-object at + * some point during the migration. + * + * For remote MDT-object, the local MDT may not know + * whether it has been removed or not. Try checking + * for a non-existent xattr to check if this object + * has been removed or not. 
*/ + rc = dt_xattr_get(env, tobj, &LU_BUF_NULL, + XATTR_NAME_DUMMY, BYPASS_CAPA); + if (unlikely(rc == -ENOENT || rc >= 0)) { + rc = LLIT_UNMATCHED_PAIR; + } else if (rc == -ENODATA) { + *lov_ea = *buf; + rc = LLIT_MULTIPLE_REFERENCED; + } - GOTO(out, rc = LLIT_MULTIPLE_REFERENCED); + GOTO(unlock, rc); } } GOTO(out, rc = LLIT_UNMATCHED_PAIR); +unlock: + if (lustre_handle_is_used(&lh)) { + dt_read_unlock(env, tobj); + lfsck_ibits_unlock(&lh, LCK_EX); + } + out: - dt_read_unlock(env, tobj); lfsck_object_put(env, tobj); return rc; diff --git a/lustre/mdd/mdd_dir.c b/lustre/mdd/mdd_dir.c index 51794b1..6344e40 100644 --- a/lustre/mdd/mdd_dir.c +++ b/lustre/mdd/mdd_dir.c @@ -4085,6 +4085,8 @@ static int mdd_migrate(const struct lu_env *env, struct md_object *pobj, if (unlikely(OBD_FAIL_CHECK_RESET(OBD_FAIL_MIGRATE_NET_REP, OBD_FAIL_MDS_REINT_NET_REP))) GOTO(put, rc = 0); + } else { + OBD_FAIL_TIMEOUT(OBD_FAIL_MIGRATE_DELAY, cfs_fail_val); } /* step 4: update name entry to the new object */ diff --git a/lustre/tests/sanity-lfsck.sh b/lustre/tests/sanity-lfsck.sh index 4ad1b23..b89e2d2 100644 --- a/lustre/tests/sanity-lfsck.sh +++ b/lustre/tests/sanity-lfsck.sh @@ -1574,6 +1574,61 @@ test_15b() { } run_test 15b "LFSCK can repair unmatched MDT-object/OST-object pairs (2)" +test_15c() { + [ $MDSCOUNT -lt 2 ] && + skip "We need at least 2 MDSes for this test" && return + + echo "#####" + echo "According to current metadata migration implementation," + echo "before the old MDT-object is removed, both the new MDT-object" + echo "and old MDT-object will reference the same LOV layout. Then if" + echo "the layout LFSCK finds the new MDT-object by race, it will" + echo "regard related OST-object(s) as multiple referenced case, and" + echo "will try to create new OST-object(s) for the new MDT-object." + echo "To avoid such trouble, the layout LFSCK needs to lock the old" + echo "MDT-object before confirm the multiple referenced case." 
+ echo "#####" + + check_mount_and_prep + $LFS mkdir -i 1 $DIR/$tdir/a1 + $LFS setstripe -c 1 -i 0 $DIR/$tdir/a1 + dd if=/dev/zero of=$DIR/$tdir/a1/f1 bs=1M count=1 + cancel_lru_locks osc + + echo "Inject failure stub on MDT1 to delay the migration" + + #define OBD_FAIL_MIGRATE_DELAY 0x1803 + do_facet mds2 $LCTL set_param fail_val=5 fail_loc=0x1803 + echo "Migrate $DIR/$tdir/a1 from MDT1 to MDT0 with delay" + $LFS mv -M 0 $DIR/$tdir/a1 & + + sleep 1 + echo "Trigger layout LFSCK to race with the migration" + $START_LAYOUT -A -r || error "(1) Fail to start layout LFSCK!" + + for k in $(seq $MDSCOUNT); do + # The LFSCK status query interval is 30 seconds. In case + # some LFSCK_NOTIFY RPCs fail or are lost, wait long enough + # to guarantee that the status is synced up. + wait_update_facet mds${k} "$LCTL get_param -n \ + mdd.$(facet_svc mds${k}).lfsck_layout | + awk '/^status/ { print \\\$2 }'" "completed" $LTIME || + error "(2) MDS${k} is not the expected 'completed'" + done + + do_facet mds2 $LCTL set_param fail_loc=0 fail_val=0 + local repaired=$($SHOW_LAYOUT | + awk '/^repaired_unmatched_pair/ { print $2 }') + [ $repaired -eq 1 ] || + error "(3) Fail to repair unmatched pair: $repaired" + + repaired=$($SHOW_LAYOUT | + awk '/^repaired_multiple_referenced/ { print $2 }') + [ $repaired -eq 0 ] || + error "(4) Unexpectedly repaird multiple references: $repaired" +} +run_test 15c "LFSCK can repair unmatched MDT-object/OST-object pairs (3)" + test_16() { echo "#####" echo "If the OST-object's owner information does not match the owner"