From 5cb0a721aea53ffc8230190c3a0b35e71a47d35b Mon Sep 17 00:00:00 2001 From: Di Wang Date: Tue, 3 Nov 2015 07:32:13 -0800 Subject: [PATCH] LU-7383 mdt: retry for busy lock during migration During migration, if the lock on the migrating object is cached on another node, the MDT should revoke the lock and retry instead of returning -EBUSY. Signed-off-by: Di Wang Change-Id: I1317681a892b9a21f2c78d7696ca6f94d43bd9bc Reviewed-on: http://review.whamcloud.com/17048 Tested-by: Jenkins Reviewed-by: Andreas Dilger Tested-by: Maloo Reviewed-by: Alex Zhuravlev Reviewed-by: John L. Hammond Reviewed-by: Oleg Drokin --- lustre/mdt/mdt_reint.c | 53 ++++++++++++++++++++++++++++++++++++++++++-------- lustre/tests/sanity.sh | 2 ++ 2 files changed, 47 insertions(+), 8 deletions(-) diff --git a/lustre/mdt/mdt_reint.c b/lustre/mdt/mdt_reint.c index 5663c54..3413ebd 100644 --- a/lustre/mdt/mdt_reint.c +++ b/lustre/mdt/mdt_reint.c @@ -1260,6 +1260,7 @@ static int mdt_lock_objects_in_linkea(struct mdt_thread_info *info, struct lu_buf *buf = &info->mti_big_buf; struct linkea_data ldata = { NULL }; int count; + int retry_count; int rc; ENTRY; @@ -1278,6 +1279,10 @@ static int mdt_lock_objects_in_linkea(struct mdt_thread_info *info, RETURN(rc); } + /* ignore the migrating parent(@pobj) */ + retry_count = ldata.ld_leh->leh_reccount - 1; + +again: LASSERT(ldata.ld_leh != NULL); ldata.ld_lee = (struct link_ea_entry *)(ldata.ld_leh + 1); for (count = 0; count < ldata.ld_leh->leh_reccount; count++) { @@ -1326,18 +1331,51 @@ static int mdt_lock_objects_in_linkea(struct mdt_thread_info *info, /* Since this needs to lock all of objects in linkea, to avoid * deadlocks, because it does not follow parent-child order as - * other MDT operation, let's use try_lock here, i.e.
it will - * return immediately once there are conflict locks, and return - * EBUSY to client */ + * other MDT operation, let's use try_lock here and if the lock + * cannot be gotten because of conflicting locks, then drop all + * current locks, send an AST to the client, and start again. */ mdt_lock_pdo_init(&mll->mll_lh, LCK_PW, &name); rc = mdt_object_lock_try(info, mdt_pobj, &mll->mll_lh, MDS_INODELOCK_UPDATE); if (rc == 0) { - CDEBUG(D_ERROR, "%s: cannot lock "DFID": rc =%d\n", - mdt_obd_name(mdt), PFID(&fid), rc); - mdt_object_put(info->mti_env, mdt_pobj); + mdt_unlock_list(info, lock_list, rc); + + CDEBUG(D_INFO, "%s: busy lock on "DFID".\n", + mdt_obd_name(mdt), PFID(&fid)); + + if (retry_count == 0) { + mdt_object_put(info->mti_env, mdt_pobj); + OBD_FREE_PTR(mll); + GOTO(out, rc = -EBUSY); + } + + rc = mdt_object_lock(info, mdt_pobj, &mll->mll_lh, + MDS_INODELOCK_UPDATE); + if (rc != 0) { + mdt_object_put(info->mti_env, mdt_pobj); + OBD_FREE_PTR(mll); + GOTO(out, rc); + } + + if (mdt_object_remote(mdt_pobj)) { + struct ldlm_lock *lock; + + /* For remote object, set lock to cb_atomic, + * so lock can be released in blocking_ast() + * immediately, then the next try_lock will + * have a better chance to succeed */ + lock = + ldlm_handle2lock(&mll->mll_lh.mlh_rreg_lh); + LASSERT(lock != NULL); + lock_res_and_lock(lock); + ldlm_set_atomic_cb(lock); + unlock_res_and_lock(lock); + LDLM_LOCK_PUT(lock); + } + mdt_object_unlock_put(info, mdt_pobj, &mll->mll_lh, rc); OBD_FREE_PTR(mll); - GOTO(out, rc = -EBUSY); + retry_count--; + goto again; } rc = 0; INIT_LIST_HEAD(&mll->mll_list); @@ -1346,7 +1384,6 @@ static int mdt_lock_objects_in_linkea(struct mdt_thread_info *info, next: ldata.ld_lee = (struct link_ea_entry *)((char *)ldata.ld_lee + ldata.ld_reclen); - } out: if (rc != 0) diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index d10c8d7..7a25e4b 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -12975,6 +12975,7 @@ test_230e() { mkdir
$DIR/$tdir/other_dir touch $DIR/$tdir/migrate_dir/a ln $DIR/$tdir/migrate_dir/a $DIR/$tdir/other_dir/b + ls $DIR/$tdir/other_dir $LFS migrate -m 1 $DIR/$tdir/migrate_dir || error "migrate dir fails" @@ -13018,6 +13019,7 @@ test_230f() { touch $DIR/$tdir/migrate_dir/a ln $DIR/$tdir/migrate_dir/a $DIR/$tdir/other_dir/ln1 ln $DIR/$tdir/migrate_dir/a $DIR/$tdir/other_dir/ln2 + ls $DIR/$tdir/other_dir # a should be migrated to MDT1, since no other links on MDT0 $LFS migrate -m 1 $DIR/$tdir/migrate_dir || -- 1.8.3.1