From: Vitaly Fertman Date: Mon, 7 Dec 2015 18:37:46 +0000 (+0300) Subject: LU-7173 mdt: intent vs unlink race X-Git-Tag: 2.7.65~52 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=refs%2Fchanges%2F01%2F17501%2F4;p=fs%2Flustre-release.git LU-7173 mdt: intent vs unlink race a race between intent and unlink results in working on stale object, the intent resend finds the lock and !existent object and LBUG: ASSERTION( ((o)->lo_header->loh_attr & LOHA_EXISTS) != 0 ) in mdt_getattr_name_lock()->lu_object_attr(). a check for getattr is added for !existent object in RESENT case. another case is when resend finds inode cached on thread, osd_iget_check() checking the inode returns ESTALE as nlinks==0. ldlm_lock_enqueue() gets an error from ns_policy() and tries to destroy the found lock, which is granted, getting "lock still on resource" LBUG. It is unclear if the lock reached the client in the original reply, just leave the lock on server, not returning it again to the client. Due to LU-6529, the server will not OOM in case the 2nd reply will be handled on the client. Signed-off-by: Vitaly Fertman Change-Id: I128cd6eeda579c6477bf4564db5e551a46a74d71 Reviewed-on: http://es-gerrit.xyus.xyratex.com:8080/8849 Tested-by: Jenkins Seagate-bug-id: MRP-3042 Reviewed-by: Alexander Nikolaevich Boyko Reviewed-by: Andriy Skulysh Reviewed-by: Alexey Leonidovich Lyashkov Tested-by: Elena V. Gryaznova Reviewed-on: http://review.whamcloud.com/17501 Tested-by: Maloo Reviewed-by: Alexander Boyko Reviewed-by: Oleg Drokin --- diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 88e4a07..5f74f80 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -248,6 +248,7 @@ extern char obd_jobid_var[]; #define OBD_FAIL_MDS_REINT_MULTI_NET 0x159 #define OBD_FAIL_MDS_REINT_MULTI_NET_REP 0x15a #define OBD_FAIL_MDS_LLOG_CREATE_FAILED2 0x15b +#define OBD_FAIL_MDS_INTENT_DELAY 0x160 /* layout lock */ #define OBD_FAIL_MDS_NO_LL_GETATTR 0x170 @@ -297,7 +298,7 @@ extern char obd_jobid_var[]; #define OBD_FAIL_OST_BRW_PAUSE_BULK 0x214 #define OBD_FAIL_OST_ENOSPC 0x215 #define OBD_FAIL_OST_EROFS 0x216 -#define OBD_FAIL_OST_ENOENT 0x217 +#define OBD_FAIL_SRV_ENOENT 0x217 /* OBD_FAIL_OST_QUOTACHECK_NET 0x218 obsolete since 2.4 */ #define OBD_FAIL_OST_QUOTACTL_NET 0x219 #define OBD_FAIL_OST_CHECKSUM_RECEIVE 0x21a diff --git a/lustre/ldlm/ldlm_lock.c b/lustre/ldlm/ldlm_lock.c index 486e7b3..b402a2d 100644 --- a/lustre/ldlm/ldlm_lock.c +++ b/lustre/ldlm/ldlm_lock.c @@ -1732,6 +1732,16 @@ enum ldlm_error ldlm_lock_enqueue(struct ldlm_namespace *ns, } *flags |= LDLM_FL_LOCK_CHANGED; RETURN(0); + } else if (rc != ELDLM_OK && + lock->l_req_mode == lock->l_granted_mode) { + LASSERT(*flags & LDLM_FL_RESENT); + /* It may happen that ns_policy returns an error in + * resend case, object may be unlinked or just some + * error occurs. It is unclear if lock reached the + * client in the original reply, just leave the lock on + * server, not returning it again to client. Due to + * LU-6529, the server will not OOM. */ + RETURN(rc); } else if (rc != ELDLM_OK || (rc == ELDLM_OK && (*flags & LDLM_FL_INTENT_ONLY))) { ldlm_lock_destroy(lock); diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c index 72a6fb2..5060265 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -1515,7 +1515,7 @@ existing_lock: } } - if (rc != 0) { + if (rc != 0 && !(flags & LDLM_FL_RESENT)) { if (lock->l_export) { ldlm_lock_cancel(lock); } else { diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c index e27fee3..216cb4d 100644 --- a/lustre/mdt/mdt_handler.c +++ b/lustre/mdt/mdt_handler.c @@ -1436,6 +1436,13 @@ static int mdt_getattr_name_lock(struct mdt_thread_info *info, GOTO(out_parent, rc = PTR_ERR(child)); OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_RESEND, obd_timeout * 2); + if (!mdt_object_exists(child)) { + LU_OBJECT_DEBUG(D_INODE, info->mti_env, + &child->mot_obj, + "Object doesn't exist!\n"); + GOTO(out_child, rc = -ENOENT); + } + rc = mdt_check_resent_lock(info, child, lhc); if (rc < 0) { GOTO(out_child, rc); @@ -1444,13 +1451,6 @@ static int mdt_getattr_name_lock(struct mdt_thread_info *info, mdt_lock_reg_init(lhc, LCK_PR); try_layout = false; - if (!mdt_object_exists(child)) { - LU_OBJECT_DEBUG(D_INODE, info->mti_env, - &child->mot_obj, - "Object doesn't exist!\n"); - GOTO(out_child, rc = -ENOENT); - } - if (!(child_bits & MDS_INODELOCK_UPDATE) && mdt_object_exists(child) && !mdt_object_remote(child)) { struct md_attr *ma = &info->mti_attr; @@ -3390,6 +3390,8 @@ static int mdt_intent_opc(enum ldlm_intent_flags itopc, if (flv->it_act != NULL) { struct ldlm_reply *rep; + OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_INTENT_DELAY, 10); + /* execute policy */ rc = flv->it_act(opc, info, lockp, flags); diff --git a/lustre/ofd/ofd_io.c b/lustre/ofd/ofd_io.c index 04445af..0608cd9 100644 --- a/lustre/ofd/ofd_io.c +++ b/lustre/ofd/ofd_io.c @@ -711,7 +711,7 @@ int ofd_preprw(const struct lu_env *env, int cmd, struct obd_export *exp, LASSERT(oa != NULL); - if (OBD_FAIL_CHECK(OBD_FAIL_OST_ENOENT)) { + if (OBD_FAIL_CHECK(OBD_FAIL_SRV_ENOENT)) { struct ofd_seq *oseq; oseq = ofd_seq_load(env, ofd, ostid_seq(&oa->o_oi)); diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c index 619662a..2bf04b4 100644 --- a/lustre/osd-ldiskfs/osd_handler.c +++ b/lustre/osd-ldiskfs/osd_handler.c @@ -747,7 +747,7 @@ static int osd_fid_lookup(const struct lu_env *env, struct osd_object *obj, LASSERT(info); oic = &info->oti_cache; - if (OBD_FAIL_CHECK(OBD_FAIL_OST_ENOENT)) + if (OBD_FAIL_CHECK(OBD_FAIL_SRV_ENOENT)) RETURN(-ENOENT); /* For the object is created as locking anchor, or for the object to diff --git a/lustre/osd-zfs/osd_oi.c b/lustre/osd-zfs/osd_oi.c index 2333685..ef9aefc 100644 --- a/lustre/osd-zfs/osd_oi.c +++ b/lustre/osd-zfs/osd_oi.c @@ -467,7 +467,7 @@ int osd_fid_lookup(const struct lu_env *env, struct osd_device *dev, int rc = 0; ENTRY; - if (OBD_FAIL_CHECK(OBD_FAIL_OST_ENOENT)) + if (OBD_FAIL_CHECK(OBD_FAIL_SRV_ENOENT)) RETURN(-ENOENT); if (unlikely(fid_is_acct(fid))) { diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh index 86d824f..976ecf6 100755 --- a/lustre/tests/recovery-small.sh +++ b/lustre/tests/recovery-small.sh @@ -2252,6 +2252,56 @@ test_113() { } run_test 113 "ldlm enqueue dropped reply should not cause deadlocks" +T130_PID=0 +test_130_base() { + test_mkdir -p $DIR/$tdir + + # get only LOOKUP lock on $tdir + cancel_lru_locks mdc + ls $DIR/$tdir/$tfile 2>/dev/null + + # get getattr by fid on $tdir + # + # we need to race with unlink, unlink must complete before we will + # take a DLM lock, otherwise unlink will wait until getattr will + # complete; but later than getattr starts so that getattr found + # the object +#define OBD_FAIL_MDS_INTENT_DELAY 0x160 + set_nodes_failloc "$(mdts_nodes)" 0x80000160 + stat $DIR/$tdir & + T130_PID=$! + sleep 2 + + rm -rf $DIR/$tdir + + # drop the reply so that resend happens on an unlinked file. +#define OBD_FAIL_MDS_LDLM_REPLY_NET 0x157 + set_nodes_failloc "$(mdts_nodes)" 0x80000157 +} + +test_130a() { + remote_mds_nodsh && skip "remote MDS with nodsh" && return + test_130_base + + wait $T130_PID || [ $? -eq 0 ] && error "stat should fail" + return 0 +} +run_test 130a "enqueue resend on not existing file" + +test_130b() { + remote_mds_nodsh && skip "remote MDS with nodsh" && return + test_130_base + # let the reply to be dropped + sleep 10 + +#define OBD_FAIL_SRV_ENOENT 0x217 + set_nodes_failloc "$(mdts_nodes)" 0x80000217 + + wait $T130_PID || [ $? -eq 0 ] && error "stat should fail" + return 0 +} +run_test 130b "enqueue resend on a stale inode" + complete $SECONDS check_and_cleanup_lustre exit_status diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 7a25e4b..a66f35f 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -5649,7 +5649,7 @@ test_68b() { # was test_68 run_test 68b "support swapping to Lustre ========================" # bug5265, obdfilter oa2dentry return -ENOENT -# #define OBD_FAIL_OST_ENOENT 0x217 +# #define OBD_FAIL_SRV_ENOENT 0x217 test_69() { [ $PARALLEL == "yes" ] && skip "skip parallel run" && return remote_ost_nodsh && skip "remote OST with nodsh" && return @@ -7597,7 +7597,7 @@ test_118b() reset_async - #define OBD_FAIL_OST_ENOENT 0x217 + #define OBD_FAIL_SRV_ENOENT 0x217 set_nodes_failloc "$(osts_nodes)" 0x217 $MULTIOP $DIR/$tfile oO_CREAT:O_RDWR:O_SYNC:w4096c RC=$?