From e3e82dcd8b32c146670a1aa0af38c327d5ef9556 Mon Sep 17 00:00:00 2001 From: Andriy Skulysh Date: Wed, 29 Jun 2016 15:04:14 +0300 Subject: [PATCH] LU-8349 ldlm: ASSERTION(flock->blocking_export!=0) failed Hash lock protects only during .hs_put_locked. Switch to atomic blocking_refs. Whole policy structure was zeroed twice. Once during enqueue and second time during resend or replay. Policy structure should be initialized with default values only in ldlm_lock_new(). Change-Id: Ib916f64cd03cfe812c86463b4354bf5a9bbcdd56 Seagate-bug-id: MRP-2536, MRP-2909 Signed-off-by: Andriy Skulysh Reviewed-by: Alexander Boyko Reviewed-by: Vitaly Fertman Signed-off-by: Ben Evans Reviewed-on: http://review.whamcloud.com/21061 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Mike Pershin Reviewed-by: Oleg Drokin --- lustre/include/lustre_dlm.h | 3 +-- lustre/ldlm/ldlm_extent.c | 1 - lustre/ldlm/ldlm_flock.c | 9 ++++----- lustre/ldlm/ldlm_inodebits.c | 1 - lustre/ldlm/ldlm_lockd.c | 16 ++++++++-------- lustre/tests/multiop.c | 5 +++++ lustre/tests/recovery-small.sh | 23 +++++++++++++++++++++++ 7 files changed, 41 insertions(+), 17 deletions(-) diff --git a/lustre/include/lustre_dlm.h b/lustre/include/lustre_dlm.h index 6c6a0ec..2dd19cd 100644 --- a/lustre/include/lustre_dlm.h +++ b/lustre/include/lustre_dlm.h @@ -624,8 +624,7 @@ struct ldlm_flock { __u64 owner; __u64 blocking_owner; struct obd_export *blocking_export; - /* Protected by the hash lock */ - __u32 blocking_refs; + atomic_t blocking_refs; __u32 pid; }; diff --git a/lustre/ldlm/ldlm_extent.c b/lustre/ldlm/ldlm_extent.c index 42c84f0..721e458 100644 --- a/lustre/ldlm/ldlm_extent.c +++ b/lustre/ldlm/ldlm_extent.c @@ -1065,7 +1065,6 @@ void ldlm_extent_unlink_lock(struct ldlm_lock *lock) void ldlm_extent_policy_wire_to_local(const union ldlm_wire_policy_data *wpolicy, union ldlm_policy_data *lpolicy) { - memset(lpolicy, 0, sizeof(*lpolicy)); lpolicy->l_extent.start = wpolicy->l_extent.start; lpolicy->l_extent.end = wpolicy->l_extent.end; lpolicy->l_extent.gid = wpolicy->l_extent.gid; diff --git a/lustre/ldlm/ldlm_flock.c b/lustre/ldlm/ldlm_flock.c index f13bc5a..bcf8a1e 100644 --- a/lustre/ldlm/ldlm_flock.c +++ b/lustre/ldlm/ldlm_flock.c @@ -108,7 +108,7 @@ static inline void ldlm_flock_blocking_link(struct ldlm_lock *req, lock->l_policy_data.l_flock.owner; req->l_policy_data.l_flock.blocking_export = lock->l_export; - req->l_policy_data.l_flock.blocking_refs = 0; + atomic_set(&req->l_policy_data.l_flock.blocking_refs, 0); cfs_hash_add(req->l_export->exp_flock_hash, &req->l_policy_data.l_flock.owner, @@ -843,7 +843,6 @@ int ldlm_flock_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, void ldlm_flock_policy_wire_to_local(const union ldlm_wire_policy_data *wpolicy, union ldlm_policy_data *lpolicy) { - memset(lpolicy, 0, sizeof(*lpolicy)); lpolicy->l_flock.start = wpolicy->l_flock.lfw_start; lpolicy->l_flock.end = wpolicy->l_flock.lfw_end; lpolicy->l_flock.pid = wpolicy->l_flock.lfw_pid; @@ -902,7 +901,7 @@ ldlm_export_flock_get(struct cfs_hash *hs, struct hlist_node *hnode) flock = &lock->l_policy_data.l_flock; LASSERT(flock->blocking_export != NULL); class_export_get(flock->blocking_export); - flock->blocking_refs++; + atomic_inc(&flock->blocking_refs); } static void @@ -912,15 +911,15 @@ ldlm_export_flock_put(struct cfs_hash *hs, struct hlist_node *hnode) struct ldlm_flock *flock; lock = hlist_entry(hnode, struct ldlm_lock, l_exp_flock_hash); - LDLM_LOCK_RELEASE(lock); flock = &lock->l_policy_data.l_flock; LASSERT(flock->blocking_export != NULL); class_export_put(flock->blocking_export); - if (--flock->blocking_refs == 0) { + if (atomic_dec_and_test(&flock->blocking_refs)) { flock->blocking_owner = 0; flock->blocking_export = NULL; } + LDLM_LOCK_RELEASE(lock); } static struct cfs_hash_ops ldlm_export_flock_ops = { diff --git a/lustre/ldlm/ldlm_inodebits.c b/lustre/ldlm/ldlm_inodebits.c index 74119fb..cb93ce0 100644 --- a/lustre/ldlm/ldlm_inodebits.c +++ b/lustre/ldlm/ldlm_inodebits.c @@ -252,7 +252,6 @@ int ldlm_process_inodebits_lock(struct ldlm_lock *lock, __u64 *flags, void ldlm_ibits_policy_wire_to_local(const union ldlm_wire_policy_data *wpolicy, union ldlm_policy_data *lpolicy) { - memset(lpolicy, 0, sizeof(*lpolicy)); lpolicy->l_inodebits.bits = wpolicy->l_inodebits.bits; } diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c index 566b9d62..98c3d80 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -1341,6 +1341,14 @@ int ldlm_handle_enqueue0(struct ldlm_namespace *ns, * without them. */ lock->l_flags |= ldlm_flags_from_wire(dlm_req->lock_flags & LDLM_FL_INHERIT_MASK); + + ldlm_convert_policy_to_local(req->rq_export, + dlm_req->lock_desc.l_resource.lr_type, + &dlm_req->lock_desc.l_policy_data, + &lock->l_policy_data); + if (dlm_req->lock_desc.l_resource.lr_type == LDLM_EXTENT) + lock->l_req_extent = lock->l_policy_data.l_extent; + existing_lock: if (flags & LDLM_FL_HAS_INTENT) { @@ -1362,14 +1370,6 @@ existing_lock: GOTO(out, rc); } - if (dlm_req->lock_desc.l_resource.lr_type != LDLM_PLAIN) - ldlm_convert_policy_to_local(req->rq_export, - dlm_req->lock_desc.l_resource.lr_type, - &dlm_req->lock_desc.l_policy_data, - &lock->l_policy_data); - if (dlm_req->lock_desc.l_resource.lr_type == LDLM_EXTENT) - lock->l_req_extent = lock->l_policy_data.l_extent; - err = ldlm_lock_enqueue(ns, &lock, cookie, &flags); if (err) { if ((int)err < 0) diff --git a/lustre/tests/multiop.c b/lustre/tests/multiop.c index 40a8cb4..820722c 100644 --- a/lustre/tests/multiop.c +++ b/lustre/tests/multiop.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include #include @@ -415,6 +416,10 @@ int main(int argc, char **argv) exit(save_errno); } break; + case 'j': + if (flock(fd, LOCK_EX) == -1) + errx(-1, "flock()"); + break; case 'K': oldpath = POP_ARG(); if (oldpath == NULL) diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh index 1c498b7..73f13cf 100755 --- a/lustre/tests/recovery-small.sh +++ b/lustre/tests/recovery-small.sh @@ -2528,6 +2528,29 @@ test_131() { } run_test 131 "IO vs evict results to IO under staled lock" +test_133() { + local list=$(comma_list $(mdts_nodes)) + + local t=$((TIMEOUT * 2)) + touch $DIR/$tfile + + flock $DIR/$tfile -c "echo bl lock;sleep $t;echo bl flock unlocked" & + sleep 1 + multiop_bg_pause $DIR/$tfile O_jc || return 1 + PID=$! + + #define OBD_FAIL_LDLM_REPLY 0x30c + do_nodes $list $LCTL set_param fail_loc=0x8000030c + kill -USR1 $PID + echo "waiting for multiop $PID" + wait $PID || return 2 + + rm -f $DIR/$tfile + + return 0 +} +run_test 133 "don't fail on flock resend" + complete $SECONDS check_and_cleanup_lustre exit_status -- 1.8.3.1