ASSERTION( lock->l_granted_mode != lock->l_req_mode ) is hit
when a replayed lock enqueue (LDLM_FL_REPLAY) is resent.
On resend, do not add the lock to the waiting list again, because
it has already been added to the list or granted.
Lustre-commit: 63851b5816bb30687fbf3750380d6b448e9400f1
Lustre-change: http://review.whamcloud.com/10903
Change-Id: Ib8e5d2c7588f6cacd1723529e70d29f63742caad
Xyratex-bug-id: MRP-1944
Signed-off-by: Andriy Skulysh <Andriy_Skulysh@xyratex.com>
Signed-off-by: Vitaly Fertman <vitaly_fertman@xyratex.com>
Reviewed-on: http://review.whamcloud.com/10903
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
Tested-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: John L. Hammond <john.hammond@intel.com>
}
}
+ if (*flags & LDLM_FL_RESENT)
+ RETURN(ELDLM_OK);
+
/* For a replaying lock, it might be already in granted list. So
* unlinking the lock will cause the interval node to be freed, we
* have to allocate the interval node early otherwise we can't regrant
total_enqueue_wait = cfs_time_sub(cfs_time_current_sec(),
lock->l_last_activity);
+ if (OBD_FAIL_PRECHECK(OBD_FAIL_OST_LDLM_REPLY_NET)) {
+ LDLM_DEBUG(lock, "dropping CP AST");
+ RETURN(0);
+ }
+
req = ptlrpc_request_alloc(lock->l_export->exp_imp_reverse,
&RQF_LDLM_CP_CALLBACK);
if (req == NULL)
}
#endif
- if (unlikely(lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT))
- flags |= LDLM_FL_RESENT;
-
- if (unlikely(flags & (LDLM_FL_REPLAY | LDLM_FL_RESENT))) {
+ if (unlikely((flags & LDLM_FL_REPLAY) ||
+ (lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT))) {
/* Find an existing lock in the per-export lock hash */
/* In the function below, .hs_keycmp resolves to
* ldlm_export_lock_keycmp() */
if (lock != NULL) {
DEBUG_REQ(D_DLMTRACE, req, "found existing lock cookie "
LPX64, lock->l_handle.h_cookie);
+ flags |= LDLM_FL_RESENT;
GOTO(existing_lock, rc = 0);
- } else {
- flags &= ~LDLM_FL_RESENT;
}
}
# end commit on sharing tests
+test_24() {
+ cancel_lru_locks osc
+
+ $SETSTRIPE -i 0 -c 1 $DIR/$tfile
+
+ # get a lock for the 1st client by writing to the file
+ dd if=/dev/zero of=$DIR/$tfile count=1 >/dev/null ||
+ error "failed to write data"
+
+ # get a waiting lock for the 2nd client (multiop runs in the background)
+ drop_ldlm_cancel "multiop $DIR2/$tfile Ow512" &
+ sleep 1
+
+#define OBD_FAIL_OST_LDLM_REPLY_NET 0x213
+ # fail over ost1 so that the waiting locks are replayed and then resent
+ do_facet ost1 lctl set_param fail_loc=0x80000213
+ fail ost1
+
+ # multiop does not finish because the CP AST is skipped;
+ # it is OK to kill it in the test, because the CP AST is already re-sent
+ # and it does not hang forever in real life
+ killall multiop
+ wait
+}
+run_test 24 "replay|resend"
+
complete $SECONDS
SLEEP=$((`date +%s` - $NOW))
[ $SLEEP -lt $TIMEOUT ] && sleep $SLEEP